1use crate::utils::regex_cache::{
7 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options controlling how Markdown text is reflowed.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Target maximum line length, measured in characters (`char`s), not bytes.
    pub line_length: usize,
    /// Sentence-aware breaking toggle.
    // NOTE(review): no code visible in this part of the file reads this flag — confirm where it is consumed.
    pub break_on_sentences: bool,
    /// When true, `reflow_markdown` reflows each paragraph line on its own
    /// instead of joining consecutive lines first.
    pub preserve_breaks: bool,
    /// When true, `reflow_line` emits one sentence per output line and does
    /// not apply `line_length`.
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// 80-character lines, sentence breaking on, line joining on,
    /// sentence-per-line mode off.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
34
/// Reports whether the character at **character index** `pos` ends a sentence.
///
/// A boundary is a `.`, `!` or `?` that is followed by a space, optional
/// further whitespace and then an uppercase character. The position is
/// rejected when the text before it ends (case-insensitively) with one of the
/// known abbreviations, or when the punctuation sits between two numeric
/// characters.
///
/// `pos` counts `char`s, not bytes; everything in here is indexed by
/// character so that multi-byte text neither panics nor checks the wrong
/// prefix.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    // Lower-cased once here; the text before `pos` is lower-cased before comparing.
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "mr", "mrs", "dr", "ms", "prof", "sr", "jr",
    ];

    let chars: Vec<char> = text.chars().collect();

    // There must be at least one character after `pos`.
    if pos >= chars.len().saturating_sub(1) {
        return false;
    }

    if !matches!(chars[pos], '.' | '!' | '?') {
        return false;
    }

    // The punctuation has to be followed directly by a plain space.
    if chars[pos + 1] != ' ' {
        return false;
    }

    // Skip any further whitespace to find the first character of the next sentence.
    let next = match chars[pos + 2..].iter().find(|c| !c.is_whitespace()) {
        Some(&c) => c,
        None => return false,
    };

    if !next.is_uppercase() {
        return false;
    }

    if pos > 0 {
        // Build the prefix from characters so `pos` is never used as a byte offset.
        let before = chars[..pos].iter().collect::<String>().to_lowercase();
        if ABBREVIATIONS.iter().any(|abbr| before.ends_with(*abbr)) {
            return false;
        }

        // Punctuation between two numeric characters is not a sentence end.
        if chars[pos - 1].is_numeric() && next.is_numeric() {
            return false;
        }
    }

    true
}
92
93pub fn split_into_sentences(text: &str) -> Vec<String> {
95 let mut sentences = Vec::new();
96 let mut current_sentence = String::new();
97 let mut chars = text.chars().peekable();
98 let mut pos = 0;
99
100 while let Some(c) = chars.next() {
101 current_sentence.push(c);
102
103 if is_sentence_boundary(text, pos) {
104 if chars.peek() == Some(&' ') {
106 chars.next();
107 pos += 1;
108 }
109 sentences.push(current_sentence.trim().to_string());
110 current_sentence.clear();
111 }
112
113 pos += 1;
114 }
115
116 if !current_sentence.trim().is_empty() {
118 sentences.push(current_sentence.trim().to_string());
119 }
120 sentences
121}
122
/// Reports whether `line` is a horizontal rule: it starts with `-`, `_` or
/// `*`, contains nothing but that character and spaces, and has at least
/// three occurrences of the character.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(c) if c == '-' || c == '_' || c == '*' => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
151
/// Reports whether `line` begins with a numbered-list marker: one or more
/// numeric characters, a `.`, and then either the end of the line or a space.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte length of the leading run of numeric characters.
    let number_len = line
        .find(|c: char| !c.is_numeric())
        .unwrap_or_else(|| line.len());
    if number_len == 0 {
        return false;
    }

    let mut after_number = line[number_len..].chars();
    match (after_number.next(), after_number.next()) {
        (Some('.'), None) | (Some('.'), Some(' ')) => true,
        _ => false,
    }
}
174
/// Reports whether `line` ends with a hard-break marker: a trailing space or
/// a trailing backslash. One trailing `\r` is ignored first.
// NOTE(review): a single trailing space is accepted here; CommonMark hard
// breaks need two or more — confirm this is intended.
fn has_hard_break(line: &str) -> bool {
    let body = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    matches!(body.chars().next_back(), Some(' ') | Some('\\'))
}
184
/// Trims trailing whitespace from `s` while keeping a hard-break marker.
///
/// One trailing `\r` is dropped first. A line ending in a backslash is
/// returned unchanged. A line ending in a space is trimmed and given back a
/// single trailing space, unless nothing but whitespace remains, in which
/// case the result is empty. Any other line is simply right-trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let mut out = String::from(content);
    // Re-attach the break marker only when there is content to attach it to.
    if line.ends_with(" ") && !content.is_empty() {
        out.push_str(" ");
    }
    out
}
215
216pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
217 if options.sentence_per_line {
219 let elements = parse_markdown_elements(line);
220 return reflow_elements_sentence_per_line(&elements);
221 }
222
223 if line.chars().count() <= options.line_length {
225 return vec![line.to_string()];
226 }
227
228 let elements = parse_markdown_elements(line);
230
231 reflow_elements(&elements, options)
233}
234
/// One inline fragment of a parsed Markdown line.
///
/// `Display` renders a fragment back to its Markdown source form and `len`
/// reports the character width of that rendering.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Reference link with an empty label: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Reference image with an empty label: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$expr$`.
    InlineMath(String),
    /// Display math: `$$expr$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// Raw HTML tag, stored verbatim.
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Inline code: `` `code` ``.
    Code(String),
    /// Bold text: `**text**`.
    Bold(String),
    /// Italic text: `*text*`.
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Writes the Markdown source form of the element.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must stay in step with `len`, which counts `alt + url + 5`.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Italic(s) => write!(f, "*{s}*"),
        }
    }
}

impl Element {
    /// Width of the rendered element in characters: the content plus the
    /// fixed number of markup characters its `Display` form adds.
    fn len(&self) -> usize {
        fn width(s: &str) -> usize {
            s.chars().count()
        }

        match self {
            Element::Text(s) | Element::HtmlTag(s) | Element::HtmlEntity(s) => width(s),
            Element::Link { text, url } => width(text) + width(url) + 4,
            Element::ReferenceLink { text, reference } => width(text) + width(reference) + 4,
            Element::EmptyReferenceLink { text } => width(text) + 4,
            Element::ShortcutReference { reference } => width(reference) + 2,
            Element::InlineImage { alt, url } => width(alt) + width(url) + 5,
            Element::ReferenceImage { alt, reference } => width(alt) + width(reference) + 5,
            Element::EmptyReferenceImage { alt } => width(alt) + 5,
            Element::FootnoteReference { note } => width(note) + 3,
            Element::Strikethrough(s) | Element::WikiLink(s) | Element::DisplayMath(s) | Element::Bold(s) => {
                width(s) + 4
            }
            Element::InlineMath(s) | Element::EmojiShortcode(s) | Element::Code(s) | Element::Italic(s) => {
                width(s) + 2
            }
        }
    }
}
329
330fn parse_markdown_elements(text: &str) -> Vec<Element> {
339 let mut elements = Vec::new();
340 let mut remaining = text;
341
342 while !remaining.is_empty() {
343 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
345
346 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
349 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
350 {
351 earliest_match = Some((m.start(), "inline_image", m));
352 }
353
354 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
356 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
357 {
358 earliest_match = Some((m.start(), "ref_image", m));
359 }
360
361 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
363 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
364 {
365 earliest_match = Some((m.start(), "footnote_ref", m));
366 }
367
368 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
370 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
371 {
372 earliest_match = Some((m.start(), "inline_link", m));
373 }
374
375 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
377 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
378 {
379 earliest_match = Some((m.start(), "ref_link", m));
380 }
381
382 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
385 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
386 {
387 earliest_match = Some((m.start(), "shortcut_ref", m));
388 }
389
390 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
392 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
393 {
394 earliest_match = Some((m.start(), "wiki_link", m));
395 }
396
397 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
399 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
400 {
401 earliest_match = Some((m.start(), "display_math", m));
402 }
403
404 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
406 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
407 {
408 earliest_match = Some((m.start(), "inline_math", m));
409 }
410
411 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
413 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
414 {
415 earliest_match = Some((m.start(), "strikethrough", m));
416 }
417
418 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
420 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
421 {
422 earliest_match = Some((m.start(), "emoji", m));
423 }
424
425 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
427 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
428 {
429 earliest_match = Some((m.start(), "html_entity", m));
430 }
431
432 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
435 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
436 {
437 let matched_text = &remaining[m.start()..m.end()];
439 let is_autolink = matched_text.starts_with("<http://")
440 || matched_text.starts_with("<https://")
441 || matched_text.starts_with("<mailto:")
442 || matched_text.starts_with("<ftp://")
443 || matched_text.starts_with("<ftps://");
444
445 if !is_autolink {
446 earliest_match = Some((m.start(), "html_tag", m));
447 }
448 }
449
450 let mut next_special = remaining.len();
452 let mut special_type = "";
453
454 if let Some(pos) = remaining.find('`')
455 && pos < next_special
456 {
457 next_special = pos;
458 special_type = "code";
459 }
460 if let Some(pos) = remaining.find("**")
461 && pos < next_special
462 {
463 next_special = pos;
464 special_type = "bold";
465 }
466 if let Some(pos) = remaining.find('*')
467 && pos < next_special
468 && !remaining[pos..].starts_with("**")
469 {
470 next_special = pos;
471 special_type = "italic";
472 }
473
474 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
476 pos < next_special
477 } else {
478 false
479 };
480
481 if should_process_markdown_link {
482 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
483
484 if pos > 0 {
486 elements.push(Element::Text(remaining[..pos].to_string()));
487 }
488
489 match pattern_type {
491 "inline_image" => {
492 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
493 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
494 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
495 elements.push(Element::InlineImage {
496 alt: alt.to_string(),
497 url: url.to_string(),
498 });
499 remaining = &remaining[match_obj.end()..];
500 } else {
501 elements.push(Element::Text("!".to_string()));
502 remaining = &remaining[1..];
503 }
504 }
505 "ref_image" => {
506 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
507 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
508 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
509
510 if reference.is_empty() {
511 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
512 } else {
513 elements.push(Element::ReferenceImage {
514 alt: alt.to_string(),
515 reference: reference.to_string(),
516 });
517 }
518 remaining = &remaining[match_obj.end()..];
519 } else {
520 elements.push(Element::Text("!".to_string()));
521 remaining = &remaining[1..];
522 }
523 }
524 "footnote_ref" => {
525 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
526 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
527 elements.push(Element::FootnoteReference { note: note.to_string() });
528 remaining = &remaining[match_obj.end()..];
529 } else {
530 elements.push(Element::Text("[".to_string()));
531 remaining = &remaining[1..];
532 }
533 }
534 "inline_link" => {
535 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
536 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
537 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
538 elements.push(Element::Link {
539 text: text.to_string(),
540 url: url.to_string(),
541 });
542 remaining = &remaining[match_obj.end()..];
543 } else {
544 elements.push(Element::Text("[".to_string()));
546 remaining = &remaining[1..];
547 }
548 }
549 "ref_link" => {
550 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
551 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
552 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
553
554 if reference.is_empty() {
555 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
557 } else {
558 elements.push(Element::ReferenceLink {
560 text: text.to_string(),
561 reference: reference.to_string(),
562 });
563 }
564 remaining = &remaining[match_obj.end()..];
565 } else {
566 elements.push(Element::Text("[".to_string()));
568 remaining = &remaining[1..];
569 }
570 }
571 "shortcut_ref" => {
572 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
573 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
574 elements.push(Element::ShortcutReference {
575 reference: reference.to_string(),
576 });
577 remaining = &remaining[match_obj.end()..];
578 } else {
579 elements.push(Element::Text("[".to_string()));
581 remaining = &remaining[1..];
582 }
583 }
584 "wiki_link" => {
585 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
586 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
587 elements.push(Element::WikiLink(content.to_string()));
588 remaining = &remaining[match_obj.end()..];
589 } else {
590 elements.push(Element::Text("[[".to_string()));
591 remaining = &remaining[2..];
592 }
593 }
594 "display_math" => {
595 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
596 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
597 elements.push(Element::DisplayMath(math.to_string()));
598 remaining = &remaining[match_obj.end()..];
599 } else {
600 elements.push(Element::Text("$$".to_string()));
601 remaining = &remaining[2..];
602 }
603 }
604 "inline_math" => {
605 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
606 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
607 elements.push(Element::InlineMath(math.to_string()));
608 remaining = &remaining[match_obj.end()..];
609 } else {
610 elements.push(Element::Text("$".to_string()));
611 remaining = &remaining[1..];
612 }
613 }
614 "strikethrough" => {
615 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
616 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
617 elements.push(Element::Strikethrough(text.to_string()));
618 remaining = &remaining[match_obj.end()..];
619 } else {
620 elements.push(Element::Text("~~".to_string()));
621 remaining = &remaining[2..];
622 }
623 }
624 "emoji" => {
625 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
626 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
627 elements.push(Element::EmojiShortcode(emoji.to_string()));
628 remaining = &remaining[match_obj.end()..];
629 } else {
630 elements.push(Element::Text(":".to_string()));
631 remaining = &remaining[1..];
632 }
633 }
634 "html_entity" => {
635 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
637 remaining = &remaining[match_obj.end()..];
638 }
639 "html_tag" => {
640 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
642 remaining = &remaining[match_obj.end()..];
643 }
644 _ => {
645 elements.push(Element::Text("[".to_string()));
647 remaining = &remaining[1..];
648 }
649 }
650 } else {
651 if next_special > 0 && next_special < remaining.len() {
655 elements.push(Element::Text(remaining[..next_special].to_string()));
656 remaining = &remaining[next_special..];
657 }
658
659 match special_type {
661 "code" => {
662 if let Some(code_end) = remaining[1..].find('`') {
664 let code = &remaining[1..1 + code_end];
665 elements.push(Element::Code(code.to_string()));
666 remaining = &remaining[1 + code_end + 1..];
667 } else {
668 elements.push(Element::Text(remaining.to_string()));
670 break;
671 }
672 }
673 "bold" => {
674 if let Some(bold_end) = remaining[2..].find("**") {
676 let bold_text = &remaining[2..2 + bold_end];
677 elements.push(Element::Bold(bold_text.to_string()));
678 remaining = &remaining[2 + bold_end + 2..];
679 } else {
680 elements.push(Element::Text("**".to_string()));
682 remaining = &remaining[2..];
683 }
684 }
685 "italic" => {
686 if let Some(italic_end) = remaining[1..].find('*') {
688 let italic_text = &remaining[1..1 + italic_end];
689 elements.push(Element::Italic(italic_text.to_string()));
690 remaining = &remaining[1 + italic_end + 1..];
691 } else {
692 elements.push(Element::Text("*".to_string()));
694 remaining = &remaining[1..];
695 }
696 }
697 _ => {
698 elements.push(Element::Text(remaining.to_string()));
700 break;
701 }
702 }
703 }
704 }
705
706 elements
707}
708
709fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
711 let mut lines = Vec::new();
712 let mut current_line = String::new();
713
714 for element in elements.iter() {
715 let element_str = format!("{element}");
716
717 if let Element::Text(text) = element {
719 let combined = format!("{current_line}{text}");
721 let sentences = split_into_sentences(&combined);
722
723 if sentences.len() > 1 {
724 for (i, sentence) in sentences.iter().enumerate() {
726 if i == 0 {
727 let trimmed = sentence.trim();
730 let ends_with_sentence_punct =
731 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
732 let ends_with_abbreviation = if ends_with_sentence_punct {
733 let without_punct = trimmed
735 .trim_end_matches('.')
736 .trim_end_matches('!')
737 .trim_end_matches('?');
738 let ignored_words = [
739 "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
740 "Jr",
741 ];
742 ignored_words
743 .iter()
744 .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
745 } else {
746 false
747 };
748
749 if ends_with_abbreviation {
750 current_line = sentence.to_string();
752 } else {
753 lines.push(sentence.to_string());
755 current_line.clear();
756 }
757 } else if i == sentences.len() - 1 {
758 let trimmed = sentence.trim();
760 let ends_with_sentence_punct =
761 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
762
763 let ends_with_abbreviation = if ends_with_sentence_punct {
765 let without_punct = trimmed
767 .trim_end_matches('.')
768 .trim_end_matches('!')
769 .trim_end_matches('?');
770 let ignored_words = [
771 "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
772 "Jr",
773 ];
774 ignored_words
775 .iter()
776 .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
777 } else {
778 false
779 };
780
781 if ends_with_sentence_punct && !ends_with_abbreviation {
782 lines.push(sentence.to_string());
784 current_line.clear();
785 } else {
786 current_line = sentence.to_string();
788 }
789 } else {
790 lines.push(sentence.to_string());
792 }
793 }
794 } else {
795 current_line = combined;
797 }
798 } else {
799 if !current_line.is_empty()
802 && !current_line.ends_with(' ')
803 && !current_line.ends_with('(')
804 && !current_line.ends_with('[')
805 {
806 current_line.push(' ');
807 }
808 current_line.push_str(&element_str);
809 }
810 }
811
812 if !current_line.is_empty() {
814 lines.push(current_line.trim().to_string());
815 }
816 lines
817}
818
819fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
821 let mut lines = Vec::new();
822 let mut current_line = String::new();
823 let mut current_length = 0;
824
825 for element in elements {
826 let element_str = format!("{element}");
827 let element_len = element.len();
828
829 if let Element::Text(text) = element {
831 let words: Vec<&str> = text.split_whitespace().collect();
833
834 for word in words {
835 let word_len = word.chars().count();
836 if current_length > 0 && current_length + 1 + word_len > options.line_length {
837 lines.push(current_line.trim().to_string());
839 current_line = word.to_string();
840 current_length = word_len;
841 } else {
842 if current_length > 0 {
844 current_line.push(' ');
845 current_length += 1;
846 }
847 current_line.push_str(word);
848 current_length += word_len;
849 }
850 }
851 } else {
852 if current_length > 0 && current_length + 1 + element_len > options.line_length {
855 lines.push(current_line.trim().to_string());
857 current_line = element_str;
858 current_length = element_len;
859 } else {
860 if current_length > 0 {
862 current_line.push(' ');
863 current_length += 1;
864 }
865 current_line.push_str(&element_str);
866 current_length += element_len;
867 }
868 }
869 }
870
871 if !current_line.is_empty() {
873 lines.push(current_line.trim_end().to_string());
874 }
875
876 lines
877}
878
879pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
881 let lines: Vec<&str> = content.lines().collect();
882 let mut result = Vec::new();
883 let mut i = 0;
884
885 while i < lines.len() {
886 let line = lines[i];
887 let trimmed = line.trim();
888
889 if trimmed.is_empty() {
891 result.push(String::new());
892 i += 1;
893 continue;
894 }
895
896 if trimmed.starts_with('#') {
898 result.push(line.to_string());
899 i += 1;
900 continue;
901 }
902
903 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
905 result.push(line.to_string());
906 i += 1;
907 while i < lines.len() {
909 result.push(lines[i].to_string());
910 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
911 i += 1;
912 break;
913 }
914 i += 1;
915 }
916 continue;
917 }
918
919 if line.starts_with(" ") || line.starts_with("\t") {
921 result.push(line.to_string());
923 i += 1;
924 while i < lines.len() {
925 let next_line = lines[i];
926 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
928 result.push(next_line.to_string());
929 i += 1;
930 } else {
931 break;
932 }
933 }
934 continue;
935 }
936
937 if trimmed.starts_with('>') {
939 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
940 let quote_content = &line[quote_prefix.len()..].trim_start();
941
942 let reflowed = reflow_line(quote_content, options);
943 for reflowed_line in reflowed.iter() {
944 result.push(format!("{quote_prefix} {reflowed_line}"));
945 }
946 i += 1;
947 continue;
948 }
949
950 if is_horizontal_rule(trimmed) {
952 result.push(line.to_string());
953 i += 1;
954 continue;
955 }
956
957 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
959 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
960 || trimmed.starts_with('+')
961 || is_numbered_list_item(trimmed)
962 {
963 let indent = line.len() - line.trim_start().len();
965 let indent_str = " ".repeat(indent);
966
967 let mut marker_end = indent;
970 let mut content_start = indent;
971
972 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
973 if let Some(period_pos) = line[indent..].find('.') {
975 marker_end = indent + period_pos + 1; content_start = marker_end;
977 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
979 content_start += 1;
980 }
981 }
982 } else {
983 marker_end = indent + 1; content_start = marker_end;
986 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
988 content_start += 1;
989 }
990 }
991
992 let marker = &line[indent..marker_end];
993
994 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
997 i += 1;
998
999 while i < lines.len() {
1001 let next_line = lines[i];
1002 let next_trimmed = next_line.trim();
1003
1004 if next_trimmed.is_empty()
1006 || next_trimmed.starts_with('#')
1007 || next_trimmed.starts_with("```")
1008 || next_trimmed.starts_with("~~~")
1009 || next_trimmed.starts_with('>')
1010 || next_trimmed.starts_with('|')
1011 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1012 || is_horizontal_rule(next_trimmed)
1013 || (next_trimmed.starts_with('-')
1014 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1015 || (next_trimmed.starts_with('*')
1016 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017 || (next_trimmed.starts_with('+')
1018 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1019 || is_numbered_list_item(next_trimmed)
1020 {
1021 break;
1022 }
1023
1024 let next_indent = next_line.len() - next_line.trim_start().len();
1026 if next_indent >= content_start {
1027 let trimmed_start = next_line.trim_start();
1030 list_content.push(trim_preserving_hard_break(trimmed_start));
1031 i += 1;
1032 } else {
1033 break;
1035 }
1036 }
1037
1038 let combined_content = if options.preserve_breaks {
1041 list_content[0].clone()
1042 } else {
1043 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1045 if has_hard_breaks {
1046 list_content.join("\n")
1048 } else {
1049 list_content.join(" ")
1051 }
1052 };
1053
1054 let trimmed_marker = marker;
1056 let continuation_spaces = content_start;
1057
1058 let prefix_length = indent + trimmed_marker.len() + 1;
1060
1061 let adjusted_options = ReflowOptions {
1063 line_length: options.line_length.saturating_sub(prefix_length),
1064 ..options.clone()
1065 };
1066
1067 let reflowed = reflow_line(&combined_content, &adjusted_options);
1068 for (j, reflowed_line) in reflowed.iter().enumerate() {
1069 if j == 0 {
1070 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1071 } else {
1072 let continuation_indent = " ".repeat(continuation_spaces);
1074 result.push(format!("{continuation_indent}{reflowed_line}"));
1075 }
1076 }
1077 continue;
1078 }
1079
1080 if trimmed.contains('|') {
1082 result.push(line.to_string());
1083 i += 1;
1084 continue;
1085 }
1086
1087 if trimmed.starts_with('[') && line.contains("]:") {
1089 result.push(line.to_string());
1090 i += 1;
1091 continue;
1092 }
1093
1094 let mut is_single_line_paragraph = true;
1096 if i + 1 < lines.len() {
1097 let next_line = lines[i + 1];
1098 let next_trimmed = next_line.trim();
1099 if !next_trimmed.is_empty()
1101 && !next_trimmed.starts_with('#')
1102 && !next_trimmed.starts_with("```")
1103 && !next_trimmed.starts_with("~~~")
1104 && !next_trimmed.starts_with('>')
1105 && !next_trimmed.starts_with('|')
1106 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1107 && !is_horizontal_rule(next_trimmed)
1108 && !(next_trimmed.starts_with('-')
1109 && !is_horizontal_rule(next_trimmed)
1110 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1111 && !(next_trimmed.starts_with('*')
1112 && !is_horizontal_rule(next_trimmed)
1113 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1114 && !(next_trimmed.starts_with('+')
1115 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1116 && !is_numbered_list_item(next_trimmed)
1117 {
1118 is_single_line_paragraph = false;
1119 }
1120 }
1121
1122 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1124 result.push(line.to_string());
1125 i += 1;
1126 continue;
1127 }
1128
1129 let mut paragraph_parts = Vec::new();
1131 let mut current_part = vec![line];
1132 i += 1;
1133
1134 if options.preserve_breaks {
1136 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1138 Some("\\")
1139 } else if line.ends_with(" ") {
1140 Some(" ")
1141 } else {
1142 None
1143 };
1144 let reflowed = reflow_line(line, options);
1145
1146 if let Some(break_marker) = hard_break_type {
1148 if !reflowed.is_empty() {
1149 let mut reflowed_with_break = reflowed;
1150 let last_idx = reflowed_with_break.len() - 1;
1151 if !has_hard_break(&reflowed_with_break[last_idx]) {
1152 reflowed_with_break[last_idx].push_str(break_marker);
1153 }
1154 result.extend(reflowed_with_break);
1155 }
1156 } else {
1157 result.extend(reflowed);
1158 }
1159 } else {
1160 while i < lines.len() {
1162 let prev_line = if !current_part.is_empty() {
1163 current_part.last().unwrap()
1164 } else {
1165 ""
1166 };
1167 let next_line = lines[i];
1168 let next_trimmed = next_line.trim();
1169
1170 if next_trimmed.is_empty()
1172 || next_trimmed.starts_with('#')
1173 || next_trimmed.starts_with("```")
1174 || next_trimmed.starts_with("~~~")
1175 || next_trimmed.starts_with('>')
1176 || next_trimmed.starts_with('|')
1177 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1178 || is_horizontal_rule(next_trimmed)
1179 || (next_trimmed.starts_with('-')
1180 && !is_horizontal_rule(next_trimmed)
1181 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1182 || (next_trimmed.starts_with('*')
1183 && !is_horizontal_rule(next_trimmed)
1184 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1185 || (next_trimmed.starts_with('+')
1186 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1187 || is_numbered_list_item(next_trimmed)
1188 {
1189 break;
1190 }
1191
1192 if has_hard_break(prev_line) {
1194 paragraph_parts.push(current_part.join(" "));
1196 current_part = vec![next_line];
1197 } else {
1198 current_part.push(next_line);
1199 }
1200 i += 1;
1201 }
1202
1203 if !current_part.is_empty() {
1205 if current_part.len() == 1 {
1206 paragraph_parts.push(current_part[0].to_string());
1208 } else {
1209 paragraph_parts.push(current_part.join(" "));
1210 }
1211 }
1212
1213 for (j, part) in paragraph_parts.iter().enumerate() {
1215 let reflowed = reflow_line(part, options);
1216 result.extend(reflowed);
1217
1218 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1221 let last_idx = result.len() - 1;
1222 if !has_hard_break(&result[last_idx]) {
1223 result[last_idx].push_str(" ");
1224 }
1225 }
1226 }
1227 }
1228 }
1229
1230 let result_text = result.join("\n");
1232 if content.ends_with('\n') && !result_text.ends_with('\n') {
1233 format!("{result_text}\n")
1234 } else {
1235 result_text
1236 }
1237}
1238
/// Result of reflowing a single paragraph.
// NOTE(review): the code that fills these fields is not fully visible here;
// the field descriptions follow the field names — confirm against
// `reflow_paragraph_at_line`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset where the paragraph starts (presumably within the original content).
    pub start_byte: usize,
    /// Byte offset where the paragraph ends (presumably within the original content).
    pub end_byte: usize,
    /// Replacement text for that byte range.
    pub reflowed_text: String,
}
1249
1250pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1268 if line_number == 0 {
1269 return None;
1270 }
1271
1272 let lines: Vec<&str> = content.lines().collect();
1273
1274 if line_number > lines.len() {
1276 return None;
1277 }
1278
1279 let target_idx = line_number - 1; let target_line = lines[target_idx];
1281 let trimmed = target_line.trim();
1282
1283 if trimmed.is_empty()
1285 || trimmed.starts_with('#')
1286 || trimmed.starts_with("```")
1287 || trimmed.starts_with("~~~")
1288 || target_line.starts_with(" ")
1289 || target_line.starts_with('\t')
1290 || trimmed.starts_with('>')
1291 || trimmed.contains('|') || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1294 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1295 && !is_horizontal_rule(trimmed)
1296 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1297 || is_numbered_list_item(trimmed)
1298 {
1299 return None;
1300 }
1301
1302 let mut para_start = target_idx;
1304 while para_start > 0 {
1305 let prev_idx = para_start - 1;
1306 let prev_line = lines[prev_idx];
1307 let prev_trimmed = prev_line.trim();
1308
1309 if prev_trimmed.is_empty()
1311 || prev_trimmed.starts_with('#')
1312 || prev_trimmed.starts_with("```")
1313 || prev_trimmed.starts_with("~~~")
1314 || prev_line.starts_with(" ")
1315 || prev_line.starts_with('\t')
1316 || prev_trimmed.starts_with('>')
1317 || prev_trimmed.contains('|')
1318 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1319 || is_horizontal_rule(prev_trimmed)
1320 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1321 && !is_horizontal_rule(prev_trimmed)
1322 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1323 || is_numbered_list_item(prev_trimmed)
1324 {
1325 break;
1326 }
1327
1328 para_start = prev_idx;
1329 }
1330
1331 let mut para_end = target_idx;
1333 while para_end + 1 < lines.len() {
1334 let next_idx = para_end + 1;
1335 let next_line = lines[next_idx];
1336 let next_trimmed = next_line.trim();
1337
1338 if next_trimmed.is_empty()
1340 || next_trimmed.starts_with('#')
1341 || next_trimmed.starts_with("```")
1342 || next_trimmed.starts_with("~~~")
1343 || next_line.starts_with(" ")
1344 || next_line.starts_with('\t')
1345 || next_trimmed.starts_with('>')
1346 || next_trimmed.contains('|')
1347 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1348 || is_horizontal_rule(next_trimmed)
1349 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1350 && !is_horizontal_rule(next_trimmed)
1351 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1352 || is_numbered_list_item(next_trimmed)
1353 {
1354 break;
1355 }
1356
1357 para_end = next_idx;
1358 }
1359
1360 let paragraph_lines = &lines[para_start..=para_end];
1362
1363 let mut start_byte = 0;
1365 for line in lines.iter().take(para_start) {
1366 start_byte += line.len() + 1; }
1368
1369 let mut end_byte = start_byte;
1370 for line in paragraph_lines.iter() {
1371 end_byte += line.len() + 1; }
1373
1374 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1377
1378 if !includes_trailing_newline {
1380 end_byte -= 1;
1381 }
1382
1383 let paragraph_text = paragraph_lines.join("\n");
1385
1386 let options = ReflowOptions {
1388 line_length,
1389 break_on_sentences: true,
1390 preserve_breaks: false,
1391 sentence_per_line: false,
1392 };
1393
1394 let reflowed = reflow_markdown(¶graph_text, &options);
1396
1397 let reflowed_text = if includes_trailing_newline {
1401 if reflowed.ends_with('\n') {
1403 reflowed
1404 } else {
1405 format!("{reflowed}\n")
1406 }
1407 } else {
1408 if reflowed.ends_with('\n') {
1410 reflowed.trim_end_matches('\n').to_string()
1411 } else {
1412 reflowed
1413 }
1414 };
1415
1416 Some(ParagraphReflow {
1417 start_byte,
1418 end_byte,
1419 reflowed_text,
1420 })
1421}
1422
1423#[cfg(test)]
1424mod tests {
1425 use super::*;
1426
1427 #[test]
1428 fn test_list_item_trailing_whitespace_removal() {
1429 let input = "1. First line with trailing spaces \n Second line with trailing spaces \n Third line\n";
1432
1433 let options = ReflowOptions {
1434 line_length: 999999,
1435 break_on_sentences: true, preserve_breaks: false,
1437 sentence_per_line: false,
1438 };
1439
1440 let result = reflow_markdown(input, &options);
1441
1442 assert!(
1445 !result.contains(" "),
1446 "Result should not contain 3+ consecutive spaces: {result:?}"
1447 );
1448
1449 assert!(result.contains(" \n"), "Hard breaks should be preserved: {result:?}");
1451
1452 assert!(
1455 result.lines().count() >= 2,
1456 "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1457 result.lines().count()
1458 );
1459 }
1460
1461 #[test]
1462 fn test_reflow_simple_text() {
1463 let options = ReflowOptions {
1464 line_length: 20,
1465 ..Default::default()
1466 };
1467
1468 let input = "This is a very long line that needs to be wrapped";
1469 let result = reflow_line(input, &options);
1470
1471 assert_eq!(result.len(), 3);
1472 assert!(result[0].chars().count() <= 20);
1473 assert!(result[1].chars().count() <= 20);
1474 assert!(result[2].chars().count() <= 20);
1475 }
1476
1477 #[test]
1478 fn test_preserve_inline_code() {
1479 let options = ReflowOptions {
1480 line_length: 30,
1481 ..Default::default()
1482 };
1483
1484 let result = reflow_line("This line has `inline code` that should be preserved", &options);
1485 let joined = result.join(" ");
1487 assert!(joined.contains("`inline code`"));
1488 }
1489
1490 #[test]
1491 fn test_preserve_links() {
1492 let options = ReflowOptions {
1493 line_length: 40,
1494 ..Default::default()
1495 };
1496
1497 let text = "Check out [this link](https://example.com/very/long/url) for more info";
1498 let result = reflow_line(text, &options);
1499
1500 let joined = result.join(" ");
1502 assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1503 }
1504
1505 #[test]
1506 fn test_reference_link_patterns_fixed() {
1507 let options = ReflowOptions {
1508 line_length: 30,
1509 break_on_sentences: true,
1510 preserve_breaks: false,
1511 sentence_per_line: false,
1512 };
1513
1514 let test_cases = vec![
1516 ("Check out [text][ref] for details", vec!["[text][ref]"]),
1518 ("See [text][] for info", vec!["[text][]"]),
1520 ("Visit [homepage] today", vec!["[homepage]"]),
1522 (
1524 "Links: [first][ref1] and [second][ref2] here",
1525 vec!["[first][ref1]", "[second][ref2]"],
1526 ),
1527 (
1529 "See [inline](url) and [reference][ref] links",
1530 vec", "[reference][ref]"],
1531 ),
1532 ];
1533
1534 for (input, expected_patterns) in test_cases {
1535 println!("\nTesting: {input}");
1536 let result = reflow_line(input, &options);
1537 let joined = result.join(" ");
1538 println!("Result: {joined}");
1539
1540 for expected_pattern in expected_patterns {
1542 assert!(
1543 joined.contains(expected_pattern),
1544 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1545 );
1546 }
1547
1548 assert!(
1550 !joined.contains("[ ") || !joined.contains("] ["),
1551 "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
1552 );
1553 }
1554 }
1555
1556 #[test]
1557 fn test_sentence_detection_basic() {
1558 assert!(is_sentence_boundary("Hello. World", 5));
1560 assert!(is_sentence_boundary("Test! Another", 4));
1561 assert!(is_sentence_boundary("Question? Answer", 8));
1562
1563 assert!(!is_sentence_boundary("Hello world", 5));
1565 assert!(!is_sentence_boundary("Test.com", 4));
1566 assert!(!is_sentence_boundary("3.14 pi", 1));
1567 }
1568
1569 #[test]
1570 fn test_sentence_detection_abbreviations() {
1571 assert!(!is_sentence_boundary("Mr. Smith", 2));
1573 assert!(!is_sentence_boundary("Dr. Jones", 2));
1574 assert!(!is_sentence_boundary("e.g. example", 3));
1575 assert!(!is_sentence_boundary("i.e. that is", 3));
1576 assert!(!is_sentence_boundary("etc. items", 3));
1577
1578 assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1580 }
1581
1582 #[test]
1583 fn test_split_into_sentences() {
1584 let text = "First sentence. Second sentence. Third one!";
1585 let sentences = split_into_sentences(text);
1586 assert_eq!(sentences.len(), 3);
1587 assert_eq!(sentences[0], "First sentence.");
1588 assert_eq!(sentences[1], "Second sentence.");
1589 assert_eq!(sentences[2], "Third one!");
1590
1591 let text2 = "Mr. Smith met Dr. Jones.";
1593 let sentences2 = split_into_sentences(text2);
1594 assert_eq!(sentences2.len(), 1);
1595 assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1596
1597 let text3 = "This is a single sentence.";
1599 let sentences3 = split_into_sentences(text3);
1600 assert_eq!(sentences3.len(), 1);
1601 assert_eq!(sentences3[0], "This is a single sentence.");
1602 }
1603
1604 #[test]
1605 fn test_sentence_per_line_reflow() {
1606 let options = ReflowOptions {
1607 line_length: 80,
1608 break_on_sentences: true,
1609 preserve_breaks: false,
1610 sentence_per_line: true,
1611 };
1612
1613 let input = "First sentence. Second sentence. Third sentence.";
1615 let result = reflow_line(input, &options);
1616 assert_eq!(result.len(), 3);
1617 assert_eq!(result[0], "First sentence.");
1618 assert_eq!(result[1], "Second sentence.");
1619 assert_eq!(result[2], "Third sentence.");
1620
1621 let input2 = "This has **bold**. And [a link](url).";
1623 let result2 = reflow_line(input2, &options);
1624 assert_eq!(result2.len(), 2);
1625 assert_eq!(result2[0], "This has **bold**.");
1626 assert_eq!(result2[1], "And [a link](url).");
1627 }
1628
1629 #[test]
1630 fn test_sentence_per_line_with_backticks() {
1631 let options = ReflowOptions {
1632 line_length: 80,
1633 break_on_sentences: true,
1634 preserve_breaks: false,
1635 sentence_per_line: true,
1636 };
1637
1638 let input = "This sentence has `code` in it. And this has `more code` too.";
1639 let result = reflow_line(input, &options);
1640 assert_eq!(result.len(), 2);
1641 assert_eq!(result[0], "This sentence has `code` in it.");
1642 assert_eq!(result[1], "And this has `more code` too.");
1643 }
1644
1645 #[test]
1646 fn test_sentence_per_line_with_backticks_in_parens() {
1647 let options = ReflowOptions {
1648 line_length: 80,
1649 break_on_sentences: true,
1650 preserve_breaks: false,
1651 sentence_per_line: true,
1652 };
1653
1654 let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1655 let result = reflow_line(input, &options);
1656 assert_eq!(result.len(), 2);
1657 assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1658 assert_eq!(result[1], "Next sentence.");
1659 }
1660
1661 #[test]
1662 fn test_sentence_per_line_with_questions_exclamations() {
1663 let options = ReflowOptions {
1664 line_length: 80,
1665 break_on_sentences: true,
1666 preserve_breaks: false,
1667 sentence_per_line: true,
1668 };
1669
1670 let input = "Is this a question? Yes it is! And a statement.";
1671 let result = reflow_line(input, &options);
1672 assert_eq!(result.len(), 3);
1673 assert_eq!(result[0], "Is this a question?");
1674 assert_eq!(result[1], "Yes it is!");
1675 assert_eq!(result[2], "And a statement.");
1676 }
1677
1678 #[test]
1679 fn test_split_sentences_issue_124() {
1680 let text = "If you are sure that all data structures exposed in a `PyModule` are thread-safe, then pass `gil_used = false` as a parameter to the `pymodule` procedural macro declaring the module or call `PyModule::gil_used` on a `PyModule` instance. For example:";
1682
1683 let sentences = split_into_sentences(text);
1684
1685 assert_eq!(sentences.len(), 2, "Should detect 2 sentences in the text");
1689 }
1690
1691 #[test]
1692 fn test_reference_link_edge_cases() {
1693 let options = ReflowOptions {
1694 line_length: 40,
1695 break_on_sentences: true,
1696 preserve_breaks: false,
1697 sentence_per_line: false,
1698 };
1699
1700 let test_cases = vec![
1702 ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1704 (
1706 "Link [text with [nested] content][ref]",
1707 vec!["[text with [nested] content][ref]"],
1708 ),
1709 (
1711 "First [ref][link] then [inline](url)",
1712 vec!["[ref][link]", "[inline](url)"],
1713 ),
1714 ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1716 (
1718 "Complex [text with *emphasis*][] reference",
1719 vec!["[text with *emphasis*][]"],
1720 ),
1721 ];
1722
1723 for (input, expected_patterns) in test_cases {
1724 println!("\nTesting edge case: {input}");
1725 let result = reflow_line(input, &options);
1726 let joined = result.join(" ");
1727 println!("Result: {joined}");
1728
1729 for expected_pattern in expected_patterns {
1731 assert!(
1732 joined.contains(expected_pattern),
1733 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1734 );
1735 }
1736 }
1737 }
1738
1739 #[test]
1740 fn test_reflow_with_emphasis() {
1741 let options = ReflowOptions {
1742 line_length: 25,
1743 ..Default::default()
1744 };
1745
1746 let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1747
1748 let joined = result.join(" ");
1750 assert!(joined.contains("*emphasized*"));
1751 assert!(joined.contains("**strong**"));
1752 }
1753
1754 #[test]
1755 fn test_image_patterns_preserved() {
1756 let options = ReflowOptions {
1757 line_length: 30,
1758 ..Default::default()
1759 };
1760
1761 let test_cases = vec for details",
1766 vec"],
1767 ),
1768 ("See ![image][ref] for info", vec!["![image][ref]"]),
1770 ("Visit ![homepage][] today", vec!["![homepage][]"]),
1772 (
1774 "Images:  and ![second][ref2]",
1775 vec", "![second][ref2]"],
1776 ),
1777 ];
1778
1779 for (input, expected_patterns) in test_cases {
1780 println!("\nTesting: {input}");
1781 let result = reflow_line(input, &options);
1782 let joined = result.join(" ");
1783 println!("Result: {joined}");
1784
1785 for expected_pattern in expected_patterns {
1786 assert!(
1787 joined.contains(expected_pattern),
1788 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1789 );
1790 }
1791 }
1792 }
1793
1794 #[test]
1795 fn test_extended_markdown_patterns() {
1796 let options = ReflowOptions {
1797 line_length: 40,
1798 ..Default::default()
1799 };
1800
1801 let test_cases = vec![
1802 ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1804 (
1806 "Check [[wiki link]] and [[page|display]]",
1807 vec!["[[wiki link]]", "[[page|display]]"],
1808 ),
1809 (
1811 "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1812 vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1813 ),
1814 ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1816 (
1818 "Text with <span>tag</span> and <br/>",
1819 vec!["<span>", "</span>", "<br/>"],
1820 ),
1821 ("Non-breaking space and em—dash", vec![" ", "—"]),
1823 ];
1824
1825 for (input, expected_patterns) in test_cases {
1826 let result = reflow_line(input, &options);
1827 let joined = result.join(" ");
1828
1829 for pattern in expected_patterns {
1830 assert!(
1831 joined.contains(pattern),
1832 "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1833 );
1834 }
1835 }
1836 }
1837
1838 #[test]
1839 fn test_complex_mixed_patterns() {
1840 let options = ReflowOptions {
1841 line_length: 50,
1842 ..Default::default()
1843 };
1844
1845 let input = "Line with **bold**, `code`, [link](url), , ~~strike~~, $math$, :emoji:, and <tag> all together";
1847 let result = reflow_line(input, &options);
1848 let joined = result.join(" ");
1849
1850 assert!(joined.contains("**bold**"));
1852 assert!(joined.contains("`code`"));
1853 assert!(joined.contains("[link](url)"));
1854 assert!(joined.contains(""));
1855 assert!(joined.contains("~~strike~~"));
1856 assert!(joined.contains("$math$"));
1857 assert!(joined.contains(":emoji:"));
1858 assert!(joined.contains("<tag>"));
1859 }
1860
1861 #[test]
1862 fn test_footnote_patterns_preserved() {
1863 let options = ReflowOptions {
1864 line_length: 40,
1865 ..Default::default()
1866 };
1867
1868 let test_cases = vec![
1869 ("This has a footnote[^1] reference", vec!["[^1]"]),
1871 ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1873 ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1875 ];
1876
1877 for (input, expected_patterns) in test_cases {
1878 let result = reflow_line(input, &options);
1879 let joined = result.join(" ");
1880
1881 for expected_pattern in expected_patterns {
1882 assert!(
1883 joined.contains(expected_pattern),
1884 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1885 );
1886 }
1887 }
1888 }
1889
1890 #[test]
1891 fn test_reflow_markdown_numbered_lists() {
1892 let options = ReflowOptions {
1894 line_length: 50,
1895 ..Default::default()
1896 };
1897
1898 let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
18992. Short item
19003. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1901
1902 let result = reflow_markdown(content, &options);
1903
1904 let expected = r#"1. List `manifest` to find the manifest with the
1906 largest ID. Say it's
1907 `00000000000000000002.manifest` in this
1908 example.
19092. Short item
19103. Another long item that definitely exceeds the
1911 fifty character limit and needs wrapping"#;
1912
1913 assert_eq!(
1914 result, expected,
1915 "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1916 );
1917 }
1918
1919 #[test]
1920 fn test_reflow_markdown_bullet_lists() {
1921 let options = ReflowOptions {
1922 line_length: 40,
1923 ..Default::default()
1924 };
1925
1926 let content = r#"- First bullet point with a very long line that needs wrapping
1927* Second bullet using asterisk
1928+ Third bullet using plus sign
1929- Short one"#;
1930
1931 let result = reflow_markdown(content, &options);
1932
1933 let expected = r#"- First bullet point with a very long
1935 line that needs wrapping
1936* Second bullet using asterisk
1937+ Third bullet using plus sign
1938- Short one"#;
1939
1940 assert_eq!(
1941 result, expected,
1942 "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1943 );
1944 }
1945
1946 #[test]
1947 fn test_ie_abbreviation_split_debug() {
1948 let input = "This results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.";
1949
1950 let options = ReflowOptions {
1951 line_length: 80,
1952 break_on_sentences: true,
1953 preserve_breaks: false,
1954 sentence_per_line: true,
1955 };
1956
1957 let result = reflow_line(input, &options);
1958
1959 assert_eq!(result.len(), 1, "Should not split after i.e. abbreviation");
1961 }
1962
1963 #[test]
1964 fn test_ie_abbreviation_paragraph() {
1965 let input = "The `pyo3(transparent)` attribute can be used on structs with exactly one field.\nThis results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.\nThis behaviour is enabled per default for newtype structs and tuple-variants with a single field.";
1967
1968 let options = ReflowOptions {
1969 line_length: 80,
1970 break_on_sentences: true,
1971 preserve_breaks: false,
1972 sentence_per_line: true,
1973 };
1974
1975 let result = reflow_markdown(input, &options);
1976
1977 let line_count = result.lines().count();
1979 assert_eq!(line_count, 3, "Should have 3 sentences, not {line_count}");
1980
1981 let lines: Vec<&str> = result.lines().collect();
1983 assert!(lines.len() >= 2, "Should have at least 2 lines");
1984 assert!(lines[1].contains("i.e."), "Second line should contain 'i.e.'");
1985 assert!(
1986 lines[1].contains("`obj.extract()`"),
1987 "Second line should contain the code span"
1988 );
1989 assert!(
1990 lines[1].contains("attribute."),
1991 "Second line should end with 'attribute.'"
1992 );
1993 }
1994}