1use crate::utils::is_definition_list_item;
7use crate::utils::regex_cache::{
8 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
9 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
10 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
11};
/// Options controlling how Markdown text is reflowed.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Target maximum line length, measured in characters.
    pub line_length: usize,
    /// Whether breaks should prefer sentence boundaries.
    ///
    /// NOTE(review): no code visible in this part of the file reads this flag — confirm
    /// where it is consumed.
    pub break_on_sentences: bool,
    /// When `true`, `reflow_markdown` reflows a paragraph line by line instead of joining
    /// consecutive lines first.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` emits one sentence per output line and ignores
    /// `line_length`.
    pub sentence_per_line: bool,
}
24
25impl Default for ReflowOptions {
26 fn default() -> Self {
27 Self {
28 line_length: 80,
29 break_on_sentences: true,
30 preserve_breaks: false,
31 sentence_per_line: false,
32 }
33 }
34}
35
/// Returns `true` when the character at *character* index `pos` of `text` ends a sentence.
///
/// A boundary is a `.`, `!` or `?` that is followed by a space, optional further
/// whitespace and then an uppercase character. It is rejected when the text before the
/// punctuation ends (case-insensitively) with one of the known abbreviations, or when the
/// punctuation sits between two numeric characters.
///
/// `pos` counts characters, not bytes; the byte offset needed for slicing is derived
/// internally so that non-ASCII text is handled without panicking.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr", "Jr",
    ];

    let mut rest = text.char_indices().skip(pos);

    // The candidate character itself must be sentence-ending punctuation.
    let Some((byte_pos, punct)) = rest.next() else {
        return false;
    };
    if !matches!(punct, '.' | '!' | '?') {
        return false;
    }

    // It must be followed directly by a plain space ...
    if rest.next().map(|(_, ch)| ch) != Some(' ') {
        return false;
    }

    // ... and the first non-whitespace character after that must be uppercase.
    let Some(next_char) = rest.map(|(_, ch)| ch).find(|ch| !ch.is_whitespace()) else {
        return false;
    };
    if !next_char.is_uppercase() {
        return false;
    }

    // `byte_pos` is the byte offset of the punctuation, so this slice is always on a
    // character boundary (the original sliced with the char index and could panic).
    let preceding = &text[..byte_pos];
    if preceding.is_empty() {
        return true;
    }

    let preceding_lower = preceding.to_lowercase();
    if ABBREVIATIONS
        .iter()
        .any(|abbr| preceding_lower.ends_with(&abbr.to_lowercase()))
    {
        return false;
    }

    // Punctuation between two numeric characters is not a sentence end.
    let prev_is_numeric = preceding.chars().next_back().is_some_and(char::is_numeric);
    !(prev_is_numeric && next_char.is_numeric())
}
93
94pub fn split_into_sentences(text: &str) -> Vec<String> {
96 let mut sentences = Vec::new();
97 let mut current_sentence = String::new();
98 let mut chars = text.chars().peekable();
99 let mut pos = 0;
100
101 while let Some(c) = chars.next() {
102 current_sentence.push(c);
103
104 if is_sentence_boundary(text, pos) {
105 if chars.peek() == Some(&' ') {
107 chars.next();
108 pos += 1;
109 }
110 sentences.push(current_sentence.trim().to_string());
111 current_sentence.clear();
112 }
113
114 pos += 1;
115 }
116
117 if !current_sentence.trim().is_empty() {
119 sentences.push(current_sentence.trim().to_string());
120 }
121 sentences
122}
123
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_` or `*`,
/// contains nothing but that marker character and spaces, and has at least three markers.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next().filter(|c| matches!(c, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
152
/// Returns `true` when `line` starts with an ordered-list marker: one or more numeric
/// characters, then a `.`, which is either the end of the line or followed by a space.
fn is_numbered_list_item(line: &str) -> bool {
    // Everything before the first '.' must be the number; what follows decides validity.
    let Some((number, after_dot)) = line.split_once('.') else {
        return false;
    };

    let number_ok = !number.is_empty() && number.chars().all(char::is_numeric);
    number_ok && (after_dot.is_empty() || after_dot.starts_with(' '))
}
175
/// Returns `true` when `line` ends with a Markdown hard line break: two or more trailing
/// spaces, or a trailing backslash. A single trailing `\r` is ignored before checking.
///
/// A single trailing space is *not* a hard break; treating it as one made ordinary
/// soft-wrapped lines split paragraphs.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
185
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// * A trailing backslash is kept as-is.
/// * Two or more trailing spaces are normalised to exactly two.
/// * Any other trailing whitespace (including a single space) is removed.
///
/// A single trailing `\r` is dropped first. Whitespace-only input yields an empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Re-attach a canonical two-space hard break to the trimmed content.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
216
217pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
218 if options.sentence_per_line {
220 let elements = parse_markdown_elements(line);
221 return reflow_elements_sentence_per_line(&elements);
222 }
223
224 if line.chars().count() <= options.line_length {
226 return vec![line.to_string()];
227 }
228
229 let elements = parse_markdown_elements(line);
231
232 reflow_elements(&elements, options)
234}
235
/// An inline Markdown construct that is kept intact (never split) while reflowing.
///
/// Each variant stores only its payload; the surrounding delimiters are re-created by the
/// `Display` implementation and accounted for by `len`.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$math$`.
    InlineMath(String),
    /// Display math: `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// Raw HTML tag, stored verbatim.
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Inline code: `` `code` ``.
    Code(String),
    /// Bold text: `**text**`.
    Bold(String),
    /// Italic text: `*text*`.
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Renders the element back into its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Previously rendered as an empty string, which silently deleted inline images
            // from reflowed text and disagreed with `len`.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Italic(s) => write!(f, "*{s}*"),
        }
    }
}

impl Element {
    /// Width of the rendered element in characters: the payload plus the fixed number of
    /// delimiter characters that `Display` adds for the variant.
    fn len(&self) -> usize {
        fn width(s: &str) -> usize {
            s.chars().count()
        }

        match self {
            Element::Text(s) => width(s),
            Element::Link { text, url } => width(text) + width(url) + 4,
            Element::ReferenceLink { text, reference } => width(text) + width(reference) + 4,
            Element::EmptyReferenceLink { text } => width(text) + 4,
            Element::ShortcutReference { reference } => width(reference) + 2,
            Element::InlineImage { alt, url } => width(alt) + width(url) + 5,
            Element::ReferenceImage { alt, reference } => width(alt) + width(reference) + 5,
            Element::EmptyReferenceImage { alt } => width(alt) + 5,
            Element::FootnoteReference { note } => width(note) + 3,
            Element::Strikethrough(s) => width(s) + 4,
            Element::WikiLink(s) => width(s) + 4,
            Element::InlineMath(s) => width(s) + 2,
            Element::DisplayMath(s) => width(s) + 4,
            Element::EmojiShortcode(s) => width(s) + 2,
            Element::HtmlTag(s) => width(s),
            Element::HtmlEntity(s) => width(s),
            Element::Code(s) => width(s) + 2,
            Element::Bold(s) => width(s) + 4,
            Element::Italic(s) => width(s) + 2,
        }
    }
}
330
331fn parse_markdown_elements(text: &str) -> Vec<Element> {
340 let mut elements = Vec::new();
341 let mut remaining = text;
342
343 while !remaining.is_empty() {
344 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
346
347 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
350 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
351 {
352 earliest_match = Some((m.start(), "inline_image", m));
353 }
354
355 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
357 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
358 {
359 earliest_match = Some((m.start(), "ref_image", m));
360 }
361
362 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
364 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
365 {
366 earliest_match = Some((m.start(), "footnote_ref", m));
367 }
368
369 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
371 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
372 {
373 earliest_match = Some((m.start(), "inline_link", m));
374 }
375
376 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
378 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
379 {
380 earliest_match = Some((m.start(), "ref_link", m));
381 }
382
383 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
386 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
387 {
388 earliest_match = Some((m.start(), "shortcut_ref", m));
389 }
390
391 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
393 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
394 {
395 earliest_match = Some((m.start(), "wiki_link", m));
396 }
397
398 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
400 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
401 {
402 earliest_match = Some((m.start(), "display_math", m));
403 }
404
405 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
407 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
408 {
409 earliest_match = Some((m.start(), "inline_math", m));
410 }
411
412 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
414 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
415 {
416 earliest_match = Some((m.start(), "strikethrough", m));
417 }
418
419 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
421 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
422 {
423 earliest_match = Some((m.start(), "emoji", m));
424 }
425
426 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
428 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
429 {
430 earliest_match = Some((m.start(), "html_entity", m));
431 }
432
433 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
436 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
437 {
438 let matched_text = &remaining[m.start()..m.end()];
440 let is_autolink = matched_text.starts_with("<http://")
441 || matched_text.starts_with("<https://")
442 || matched_text.starts_with("<mailto:")
443 || matched_text.starts_with("<ftp://")
444 || matched_text.starts_with("<ftps://");
445
446 if !is_autolink {
447 earliest_match = Some((m.start(), "html_tag", m));
448 }
449 }
450
451 let mut next_special = remaining.len();
453 let mut special_type = "";
454
455 if let Some(pos) = remaining.find('`')
456 && pos < next_special
457 {
458 next_special = pos;
459 special_type = "code";
460 }
461 if let Some(pos) = remaining.find("**")
462 && pos < next_special
463 {
464 next_special = pos;
465 special_type = "bold";
466 }
467 if let Some(pos) = remaining.find('*')
468 && pos < next_special
469 && !remaining[pos..].starts_with("**")
470 {
471 next_special = pos;
472 special_type = "italic";
473 }
474
475 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
477 pos < next_special
478 } else {
479 false
480 };
481
482 if should_process_markdown_link {
483 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
484
485 if pos > 0 {
487 elements.push(Element::Text(remaining[..pos].to_string()));
488 }
489
490 match pattern_type {
492 "inline_image" => {
493 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
494 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
495 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
496 elements.push(Element::InlineImage {
497 alt: alt.to_string(),
498 url: url.to_string(),
499 });
500 remaining = &remaining[match_obj.end()..];
501 } else {
502 elements.push(Element::Text("!".to_string()));
503 remaining = &remaining[1..];
504 }
505 }
506 "ref_image" => {
507 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
508 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
509 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
510
511 if reference.is_empty() {
512 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
513 } else {
514 elements.push(Element::ReferenceImage {
515 alt: alt.to_string(),
516 reference: reference.to_string(),
517 });
518 }
519 remaining = &remaining[match_obj.end()..];
520 } else {
521 elements.push(Element::Text("!".to_string()));
522 remaining = &remaining[1..];
523 }
524 }
525 "footnote_ref" => {
526 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
527 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
528 elements.push(Element::FootnoteReference { note: note.to_string() });
529 remaining = &remaining[match_obj.end()..];
530 } else {
531 elements.push(Element::Text("[".to_string()));
532 remaining = &remaining[1..];
533 }
534 }
535 "inline_link" => {
536 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
537 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
538 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
539 elements.push(Element::Link {
540 text: text.to_string(),
541 url: url.to_string(),
542 });
543 remaining = &remaining[match_obj.end()..];
544 } else {
545 elements.push(Element::Text("[".to_string()));
547 remaining = &remaining[1..];
548 }
549 }
550 "ref_link" => {
551 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
552 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
553 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
554
555 if reference.is_empty() {
556 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
558 } else {
559 elements.push(Element::ReferenceLink {
561 text: text.to_string(),
562 reference: reference.to_string(),
563 });
564 }
565 remaining = &remaining[match_obj.end()..];
566 } else {
567 elements.push(Element::Text("[".to_string()));
569 remaining = &remaining[1..];
570 }
571 }
572 "shortcut_ref" => {
573 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
574 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
575 elements.push(Element::ShortcutReference {
576 reference: reference.to_string(),
577 });
578 remaining = &remaining[match_obj.end()..];
579 } else {
580 elements.push(Element::Text("[".to_string()));
582 remaining = &remaining[1..];
583 }
584 }
585 "wiki_link" => {
586 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
587 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
588 elements.push(Element::WikiLink(content.to_string()));
589 remaining = &remaining[match_obj.end()..];
590 } else {
591 elements.push(Element::Text("[[".to_string()));
592 remaining = &remaining[2..];
593 }
594 }
595 "display_math" => {
596 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
597 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
598 elements.push(Element::DisplayMath(math.to_string()));
599 remaining = &remaining[match_obj.end()..];
600 } else {
601 elements.push(Element::Text("$$".to_string()));
602 remaining = &remaining[2..];
603 }
604 }
605 "inline_math" => {
606 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
607 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
608 elements.push(Element::InlineMath(math.to_string()));
609 remaining = &remaining[match_obj.end()..];
610 } else {
611 elements.push(Element::Text("$".to_string()));
612 remaining = &remaining[1..];
613 }
614 }
615 "strikethrough" => {
616 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
617 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
618 elements.push(Element::Strikethrough(text.to_string()));
619 remaining = &remaining[match_obj.end()..];
620 } else {
621 elements.push(Element::Text("~~".to_string()));
622 remaining = &remaining[2..];
623 }
624 }
625 "emoji" => {
626 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
627 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
628 elements.push(Element::EmojiShortcode(emoji.to_string()));
629 remaining = &remaining[match_obj.end()..];
630 } else {
631 elements.push(Element::Text(":".to_string()));
632 remaining = &remaining[1..];
633 }
634 }
635 "html_entity" => {
636 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
638 remaining = &remaining[match_obj.end()..];
639 }
640 "html_tag" => {
641 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
643 remaining = &remaining[match_obj.end()..];
644 }
645 _ => {
646 elements.push(Element::Text("[".to_string()));
648 remaining = &remaining[1..];
649 }
650 }
651 } else {
652 if next_special > 0 && next_special < remaining.len() {
656 elements.push(Element::Text(remaining[..next_special].to_string()));
657 remaining = &remaining[next_special..];
658 }
659
660 match special_type {
662 "code" => {
663 if let Some(code_end) = remaining[1..].find('`') {
665 let code = &remaining[1..1 + code_end];
666 elements.push(Element::Code(code.to_string()));
667 remaining = &remaining[1 + code_end + 1..];
668 } else {
669 elements.push(Element::Text(remaining.to_string()));
671 break;
672 }
673 }
674 "bold" => {
675 if let Some(bold_end) = remaining[2..].find("**") {
677 let bold_text = &remaining[2..2 + bold_end];
678 elements.push(Element::Bold(bold_text.to_string()));
679 remaining = &remaining[2 + bold_end + 2..];
680 } else {
681 elements.push(Element::Text("**".to_string()));
683 remaining = &remaining[2..];
684 }
685 }
686 "italic" => {
687 if let Some(italic_end) = remaining[1..].find('*') {
689 let italic_text = &remaining[1..1 + italic_end];
690 elements.push(Element::Italic(italic_text.to_string()));
691 remaining = &remaining[1 + italic_end + 1..];
692 } else {
693 elements.push(Element::Text("*".to_string()));
695 remaining = &remaining[1..];
696 }
697 }
698 _ => {
699 elements.push(Element::Text(remaining.to_string()));
701 break;
702 }
703 }
704 }
705 }
706
707 elements
708}
709
710fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
712 let mut lines = Vec::new();
713 let mut current_line = String::new();
714
715 for element in elements.iter() {
716 let element_str = format!("{element}");
717
718 if let Element::Text(text) = element {
720 let combined = format!("{current_line}{text}");
722 let sentences = split_into_sentences(&combined);
723
724 if sentences.len() > 1 {
725 for (i, sentence) in sentences.iter().enumerate() {
727 if i == 0 {
728 let trimmed = sentence.trim();
731 let ends_with_sentence_punct =
732 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
733 let ends_with_abbreviation = if ends_with_sentence_punct {
734 let without_punct = trimmed
736 .trim_end_matches('.')
737 .trim_end_matches('!')
738 .trim_end_matches('?');
739 let ignored_words = [
740 "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
741 "Jr",
742 ];
743 ignored_words
744 .iter()
745 .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
746 } else {
747 false
748 };
749
750 if ends_with_abbreviation {
751 current_line = sentence.to_string();
753 } else {
754 lines.push(sentence.to_string());
756 current_line.clear();
757 }
758 } else if i == sentences.len() - 1 {
759 let trimmed = sentence.trim();
761 let ends_with_sentence_punct =
762 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
763
764 let ends_with_abbreviation = if ends_with_sentence_punct {
766 let without_punct = trimmed
768 .trim_end_matches('.')
769 .trim_end_matches('!')
770 .trim_end_matches('?');
771 let ignored_words = [
772 "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
773 "Jr",
774 ];
775 ignored_words
776 .iter()
777 .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
778 } else {
779 false
780 };
781
782 if ends_with_sentence_punct && !ends_with_abbreviation {
783 lines.push(sentence.to_string());
785 current_line.clear();
786 } else {
787 current_line = sentence.to_string();
789 }
790 } else {
791 lines.push(sentence.to_string());
793 }
794 }
795 } else {
796 current_line = combined;
798 }
799 } else {
800 if !current_line.is_empty()
803 && !current_line.ends_with(' ')
804 && !current_line.ends_with('(')
805 && !current_line.ends_with('[')
806 {
807 current_line.push(' ');
808 }
809 current_line.push_str(&element_str);
810 }
811 }
812
813 if !current_line.is_empty() {
815 lines.push(current_line.trim().to_string());
816 }
817 lines
818}
819
820fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
822 let mut lines = Vec::new();
823 let mut current_line = String::new();
824 let mut current_length = 0;
825
826 for element in elements {
827 let element_str = format!("{element}");
828 let element_len = element.len();
829
830 if let Element::Text(text) = element {
832 let words: Vec<&str> = text.split_whitespace().collect();
834
835 for word in words {
836 let word_len = word.chars().count();
837 if current_length > 0 && current_length + 1 + word_len > options.line_length {
838 lines.push(current_line.trim().to_string());
840 current_line = word.to_string();
841 current_length = word_len;
842 } else {
843 if current_length > 0 {
845 current_line.push(' ');
846 current_length += 1;
847 }
848 current_line.push_str(word);
849 current_length += word_len;
850 }
851 }
852 } else {
853 if current_length > 0 && current_length + 1 + element_len > options.line_length {
856 lines.push(current_line.trim().to_string());
858 current_line = element_str;
859 current_length = element_len;
860 } else {
861 if current_length > 0 {
863 current_line.push(' ');
864 current_length += 1;
865 }
866 current_line.push_str(&element_str);
867 current_length += element_len;
868 }
869 }
870 }
871
872 if !current_line.is_empty() {
874 lines.push(current_line.trim_end().to_string());
875 }
876
877 lines
878}
879
880pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
882 let lines: Vec<&str> = content.lines().collect();
883 let mut result = Vec::new();
884 let mut i = 0;
885
886 while i < lines.len() {
887 let line = lines[i];
888 let trimmed = line.trim();
889
890 if trimmed.is_empty() {
892 result.push(String::new());
893 i += 1;
894 continue;
895 }
896
897 if trimmed.starts_with('#') {
899 result.push(line.to_string());
900 i += 1;
901 continue;
902 }
903
904 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
906 result.push(line.to_string());
907 i += 1;
908 while i < lines.len() {
910 result.push(lines[i].to_string());
911 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
912 i += 1;
913 break;
914 }
915 i += 1;
916 }
917 continue;
918 }
919
920 if line.starts_with(" ") || line.starts_with("\t") {
922 result.push(line.to_string());
924 i += 1;
925 while i < lines.len() {
926 let next_line = lines[i];
927 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
929 result.push(next_line.to_string());
930 i += 1;
931 } else {
932 break;
933 }
934 }
935 continue;
936 }
937
938 if trimmed.starts_with('>') {
940 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
941 let quote_content = &line[quote_prefix.len()..].trim_start();
942
943 let reflowed = reflow_line(quote_content, options);
944 for reflowed_line in reflowed.iter() {
945 result.push(format!("{quote_prefix} {reflowed_line}"));
946 }
947 i += 1;
948 continue;
949 }
950
951 if is_horizontal_rule(trimmed) {
953 result.push(line.to_string());
954 i += 1;
955 continue;
956 }
957
958 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
960 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
961 || trimmed.starts_with('+')
962 || is_numbered_list_item(trimmed)
963 {
964 let indent = line.len() - line.trim_start().len();
966 let indent_str = " ".repeat(indent);
967
968 let mut marker_end = indent;
971 let mut content_start = indent;
972
973 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
974 if let Some(period_pos) = line[indent..].find('.') {
976 marker_end = indent + period_pos + 1; content_start = marker_end;
978 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
980 content_start += 1;
981 }
982 }
983 } else {
984 marker_end = indent + 1; content_start = marker_end;
987 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
989 content_start += 1;
990 }
991 }
992
993 let marker = &line[indent..marker_end];
994
995 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
998 i += 1;
999
1000 while i < lines.len() {
1002 let next_line = lines[i];
1003 let next_trimmed = next_line.trim();
1004
1005 if next_trimmed.is_empty()
1007 || next_trimmed.starts_with('#')
1008 || next_trimmed.starts_with("```")
1009 || next_trimmed.starts_with("~~~")
1010 || next_trimmed.starts_with('>')
1011 || next_trimmed.starts_with('|')
1012 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1013 || is_horizontal_rule(next_trimmed)
1014 || (next_trimmed.starts_with('-')
1015 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1016 || (next_trimmed.starts_with('*')
1017 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1018 || (next_trimmed.starts_with('+')
1019 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1020 || is_numbered_list_item(next_trimmed)
1021 || is_definition_list_item(next_trimmed)
1022 {
1023 break;
1024 }
1025
1026 let next_indent = next_line.len() - next_line.trim_start().len();
1028 if next_indent >= content_start {
1029 let trimmed_start = next_line.trim_start();
1032 list_content.push(trim_preserving_hard_break(trimmed_start));
1033 i += 1;
1034 } else {
1035 break;
1037 }
1038 }
1039
1040 let combined_content = if options.preserve_breaks {
1043 list_content[0].clone()
1044 } else {
1045 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1047 if has_hard_breaks {
1048 list_content.join("\n")
1050 } else {
1051 list_content.join(" ")
1053 }
1054 };
1055
1056 let trimmed_marker = marker;
1058 let continuation_spaces = content_start;
1059
1060 let prefix_length = indent + trimmed_marker.len() + 1;
1062
1063 let adjusted_options = ReflowOptions {
1065 line_length: options.line_length.saturating_sub(prefix_length),
1066 ..options.clone()
1067 };
1068
1069 let reflowed = reflow_line(&combined_content, &adjusted_options);
1070 for (j, reflowed_line) in reflowed.iter().enumerate() {
1071 if j == 0 {
1072 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1073 } else {
1074 let continuation_indent = " ".repeat(continuation_spaces);
1076 result.push(format!("{continuation_indent}{reflowed_line}"));
1077 }
1078 }
1079 continue;
1080 }
1081
1082 if trimmed.contains('|') {
1084 result.push(line.to_string());
1085 i += 1;
1086 continue;
1087 }
1088
1089 if trimmed.starts_with('[') && line.contains("]:") {
1091 result.push(line.to_string());
1092 i += 1;
1093 continue;
1094 }
1095
1096 if is_definition_list_item(trimmed) {
1098 result.push(line.to_string());
1099 i += 1;
1100 continue;
1101 }
1102
1103 let mut is_single_line_paragraph = true;
1105 if i + 1 < lines.len() {
1106 let next_line = lines[i + 1];
1107 let next_trimmed = next_line.trim();
1108 if !next_trimmed.is_empty()
1110 && !next_trimmed.starts_with('#')
1111 && !next_trimmed.starts_with("```")
1112 && !next_trimmed.starts_with("~~~")
1113 && !next_trimmed.starts_with('>')
1114 && !next_trimmed.starts_with('|')
1115 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1116 && !is_horizontal_rule(next_trimmed)
1117 && !(next_trimmed.starts_with('-')
1118 && !is_horizontal_rule(next_trimmed)
1119 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1120 && !(next_trimmed.starts_with('*')
1121 && !is_horizontal_rule(next_trimmed)
1122 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1123 && !(next_trimmed.starts_with('+')
1124 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1125 && !is_numbered_list_item(next_trimmed)
1126 {
1127 is_single_line_paragraph = false;
1128 }
1129 }
1130
1131 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1133 result.push(line.to_string());
1134 i += 1;
1135 continue;
1136 }
1137
1138 let mut paragraph_parts = Vec::new();
1140 let mut current_part = vec![line];
1141 i += 1;
1142
1143 if options.preserve_breaks {
1145 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1147 Some("\\")
1148 } else if line.ends_with(" ") {
1149 Some(" ")
1150 } else {
1151 None
1152 };
1153 let reflowed = reflow_line(line, options);
1154
1155 if let Some(break_marker) = hard_break_type {
1157 if !reflowed.is_empty() {
1158 let mut reflowed_with_break = reflowed;
1159 let last_idx = reflowed_with_break.len() - 1;
1160 if !has_hard_break(&reflowed_with_break[last_idx]) {
1161 reflowed_with_break[last_idx].push_str(break_marker);
1162 }
1163 result.extend(reflowed_with_break);
1164 }
1165 } else {
1166 result.extend(reflowed);
1167 }
1168 } else {
1169 while i < lines.len() {
1171 let prev_line = if !current_part.is_empty() {
1172 current_part.last().unwrap()
1173 } else {
1174 ""
1175 };
1176 let next_line = lines[i];
1177 let next_trimmed = next_line.trim();
1178
1179 if next_trimmed.is_empty()
1181 || next_trimmed.starts_with('#')
1182 || next_trimmed.starts_with("```")
1183 || next_trimmed.starts_with("~~~")
1184 || next_trimmed.starts_with('>')
1185 || next_trimmed.starts_with('|')
1186 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1187 || is_horizontal_rule(next_trimmed)
1188 || (next_trimmed.starts_with('-')
1189 && !is_horizontal_rule(next_trimmed)
1190 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1191 || (next_trimmed.starts_with('*')
1192 && !is_horizontal_rule(next_trimmed)
1193 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1194 || (next_trimmed.starts_with('+')
1195 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1196 || is_numbered_list_item(next_trimmed)
1197 || is_definition_list_item(next_trimmed)
1198 {
1199 break;
1200 }
1201
1202 if has_hard_break(prev_line) {
1204 paragraph_parts.push(current_part.join(" "));
1206 current_part = vec![next_line];
1207 } else {
1208 current_part.push(next_line);
1209 }
1210 i += 1;
1211 }
1212
1213 if !current_part.is_empty() {
1215 if current_part.len() == 1 {
1216 paragraph_parts.push(current_part[0].to_string());
1218 } else {
1219 paragraph_parts.push(current_part.join(" "));
1220 }
1221 }
1222
1223 for (j, part) in paragraph_parts.iter().enumerate() {
1225 let reflowed = reflow_line(part, options);
1226 result.extend(reflowed);
1227
1228 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1231 let last_idx = result.len() - 1;
1232 if !has_hard_break(&result[last_idx]) {
1233 result[last_idx].push_str(" ");
1234 }
1235 }
1236 }
1237 }
1238 }
1239
1240 let result_text = result.join("\n");
1242 if content.ends_with('\n') && !result_text.ends_with('\n') {
1243 format!("{result_text}\n")
1244 } else {
1245 result_text
1246 }
1247}
1248
/// Describes the replacement of one paragraph by its reflowed text.
///
/// NOTE(review): the code that fills this struct is only partially visible from here;
/// the field descriptions below are inferred from the field names — confirm against
/// `reflow_paragraph_at_line`.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Presumably the byte offset in the original content where the paragraph starts.
    pub start_byte: usize,
    /// Presumably the byte offset in the original content where the paragraph ends
    /// (inclusive vs. exclusive not visible here — TODO confirm).
    pub end_byte: usize,
    /// Presumably the reflowed text that should replace that byte range.
    pub reflowed_text: String,
}
1259
1260pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1278 if line_number == 0 {
1279 return None;
1280 }
1281
1282 let lines: Vec<&str> = content.lines().collect();
1283
1284 if line_number > lines.len() {
1286 return None;
1287 }
1288
1289 let target_idx = line_number - 1; let target_line = lines[target_idx];
1291 let trimmed = target_line.trim();
1292
1293 if trimmed.is_empty()
1295 || trimmed.starts_with('#')
1296 || trimmed.starts_with("```")
1297 || trimmed.starts_with("~~~")
1298 || target_line.starts_with(" ")
1299 || target_line.starts_with('\t')
1300 || trimmed.starts_with('>')
1301 || trimmed.contains('|') || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1304 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1305 && !is_horizontal_rule(trimmed)
1306 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1307 || is_numbered_list_item(trimmed)
1308 || is_definition_list_item(trimmed)
1309 {
1310 return None;
1311 }
1312
1313 let mut para_start = target_idx;
1315 while para_start > 0 {
1316 let prev_idx = para_start - 1;
1317 let prev_line = lines[prev_idx];
1318 let prev_trimmed = prev_line.trim();
1319
1320 if prev_trimmed.is_empty()
1322 || prev_trimmed.starts_with('#')
1323 || prev_trimmed.starts_with("```")
1324 || prev_trimmed.starts_with("~~~")
1325 || prev_line.starts_with(" ")
1326 || prev_line.starts_with('\t')
1327 || prev_trimmed.starts_with('>')
1328 || prev_trimmed.contains('|')
1329 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1330 || is_horizontal_rule(prev_trimmed)
1331 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1332 && !is_horizontal_rule(prev_trimmed)
1333 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1334 || is_numbered_list_item(prev_trimmed)
1335 || is_definition_list_item(prev_trimmed)
1336 {
1337 break;
1338 }
1339
1340 para_start = prev_idx;
1341 }
1342
1343 let mut para_end = target_idx;
1345 while para_end + 1 < lines.len() {
1346 let next_idx = para_end + 1;
1347 let next_line = lines[next_idx];
1348 let next_trimmed = next_line.trim();
1349
1350 if next_trimmed.is_empty()
1352 || next_trimmed.starts_with('#')
1353 || next_trimmed.starts_with("```")
1354 || next_trimmed.starts_with("~~~")
1355 || next_line.starts_with(" ")
1356 || next_line.starts_with('\t')
1357 || next_trimmed.starts_with('>')
1358 || next_trimmed.contains('|')
1359 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1360 || is_horizontal_rule(next_trimmed)
1361 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1362 && !is_horizontal_rule(next_trimmed)
1363 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1364 || is_numbered_list_item(next_trimmed)
1365 || is_definition_list_item(next_trimmed)
1366 {
1367 break;
1368 }
1369
1370 para_end = next_idx;
1371 }
1372
1373 let paragraph_lines = &lines[para_start..=para_end];
1375
1376 let mut start_byte = 0;
1378 for line in lines.iter().take(para_start) {
1379 start_byte += line.len() + 1; }
1381
1382 let mut end_byte = start_byte;
1383 for line in paragraph_lines.iter() {
1384 end_byte += line.len() + 1; }
1386
1387 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1390
1391 if !includes_trailing_newline {
1393 end_byte -= 1;
1394 }
1395
1396 let paragraph_text = paragraph_lines.join("\n");
1398
1399 let options = ReflowOptions {
1401 line_length,
1402 break_on_sentences: true,
1403 preserve_breaks: false,
1404 sentence_per_line: false,
1405 };
1406
1407 let reflowed = reflow_markdown(¶graph_text, &options);
1409
1410 let reflowed_text = if includes_trailing_newline {
1414 if reflowed.ends_with('\n') {
1416 reflowed
1417 } else {
1418 format!("{reflowed}\n")
1419 }
1420 } else {
1421 if reflowed.ends_with('\n') {
1423 reflowed.trim_end_matches('\n').to_string()
1424 } else {
1425 reflowed
1426 }
1427 };
1428
1429 Some(ParagraphReflow {
1430 start_byte,
1431 end_byte,
1432 reflowed_text,
1433 })
1434}
1435
1436#[cfg(test)]
1437mod tests {
1438 use super::*;
1439
1440 #[test]
1441 fn test_list_item_trailing_whitespace_removal() {
1442 let input = "1. First line with trailing spaces \n Second line with trailing spaces \n Third line\n";
1445
1446 let options = ReflowOptions {
1447 line_length: 999999,
1448 break_on_sentences: true, preserve_breaks: false,
1450 sentence_per_line: false,
1451 };
1452
1453 let result = reflow_markdown(input, &options);
1454
1455 assert!(
1458 !result.contains(" "),
1459 "Result should not contain 3+ consecutive spaces: {result:?}"
1460 );
1461
1462 assert!(result.contains(" \n"), "Hard breaks should be preserved: {result:?}");
1464
1465 assert!(
1468 result.lines().count() >= 2,
1469 "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1470 result.lines().count()
1471 );
1472 }
1473
1474 #[test]
1475 fn test_reflow_simple_text() {
1476 let options = ReflowOptions {
1477 line_length: 20,
1478 ..Default::default()
1479 };
1480
1481 let input = "This is a very long line that needs to be wrapped";
1482 let result = reflow_line(input, &options);
1483
1484 assert_eq!(result.len(), 3);
1485 assert!(result[0].chars().count() <= 20);
1486 assert!(result[1].chars().count() <= 20);
1487 assert!(result[2].chars().count() <= 20);
1488 }
1489
1490 #[test]
1491 fn test_preserve_inline_code() {
1492 let options = ReflowOptions {
1493 line_length: 30,
1494 ..Default::default()
1495 };
1496
1497 let result = reflow_line("This line has `inline code` that should be preserved", &options);
1498 let joined = result.join(" ");
1500 assert!(joined.contains("`inline code`"));
1501 }
1502
1503 #[test]
1504 fn test_preserve_links() {
1505 let options = ReflowOptions {
1506 line_length: 40,
1507 ..Default::default()
1508 };
1509
1510 let text = "Check out [this link](https://example.com/very/long/url) for more info";
1511 let result = reflow_line(text, &options);
1512
1513 let joined = result.join(" ");
1515 assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1516 }
1517
1518 #[test]
1519 fn test_reference_link_patterns_fixed() {
1520 let options = ReflowOptions {
1521 line_length: 30,
1522 break_on_sentences: true,
1523 preserve_breaks: false,
1524 sentence_per_line: false,
1525 };
1526
1527 let test_cases = vec![
1529 ("Check out [text][ref] for details", vec!["[text][ref]"]),
1531 ("See [text][] for info", vec!["[text][]"]),
1533 ("Visit [homepage] today", vec!["[homepage]"]),
1535 (
1537 "Links: [first][ref1] and [second][ref2] here",
1538 vec!["[first][ref1]", "[second][ref2]"],
1539 ),
1540 (
1542 "See [inline](url) and [reference][ref] links",
1543 vec", "[reference][ref]"],
1544 ),
1545 ];
1546
1547 for (input, expected_patterns) in test_cases {
1548 println!("\nTesting: {input}");
1549 let result = reflow_line(input, &options);
1550 let joined = result.join(" ");
1551 println!("Result: {joined}");
1552
1553 for expected_pattern in expected_patterns {
1555 assert!(
1556 joined.contains(expected_pattern),
1557 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1558 );
1559 }
1560
1561 assert!(
1563 !joined.contains("[ ") || !joined.contains("] ["),
1564 "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
1565 );
1566 }
1567 }
1568
1569 #[test]
1570 fn test_sentence_detection_basic() {
1571 assert!(is_sentence_boundary("Hello. World", 5));
1573 assert!(is_sentence_boundary("Test! Another", 4));
1574 assert!(is_sentence_boundary("Question? Answer", 8));
1575
1576 assert!(!is_sentence_boundary("Hello world", 5));
1578 assert!(!is_sentence_boundary("Test.com", 4));
1579 assert!(!is_sentence_boundary("3.14 pi", 1));
1580 }
1581
1582 #[test]
1583 fn test_sentence_detection_abbreviations() {
1584 assert!(!is_sentence_boundary("Mr. Smith", 2));
1586 assert!(!is_sentence_boundary("Dr. Jones", 2));
1587 assert!(!is_sentence_boundary("e.g. example", 3));
1588 assert!(!is_sentence_boundary("i.e. that is", 3));
1589 assert!(!is_sentence_boundary("etc. items", 3));
1590
1591 assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1593 }
1594
1595 #[test]
1596 fn test_split_into_sentences() {
1597 let text = "First sentence. Second sentence. Third one!";
1598 let sentences = split_into_sentences(text);
1599 assert_eq!(sentences.len(), 3);
1600 assert_eq!(sentences[0], "First sentence.");
1601 assert_eq!(sentences[1], "Second sentence.");
1602 assert_eq!(sentences[2], "Third one!");
1603
1604 let text2 = "Mr. Smith met Dr. Jones.";
1606 let sentences2 = split_into_sentences(text2);
1607 assert_eq!(sentences2.len(), 1);
1608 assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1609
1610 let text3 = "This is a single sentence.";
1612 let sentences3 = split_into_sentences(text3);
1613 assert_eq!(sentences3.len(), 1);
1614 assert_eq!(sentences3[0], "This is a single sentence.");
1615 }
1616
1617 #[test]
1618 fn test_sentence_per_line_reflow() {
1619 let options = ReflowOptions {
1620 line_length: 80,
1621 break_on_sentences: true,
1622 preserve_breaks: false,
1623 sentence_per_line: true,
1624 };
1625
1626 let input = "First sentence. Second sentence. Third sentence.";
1628 let result = reflow_line(input, &options);
1629 assert_eq!(result.len(), 3);
1630 assert_eq!(result[0], "First sentence.");
1631 assert_eq!(result[1], "Second sentence.");
1632 assert_eq!(result[2], "Third sentence.");
1633
1634 let input2 = "This has **bold**. And [a link](url).";
1636 let result2 = reflow_line(input2, &options);
1637 assert_eq!(result2.len(), 2);
1638 assert_eq!(result2[0], "This has **bold**.");
1639 assert_eq!(result2[1], "And [a link](url).");
1640 }
1641
1642 #[test]
1643 fn test_sentence_per_line_with_backticks() {
1644 let options = ReflowOptions {
1645 line_length: 80,
1646 break_on_sentences: true,
1647 preserve_breaks: false,
1648 sentence_per_line: true,
1649 };
1650
1651 let input = "This sentence has `code` in it. And this has `more code` too.";
1652 let result = reflow_line(input, &options);
1653 assert_eq!(result.len(), 2);
1654 assert_eq!(result[0], "This sentence has `code` in it.");
1655 assert_eq!(result[1], "And this has `more code` too.");
1656 }
1657
1658 #[test]
1659 fn test_sentence_per_line_with_backticks_in_parens() {
1660 let options = ReflowOptions {
1661 line_length: 80,
1662 break_on_sentences: true,
1663 preserve_breaks: false,
1664 sentence_per_line: true,
1665 };
1666
1667 let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1668 let result = reflow_line(input, &options);
1669 assert_eq!(result.len(), 2);
1670 assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1671 assert_eq!(result[1], "Next sentence.");
1672 }
1673
1674 #[test]
1675 fn test_sentence_per_line_with_questions_exclamations() {
1676 let options = ReflowOptions {
1677 line_length: 80,
1678 break_on_sentences: true,
1679 preserve_breaks: false,
1680 sentence_per_line: true,
1681 };
1682
1683 let input = "Is this a question? Yes it is! And a statement.";
1684 let result = reflow_line(input, &options);
1685 assert_eq!(result.len(), 3);
1686 assert_eq!(result[0], "Is this a question?");
1687 assert_eq!(result[1], "Yes it is!");
1688 assert_eq!(result[2], "And a statement.");
1689 }
1690
1691 #[test]
1692 fn test_split_sentences_issue_124() {
1693 let text = "If you are sure that all data structures exposed in a `PyModule` are thread-safe, then pass `gil_used = false` as a parameter to the `pymodule` procedural macro declaring the module or call `PyModule::gil_used` on a `PyModule` instance. For example:";
1695
1696 let sentences = split_into_sentences(text);
1697
1698 assert_eq!(sentences.len(), 2, "Should detect 2 sentences in the text");
1702 }
1703
1704 #[test]
1705 fn test_reference_link_edge_cases() {
1706 let options = ReflowOptions {
1707 line_length: 40,
1708 break_on_sentences: true,
1709 preserve_breaks: false,
1710 sentence_per_line: false,
1711 };
1712
1713 let test_cases = vec![
1715 ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1717 (
1719 "Link [text with [nested] content][ref]",
1720 vec!["[text with [nested] content][ref]"],
1721 ),
1722 (
1724 "First [ref][link] then [inline](url)",
1725 vec!["[ref][link]", "[inline](url)"],
1726 ),
1727 ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1729 (
1731 "Complex [text with *emphasis*][] reference",
1732 vec!["[text with *emphasis*][]"],
1733 ),
1734 ];
1735
1736 for (input, expected_patterns) in test_cases {
1737 println!("\nTesting edge case: {input}");
1738 let result = reflow_line(input, &options);
1739 let joined = result.join(" ");
1740 println!("Result: {joined}");
1741
1742 for expected_pattern in expected_patterns {
1744 assert!(
1745 joined.contains(expected_pattern),
1746 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1747 );
1748 }
1749 }
1750 }
1751
1752 #[test]
1753 fn test_reflow_with_emphasis() {
1754 let options = ReflowOptions {
1755 line_length: 25,
1756 ..Default::default()
1757 };
1758
1759 let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1760
1761 let joined = result.join(" ");
1763 assert!(joined.contains("*emphasized*"));
1764 assert!(joined.contains("**strong**"));
1765 }
1766
1767 #[test]
1768 fn test_image_patterns_preserved() {
1769 let options = ReflowOptions {
1770 line_length: 30,
1771 ..Default::default()
1772 };
1773
1774 let test_cases = vec for details",
1779 vec"],
1780 ),
1781 ("See ![image][ref] for info", vec!["![image][ref]"]),
1783 ("Visit ![homepage][] today", vec!["![homepage][]"]),
1785 (
1787 "Images:  and ![second][ref2]",
1788 vec", "![second][ref2]"],
1789 ),
1790 ];
1791
1792 for (input, expected_patterns) in test_cases {
1793 println!("\nTesting: {input}");
1794 let result = reflow_line(input, &options);
1795 let joined = result.join(" ");
1796 println!("Result: {joined}");
1797
1798 for expected_pattern in expected_patterns {
1799 assert!(
1800 joined.contains(expected_pattern),
1801 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1802 );
1803 }
1804 }
1805 }
1806
1807 #[test]
1808 fn test_extended_markdown_patterns() {
1809 let options = ReflowOptions {
1810 line_length: 40,
1811 ..Default::default()
1812 };
1813
1814 let test_cases = vec![
1815 ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1817 (
1819 "Check [[wiki link]] and [[page|display]]",
1820 vec!["[[wiki link]]", "[[page|display]]"],
1821 ),
1822 (
1824 "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1825 vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1826 ),
1827 ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1829 (
1831 "Text with <span>tag</span> and <br/>",
1832 vec!["<span>", "</span>", "<br/>"],
1833 ),
1834 ("Non-breaking space and em—dash", vec![" ", "—"]),
1836 ];
1837
1838 for (input, expected_patterns) in test_cases {
1839 let result = reflow_line(input, &options);
1840 let joined = result.join(" ");
1841
1842 for pattern in expected_patterns {
1843 assert!(
1844 joined.contains(pattern),
1845 "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1846 );
1847 }
1848 }
1849 }
1850
1851 #[test]
1852 fn test_complex_mixed_patterns() {
1853 let options = ReflowOptions {
1854 line_length: 50,
1855 ..Default::default()
1856 };
1857
1858 let input = "Line with **bold**, `code`, [link](url), , ~~strike~~, $math$, :emoji:, and <tag> all together";
1860 let result = reflow_line(input, &options);
1861 let joined = result.join(" ");
1862
1863 assert!(joined.contains("**bold**"));
1865 assert!(joined.contains("`code`"));
1866 assert!(joined.contains("[link](url)"));
1867 assert!(joined.contains(""));
1868 assert!(joined.contains("~~strike~~"));
1869 assert!(joined.contains("$math$"));
1870 assert!(joined.contains(":emoji:"));
1871 assert!(joined.contains("<tag>"));
1872 }
1873
1874 #[test]
1875 fn test_footnote_patterns_preserved() {
1876 let options = ReflowOptions {
1877 line_length: 40,
1878 ..Default::default()
1879 };
1880
1881 let test_cases = vec![
1882 ("This has a footnote[^1] reference", vec!["[^1]"]),
1884 ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1886 ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1888 ];
1889
1890 for (input, expected_patterns) in test_cases {
1891 let result = reflow_line(input, &options);
1892 let joined = result.join(" ");
1893
1894 for expected_pattern in expected_patterns {
1895 assert!(
1896 joined.contains(expected_pattern),
1897 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1898 );
1899 }
1900 }
1901 }
1902
1903 #[test]
1904 fn test_reflow_markdown_numbered_lists() {
1905 let options = ReflowOptions {
1907 line_length: 50,
1908 ..Default::default()
1909 };
1910
1911 let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
19122. Short item
19133. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1914
1915 let result = reflow_markdown(content, &options);
1916
1917 let expected = r#"1. List `manifest` to find the manifest with the
1919 largest ID. Say it's
1920 `00000000000000000002.manifest` in this
1921 example.
19222. Short item
19233. Another long item that definitely exceeds the
1924 fifty character limit and needs wrapping"#;
1925
1926 assert_eq!(
1927 result, expected,
1928 "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1929 );
1930 }
1931
1932 #[test]
1933 fn test_reflow_markdown_bullet_lists() {
1934 let options = ReflowOptions {
1935 line_length: 40,
1936 ..Default::default()
1937 };
1938
1939 let content = r#"- First bullet point with a very long line that needs wrapping
1940* Second bullet using asterisk
1941+ Third bullet using plus sign
1942- Short one"#;
1943
1944 let result = reflow_markdown(content, &options);
1945
1946 let expected = r#"- First bullet point with a very long
1948 line that needs wrapping
1949* Second bullet using asterisk
1950+ Third bullet using plus sign
1951- Short one"#;
1952
1953 assert_eq!(
1954 result, expected,
1955 "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1956 );
1957 }
1958
1959 #[test]
1960 fn test_ie_abbreviation_split_debug() {
1961 let input = "This results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.";
1962
1963 let options = ReflowOptions {
1964 line_length: 80,
1965 break_on_sentences: true,
1966 preserve_breaks: false,
1967 sentence_per_line: true,
1968 };
1969
1970 let result = reflow_line(input, &options);
1971
1972 assert_eq!(result.len(), 1, "Should not split after i.e. abbreviation");
1974 }
1975
1976 #[test]
1977 fn test_ie_abbreviation_paragraph() {
1978 let input = "The `pyo3(transparent)` attribute can be used on structs with exactly one field.\nThis results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.\nThis behaviour is enabled per default for newtype structs and tuple-variants with a single field.";
1980
1981 let options = ReflowOptions {
1982 line_length: 80,
1983 break_on_sentences: true,
1984 preserve_breaks: false,
1985 sentence_per_line: true,
1986 };
1987
1988 let result = reflow_markdown(input, &options);
1989
1990 let line_count = result.lines().count();
1992 assert_eq!(line_count, 3, "Should have 3 sentences, not {line_count}");
1993
1994 let lines: Vec<&str> = result.lines().collect();
1996 assert!(lines.len() >= 2, "Should have at least 2 lines");
1997 assert!(lines[1].contains("i.e."), "Second line should contain 'i.e.'");
1998 assert!(
1999 lines[1].contains("`obj.extract()`"),
2000 "Second line should contain the code span"
2001 );
2002 assert!(
2003 lines[1].contains("attribute."),
2004 "Second line should end with 'attribute.'"
2005 );
2006 }
2007
2008 #[test]
2009 fn test_definition_list_preservation() {
2010 let options = ReflowOptions {
2011 line_length: 80,
2012 break_on_sentences: true,
2013 preserve_breaks: false,
2014 sentence_per_line: true,
2015 };
2016
2017 let content = "Term\n: Definition text here.";
2018 let result = reflow_markdown(content, &options);
2019
2020 assert_eq!(result, "Term\n: Definition text here.");
2022 }
2023
2024 #[test]
2025 fn test_definition_list_multiline() {
2026 let options = ReflowOptions {
2027 line_length: 80,
2028 break_on_sentences: true,
2029 preserve_breaks: false,
2030 sentence_per_line: true,
2031 };
2032
2033 let content = "Term\n: First sentence of definition. Second sentence.";
2034 let result = reflow_markdown(content, &options);
2035
2036 assert!(result.starts_with("Term\n"));
2038 assert!(result.contains("\n: First sentence of definition. Second sentence."));
2041 }
2042
2043 #[test]
2044 fn test_definition_list_multiple() {
2045 let options = ReflowOptions {
2046 line_length: 80,
2047 sentence_per_line: true,
2048 ..Default::default()
2049 };
2050
2051 let content = "Term 1\n: Definition 1\n: Another definition for term 1\n\nTerm 2\n: Definition 2";
2052 let result = reflow_markdown(content, &options);
2053
2054 assert!(result.lines().filter(|l| l.trim_start().starts_with(": ")).count() >= 3);
2056 }
2057
2058 #[test]
2059 fn test_definition_list_with_paragraphs() {
2060 let options = ReflowOptions {
2061 line_length: 0, break_on_sentences: true,
2063 preserve_breaks: false,
2064 sentence_per_line: true,
2065 };
2066
2067 let content = "Regular paragraph. With multiple sentences.\n\nTerm\n: Definition.\n\nAnother paragraph.";
2068 let result = reflow_markdown(content, &options);
2069
2070 assert!(result.contains("Regular paragraph."));
2072 assert!(result.contains("\nWith multiple sentences."));
2073 assert!(result.contains("Term\n: Definition."));
2075 assert!(result.contains("Another paragraph."));
2077 }
2078
2079 #[test]
2080 fn test_definition_list_edge_cases() {
2081 let options = ReflowOptions::default();
2082
2083 let content1 = "Term\n : Indented definition";
2085 let result1 = reflow_markdown(content1, &options);
2086 assert!(result1.contains("\n : Indented definition"));
2087
2088 let content2 = "Term\n: Definition";
2090 let result2 = reflow_markdown(content2, &options);
2091 assert!(result2.contains("\n: Definition"));
2092
2093 let content3 = "Term\n:\tDefinition";
2095 let result3 = reflow_markdown(content3, &options);
2096 assert!(result3.contains("\n:\tDefinition"));
2097 }
2098}