1use crate::utils::regex_cache::{
7 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Configuration for Markdown text reflow.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Maximum line width, measured in characters, that reflowed lines should fit in.
    pub line_length: usize,
    /// Whether sentence boundaries are preferred break points.
    /// NOTE(review): no code visible in this file reads this flag — confirm where it is consumed.
    pub break_on_sentences: bool,
    /// When `true`, each paragraph line is reflowed on its own instead of being merged
    /// with the lines that follow it.
    pub preserve_breaks: bool,
    /// When `true`, output one sentence per line; `line_length` is not applied in this mode.
    pub sentence_per_line: bool,
}
23
24impl Default for ReflowOptions {
25 fn default() -> Self {
26 Self {
27 line_length: 80,
28 break_on_sentences: true,
29 preserve_breaks: false,
30 sentence_per_line: false,
31 }
32 }
33}
34
/// Reports whether the character at *character index* `pos` ends a sentence.
///
/// A boundary is `.`, `!` or `?` followed by exactly one space and then an uppercase
/// character. Two exceptions suppress it:
///
/// * the punctuation directly follows a known abbreviation (`Mr`, `e.g`, `etc`, ...),
///   matched case-insensitively and only as a whole word, so "items." or "complex."
///   are not mistaken for "Ms." / "ex.";
/// * the punctuation sits between two numeric characters.
///
/// `pos` counts characters, not bytes; an out-of-range `pos` yields `false`.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    // Lowercase forms; the preceding text is lowercased before comparison.
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "mr", "mrs", "dr", "ms", "prof", "sr", "jr",
    ];

    // Walk by character but keep the byte offset so the prefix can be sliced safely.
    let mut tail = text.char_indices().skip(pos);
    let Some((byte_pos, punctuation)) = tail.next() else {
        return false;
    };
    if !matches!(punctuation, '.' | '!' | '?') {
        return false;
    }
    let (Some((_, separator)), Some((_, following))) = (tail.next(), tail.next()) else {
        return false;
    };
    if separator != ' ' || !following.is_uppercase() {
        return false;
    }

    let preceding = &text[..byte_pos];
    let Some(previous) = preceding.chars().next_back() else {
        // Punctuation is the very first character: nothing can veto the boundary.
        return true;
    };

    let preceding_lower = preceding.to_lowercase();
    let follows_abbreviation = ABBREVIATIONS.iter().any(|abbreviation| {
        preceding_lower
            .strip_suffix(*abbreviation)
            // Require a word boundary in front of the abbreviation.
            .is_some_and(|head| !head.ends_with(char::is_alphanumeric))
    });
    if follows_abbreviation {
        return false;
    }

    !(previous.is_numeric() && following.is_numeric())
}
81
82pub fn split_into_sentences(text: &str) -> Vec<String> {
84 let mut sentences = Vec::new();
85 let mut current_sentence = String::new();
86 let mut chars = text.chars().peekable();
87 let mut pos = 0;
88
89 while let Some(c) = chars.next() {
90 current_sentence.push(c);
91
92 if is_sentence_boundary(text, pos) {
93 if chars.peek() == Some(&' ') {
95 chars.next();
96 pos += 1;
97 }
98
99 sentences.push(current_sentence.trim().to_string());
100 current_sentence.clear();
101 }
102
103 pos += 1;
104 }
105
106 if !current_sentence.trim().is_empty() {
108 sentences.push(current_sentence.trim().to_string());
109 }
110
111 sentences
112}
113
/// Reports whether `line` is a Markdown horizontal rule: at least three occurrences of
/// one marker character (`-`, `_` or `*`), optionally separated by spaces, and nothing else.
///
/// The line must start with the marker itself, so callers pass an already-trimmed line.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '_' | '*') {
        return false;
    }

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Any other character disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
142
/// Reports whether `line` starts with an ordered-list marker: one or more numeric
/// characters, a `.`, and then either a space or the end of the line.
fn is_numbered_list_item(line: &str) -> bool {
    let after_digits = line.trim_start_matches(char::is_numeric);

    // Nothing was stripped, so the line does not begin with a number.
    if after_digits.len() == line.len() {
        return false;
    }

    match after_digits.strip_prefix('.') {
        Some(after_dot) => after_dot.is_empty() || after_dot.starts_with(' '),
        None => false,
    }
}
165
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// A hard break is two or more trailing spaces. When present (and the line has other
/// content) the result ends with exactly two spaces; otherwise all trailing whitespace
/// is removed. One trailing `\r` (from CRLF input) is dropped first. A line that is
/// only whitespace becomes the empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    // Written with escapes so the two spaces cannot be lost to whitespace normalisation.
    const HARD_BREAK: &str = "\x20\x20";

    let s = s.strip_suffix('\r').unwrap_or(s);
    let content = s.trim_end();

    let mut kept = content.to_string();
    if !content.is_empty() && s.ends_with(HARD_BREAK) {
        kept.push_str(HARD_BREAK);
    }
    kept
}
188
189pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
190 if options.sentence_per_line {
192 let elements = parse_markdown_elements(line);
193 return reflow_elements_sentence_per_line(&elements);
194 }
195
196 if line.chars().count() <= options.line_length {
198 return vec![line.to_string()];
199 }
200
201 let elements = parse_markdown_elements(line);
203
204 reflow_elements(&elements, options)
206}
207
/// One inline Markdown element. Every variant except `Text` is kept as a single unit
/// when text is reflowed.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$math$`.
    InlineMath(String),
    /// Display math: `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// Raw HTML tag, stored with its delimiters.
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Inline code: `` `code` ``.
    Code(String),
    /// Bold: `**text**`.
    Bold(String),
    /// Italic: `*text*`.
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Renders the element back to its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax; an empty string here deletes the image.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Italic(s) => write!(f, "*{s}*"),
        }
    }
}

impl Element {
    /// Width of the rendered element in characters: the content plus the Markdown
    /// delimiters that `Display` writes around it.
    fn len(&self) -> usize {
        fn width(s: &str) -> usize {
            s.chars().count()
        }

        match self {
            Element::Text(s) => width(s),
            Element::Link { text, url } => width(text) + width(url) + 4,
            Element::ReferenceLink { text, reference } => width(text) + width(reference) + 4,
            Element::EmptyReferenceLink { text } => width(text) + 4,
            Element::ShortcutReference { reference } => width(reference) + 2,
            Element::InlineImage { alt, url } => width(alt) + width(url) + 5,
            Element::ReferenceImage { alt, reference } => width(alt) + width(reference) + 5,
            Element::EmptyReferenceImage { alt } => width(alt) + 5,
            Element::FootnoteReference { note } => width(note) + 3,
            Element::Strikethrough(s) => width(s) + 4,
            Element::WikiLink(s) => width(s) + 4,
            Element::InlineMath(s) => width(s) + 2,
            Element::DisplayMath(s) => width(s) + 4,
            Element::EmojiShortcode(s) => width(s) + 2,
            Element::HtmlTag(s) => width(s),
            Element::HtmlEntity(s) => width(s),
            Element::Code(s) => width(s) + 2,
            Element::Bold(s) => width(s) + 4,
            Element::Italic(s) => width(s) + 2,
        }
    }
}
302
303fn parse_markdown_elements(text: &str) -> Vec<Element> {
312 let mut elements = Vec::new();
313 let mut remaining = text;
314
315 while !remaining.is_empty() {
316 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
318
319 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
322 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
323 {
324 earliest_match = Some((m.start(), "inline_image", m));
325 }
326
327 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
329 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
330 {
331 earliest_match = Some((m.start(), "ref_image", m));
332 }
333
334 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
336 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
337 {
338 earliest_match = Some((m.start(), "footnote_ref", m));
339 }
340
341 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
343 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
344 {
345 earliest_match = Some((m.start(), "inline_link", m));
346 }
347
348 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
350 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
351 {
352 earliest_match = Some((m.start(), "ref_link", m));
353 }
354
355 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
358 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
359 {
360 earliest_match = Some((m.start(), "shortcut_ref", m));
361 }
362
363 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
365 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
366 {
367 earliest_match = Some((m.start(), "wiki_link", m));
368 }
369
370 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
372 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
373 {
374 earliest_match = Some((m.start(), "display_math", m));
375 }
376
377 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
379 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
380 {
381 earliest_match = Some((m.start(), "inline_math", m));
382 }
383
384 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
386 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
387 {
388 earliest_match = Some((m.start(), "strikethrough", m));
389 }
390
391 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
393 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
394 {
395 earliest_match = Some((m.start(), "emoji", m));
396 }
397
398 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
400 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
401 {
402 earliest_match = Some((m.start(), "html_entity", m));
403 }
404
405 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
407 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
408 {
409 earliest_match = Some((m.start(), "html_tag", m));
410 }
411
412 let mut next_special = remaining.len();
414 let mut special_type = "";
415
416 if let Some(pos) = remaining.find('`')
417 && pos < next_special
418 {
419 next_special = pos;
420 special_type = "code";
421 }
422 if let Some(pos) = remaining.find("**")
423 && pos < next_special
424 {
425 next_special = pos;
426 special_type = "bold";
427 }
428 if let Some(pos) = remaining.find('*')
429 && pos < next_special
430 && !remaining[pos..].starts_with("**")
431 {
432 next_special = pos;
433 special_type = "italic";
434 }
435
436 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
438 pos < next_special
439 } else {
440 false
441 };
442
443 if should_process_markdown_link {
444 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
445
446 if pos > 0 {
448 elements.push(Element::Text(remaining[..pos].to_string()));
449 }
450
451 match pattern_type {
453 "inline_image" => {
454 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
455 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
456 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
457 elements.push(Element::InlineImage {
458 alt: alt.to_string(),
459 url: url.to_string(),
460 });
461 remaining = &remaining[match_obj.end()..];
462 } else {
463 elements.push(Element::Text("!".to_string()));
464 remaining = &remaining[1..];
465 }
466 }
467 "ref_image" => {
468 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
469 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
470 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
471
472 if reference.is_empty() {
473 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
474 } else {
475 elements.push(Element::ReferenceImage {
476 alt: alt.to_string(),
477 reference: reference.to_string(),
478 });
479 }
480 remaining = &remaining[match_obj.end()..];
481 } else {
482 elements.push(Element::Text("!".to_string()));
483 remaining = &remaining[1..];
484 }
485 }
486 "footnote_ref" => {
487 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
488 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
489 elements.push(Element::FootnoteReference { note: note.to_string() });
490 remaining = &remaining[match_obj.end()..];
491 } else {
492 elements.push(Element::Text("[".to_string()));
493 remaining = &remaining[1..];
494 }
495 }
496 "inline_link" => {
497 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
498 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
499 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
500 elements.push(Element::Link {
501 text: text.to_string(),
502 url: url.to_string(),
503 });
504 remaining = &remaining[match_obj.end()..];
505 } else {
506 elements.push(Element::Text("[".to_string()));
508 remaining = &remaining[1..];
509 }
510 }
511 "ref_link" => {
512 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
513 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
514 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
515
516 if reference.is_empty() {
517 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
519 } else {
520 elements.push(Element::ReferenceLink {
522 text: text.to_string(),
523 reference: reference.to_string(),
524 });
525 }
526 remaining = &remaining[match_obj.end()..];
527 } else {
528 elements.push(Element::Text("[".to_string()));
530 remaining = &remaining[1..];
531 }
532 }
533 "shortcut_ref" => {
534 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
535 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
536 elements.push(Element::ShortcutReference {
537 reference: reference.to_string(),
538 });
539 remaining = &remaining[match_obj.end()..];
540 } else {
541 elements.push(Element::Text("[".to_string()));
543 remaining = &remaining[1..];
544 }
545 }
546 "wiki_link" => {
547 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
548 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
549 elements.push(Element::WikiLink(content.to_string()));
550 remaining = &remaining[match_obj.end()..];
551 } else {
552 elements.push(Element::Text("[[".to_string()));
553 remaining = &remaining[2..];
554 }
555 }
556 "display_math" => {
557 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
558 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
559 elements.push(Element::DisplayMath(math.to_string()));
560 remaining = &remaining[match_obj.end()..];
561 } else {
562 elements.push(Element::Text("$$".to_string()));
563 remaining = &remaining[2..];
564 }
565 }
566 "inline_math" => {
567 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
568 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
569 elements.push(Element::InlineMath(math.to_string()));
570 remaining = &remaining[match_obj.end()..];
571 } else {
572 elements.push(Element::Text("$".to_string()));
573 remaining = &remaining[1..];
574 }
575 }
576 "strikethrough" => {
577 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
578 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
579 elements.push(Element::Strikethrough(text.to_string()));
580 remaining = &remaining[match_obj.end()..];
581 } else {
582 elements.push(Element::Text("~~".to_string()));
583 remaining = &remaining[2..];
584 }
585 }
586 "emoji" => {
587 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
588 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
589 elements.push(Element::EmojiShortcode(emoji.to_string()));
590 remaining = &remaining[match_obj.end()..];
591 } else {
592 elements.push(Element::Text(":".to_string()));
593 remaining = &remaining[1..];
594 }
595 }
596 "html_entity" => {
597 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
599 remaining = &remaining[match_obj.end()..];
600 }
601 "html_tag" => {
602 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
604 remaining = &remaining[match_obj.end()..];
605 }
606 _ => {
607 elements.push(Element::Text("[".to_string()));
609 remaining = &remaining[1..];
610 }
611 }
612 } else {
613 if next_special > 0 && next_special < remaining.len() {
617 elements.push(Element::Text(remaining[..next_special].to_string()));
618 remaining = &remaining[next_special..];
619 }
620
621 match special_type {
623 "code" => {
624 if let Some(code_end) = remaining[1..].find('`') {
626 let code = &remaining[1..1 + code_end];
627 elements.push(Element::Code(code.to_string()));
628 remaining = &remaining[1 + code_end + 1..];
629 } else {
630 elements.push(Element::Text(remaining.to_string()));
632 break;
633 }
634 }
635 "bold" => {
636 if let Some(bold_end) = remaining[2..].find("**") {
638 let bold_text = &remaining[2..2 + bold_end];
639 elements.push(Element::Bold(bold_text.to_string()));
640 remaining = &remaining[2 + bold_end + 2..];
641 } else {
642 elements.push(Element::Text("**".to_string()));
644 remaining = &remaining[2..];
645 }
646 }
647 "italic" => {
648 if let Some(italic_end) = remaining[1..].find('*') {
650 let italic_text = &remaining[1..1 + italic_end];
651 elements.push(Element::Italic(italic_text.to_string()));
652 remaining = &remaining[1 + italic_end + 1..];
653 } else {
654 elements.push(Element::Text("*".to_string()));
656 remaining = &remaining[1..];
657 }
658 }
659 _ => {
660 elements.push(Element::Text(remaining.to_string()));
662 break;
663 }
664 }
665 }
666 }
667
668 elements
669}
670
671fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
673 let mut lines = Vec::new();
674 let mut current_line = String::new();
675
676 for element in elements {
677 let element_str = format!("{element}");
678
679 if let Element::Text(text) = element {
681 let combined = format!("{current_line}{text}");
683 let sentences = split_into_sentences(&combined);
684
685 if sentences.len() > 1 {
686 for (i, sentence) in sentences.iter().enumerate() {
688 if i == 0 {
689 lines.push(sentence.to_string());
691 } else if i == sentences.len() - 1 {
692 current_line = sentence.to_string();
694 } else {
695 lines.push(sentence.to_string());
697 }
698 }
699 } else {
700 current_line = combined;
702 }
703 } else {
704 if !current_line.is_empty()
707 && !current_line.ends_with(' ')
708 && !current_line.ends_with('(')
709 && !current_line.ends_with('[')
710 {
711 current_line.push(' ');
712 }
713 current_line.push_str(&element_str);
714 }
715 }
716
717 if !current_line.is_empty() {
719 lines.push(current_line.trim().to_string());
720 }
721
722 lines
723}
724
725fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
727 let mut lines = Vec::new();
728 let mut current_line = String::new();
729 let mut current_length = 0;
730
731 for element in elements {
732 let element_str = format!("{element}");
733 let element_len = element.len();
734
735 if let Element::Text(text) = element {
737 let words: Vec<&str> = text.split_whitespace().collect();
739
740 for word in words {
741 let word_len = word.chars().count();
742 if current_length > 0 && current_length + 1 + word_len > options.line_length {
743 lines.push(current_line.trim().to_string());
745 current_line = word.to_string();
746 current_length = word_len;
747 } else {
748 if current_length > 0 {
750 current_line.push(' ');
751 current_length += 1;
752 }
753 current_line.push_str(word);
754 current_length += word_len;
755 }
756 }
757 } else {
758 if current_length > 0 && current_length + 1 + element_len > options.line_length {
761 lines.push(current_line.trim().to_string());
763 current_line = element_str;
764 current_length = element_len;
765 } else {
766 if current_length > 0 {
768 current_line.push(' ');
769 current_length += 1;
770 }
771 current_line.push_str(&element_str);
772 current_length += element_len;
773 }
774 }
775 }
776
777 if !current_line.is_empty() {
779 lines.push(current_line.trim_end().to_string());
780 }
781
782 lines
783}
784
785pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
787 let lines: Vec<&str> = content.lines().collect();
788 let mut result = Vec::new();
789 let mut i = 0;
790
791 while i < lines.len() {
792 let line = lines[i];
793 let trimmed = line.trim();
794
795 if trimmed.is_empty() {
797 result.push(String::new());
798 i += 1;
799 continue;
800 }
801
802 if trimmed.starts_with('#') {
804 result.push(line.to_string());
805 i += 1;
806 continue;
807 }
808
809 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
811 result.push(line.to_string());
812 i += 1;
813 while i < lines.len() {
815 result.push(lines[i].to_string());
816 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
817 i += 1;
818 break;
819 }
820 i += 1;
821 }
822 continue;
823 }
824
825 if line.starts_with(" ") || line.starts_with("\t") {
827 result.push(line.to_string());
829 i += 1;
830 while i < lines.len() {
831 let next_line = lines[i];
832 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
834 result.push(next_line.to_string());
835 i += 1;
836 } else {
837 break;
838 }
839 }
840 continue;
841 }
842
843 if trimmed.starts_with('>') {
845 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
846 let quote_content = &line[quote_prefix.len()..].trim_start();
847
848 let reflowed = reflow_line(quote_content, options);
849 for reflowed_line in reflowed.iter() {
850 result.push(format!("{quote_prefix} {reflowed_line}"));
851 }
852 i += 1;
853 continue;
854 }
855
856 if is_horizontal_rule(trimmed) {
858 result.push(line.to_string());
859 i += 1;
860 continue;
861 }
862
863 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
865 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
866 || trimmed.starts_with('+')
867 || is_numbered_list_item(trimmed)
868 {
869 let indent = line.len() - line.trim_start().len();
871 let indent_str = " ".repeat(indent);
872
873 let mut marker_end = indent;
876 let mut content_start = indent;
877
878 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
879 if let Some(period_pos) = line[indent..].find('.') {
881 marker_end = indent + period_pos + 1; content_start = marker_end;
883 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
885 content_start += 1;
886 }
887 }
888 } else {
889 marker_end = indent + 1; content_start = marker_end;
892 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
894 content_start += 1;
895 }
896 }
897
898 let marker = &line[indent..marker_end];
899
900 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
903 i += 1;
904
905 while i < lines.len() {
907 let next_line = lines[i];
908 let next_trimmed = next_line.trim();
909
910 if next_trimmed.is_empty()
912 || next_trimmed.starts_with('#')
913 || next_trimmed.starts_with("```")
914 || next_trimmed.starts_with("~~~")
915 || next_trimmed.starts_with('>')
916 || next_trimmed.starts_with('|')
917 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
918 || is_horizontal_rule(next_trimmed)
919 || (next_trimmed.starts_with('-')
920 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
921 || (next_trimmed.starts_with('*')
922 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
923 || (next_trimmed.starts_with('+')
924 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
925 || is_numbered_list_item(next_trimmed)
926 {
927 break;
928 }
929
930 let next_indent = next_line.len() - next_line.trim_start().len();
932 if next_indent >= content_start {
933 let trimmed_start = next_line.trim_start();
936 list_content.push(trim_preserving_hard_break(trimmed_start));
937 i += 1;
938 } else {
939 break;
941 }
942 }
943
944 let combined_content = if options.preserve_breaks {
947 list_content[0].clone()
948 } else {
949 let has_hard_breaks = list_content.iter().any(|line| line.ends_with(" "));
951 if has_hard_breaks {
952 list_content.join("\n")
954 } else {
955 list_content.join(" ")
957 }
958 };
959
960 let trimmed_marker = marker;
962 let continuation_spaces = content_start;
963
964 let prefix_length = indent + trimmed_marker.len() + 1;
966
967 let adjusted_options = ReflowOptions {
969 line_length: options.line_length.saturating_sub(prefix_length),
970 ..options.clone()
971 };
972
973 let reflowed = reflow_line(&combined_content, &adjusted_options);
974 for (j, reflowed_line) in reflowed.iter().enumerate() {
975 if j == 0 {
976 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
977 } else {
978 let continuation_indent = " ".repeat(continuation_spaces);
980 result.push(format!("{continuation_indent}{reflowed_line}"));
981 }
982 }
983 continue;
984 }
985
986 if trimmed.contains('|') {
988 result.push(line.to_string());
989 i += 1;
990 continue;
991 }
992
993 if trimmed.starts_with('[') && line.contains("]:") {
995 result.push(line.to_string());
996 i += 1;
997 continue;
998 }
999
1000 let mut is_single_line_paragraph = true;
1002 if i + 1 < lines.len() {
1003 let next_line = lines[i + 1];
1004 let next_trimmed = next_line.trim();
1005 if !next_trimmed.is_empty()
1007 && !next_trimmed.starts_with('#')
1008 && !next_trimmed.starts_with("```")
1009 && !next_trimmed.starts_with("~~~")
1010 && !next_trimmed.starts_with('>')
1011 && !next_trimmed.starts_with('|')
1012 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1013 && !is_horizontal_rule(next_trimmed)
1014 && !(next_trimmed.starts_with('-')
1015 && !is_horizontal_rule(next_trimmed)
1016 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017 && !(next_trimmed.starts_with('*')
1018 && !is_horizontal_rule(next_trimmed)
1019 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1020 && !(next_trimmed.starts_with('+')
1021 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1022 && !is_numbered_list_item(next_trimmed)
1023 {
1024 is_single_line_paragraph = false;
1025 }
1026 }
1027
1028 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1030 result.push(line.to_string());
1031 i += 1;
1032 continue;
1033 }
1034
1035 let mut paragraph_parts = Vec::new();
1037 let mut current_part = vec![line];
1038 i += 1;
1039
1040 if options.preserve_breaks {
1042 let has_hard_break = line.ends_with(" ");
1044 let reflowed = reflow_line(line, options);
1045
1046 if has_hard_break && !reflowed.is_empty() {
1048 let mut reflowed_with_break = reflowed;
1049 let last_idx = reflowed_with_break.len() - 1;
1050 if !reflowed_with_break[last_idx].ends_with(" ") {
1051 reflowed_with_break[last_idx].push_str(" ");
1052 }
1053 result.extend(reflowed_with_break);
1054 } else {
1055 result.extend(reflowed);
1056 }
1057 } else {
1058 while i < lines.len() {
1060 let prev_line = if !current_part.is_empty() {
1061 current_part.last().unwrap()
1062 } else {
1063 ""
1064 };
1065 let next_line = lines[i];
1066 let next_trimmed = next_line.trim();
1067
1068 if next_trimmed.is_empty()
1070 || next_trimmed.starts_with('#')
1071 || next_trimmed.starts_with("```")
1072 || next_trimmed.starts_with("~~~")
1073 || next_trimmed.starts_with('>')
1074 || next_trimmed.starts_with('|')
1075 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1076 || is_horizontal_rule(next_trimmed)
1077 || (next_trimmed.starts_with('-')
1078 && !is_horizontal_rule(next_trimmed)
1079 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1080 || (next_trimmed.starts_with('*')
1081 && !is_horizontal_rule(next_trimmed)
1082 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1083 || (next_trimmed.starts_with('+')
1084 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1085 || is_numbered_list_item(next_trimmed)
1086 {
1087 break;
1088 }
1089
1090 if prev_line.ends_with(" ") {
1092 paragraph_parts.push(current_part.join(" "));
1094 current_part = vec![next_line];
1095 } else {
1096 current_part.push(next_line);
1097 }
1098 i += 1;
1099 }
1100
1101 if !current_part.is_empty() {
1103 if current_part.len() == 1 {
1104 paragraph_parts.push(current_part[0].to_string());
1106 } else {
1107 paragraph_parts.push(current_part.join(" "));
1108 }
1109 }
1110
1111 for (j, part) in paragraph_parts.iter().enumerate() {
1113 let reflowed = reflow_line(part, options);
1114 result.extend(reflowed);
1115
1116 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1118 let last_idx = result.len() - 1;
1119 if !result[last_idx].ends_with(" ") {
1120 result[last_idx].push_str(" ");
1121 }
1122 }
1123 }
1124 }
1125 }
1126
1127 let result_text = result.join("\n");
1129 if content.ends_with('\n') && !result_text.ends_with('\n') {
1130 format!("{result_text}\n")
1131 } else {
1132 result_text
1133 }
1134}
1135
1136#[cfg(test)]
1137mod tests {
1138 use super::*;
1139
1140 #[test]
1141 fn test_list_item_trailing_whitespace_removal() {
1142 let input = "1. First line with trailing spaces \n Second line with trailing spaces \n Third line\n";
1145
1146 let options = ReflowOptions {
1147 line_length: 999999,
1148 break_on_sentences: true, preserve_breaks: false,
1150 sentence_per_line: false,
1151 };
1152
1153 let result = reflow_markdown(input, &options);
1154
1155 eprintln!("Input: {input:?}");
1156 eprintln!("Result: {result:?}");
1157
1158 assert!(
1161 !result.contains(" "),
1162 "Result should not contain 3+ consecutive spaces: {result:?}"
1163 );
1164
1165 assert!(result.contains(" \n"), "Hard breaks should be preserved: {result:?}");
1167
1168 assert!(
1171 result.lines().count() >= 2,
1172 "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1173 result.lines().count()
1174 );
1175 }
1176
1177 #[test]
1178 fn test_reflow_simple_text() {
1179 let options = ReflowOptions {
1180 line_length: 20,
1181 ..Default::default()
1182 };
1183
1184 let input = "This is a very long line that needs to be wrapped";
1185 let result = reflow_line(input, &options);
1186
1187 assert_eq!(result.len(), 3);
1188 assert!(result[0].chars().count() <= 20);
1189 assert!(result[1].chars().count() <= 20);
1190 assert!(result[2].chars().count() <= 20);
1191 }
1192
1193 #[test]
1194 fn test_preserve_inline_code() {
1195 let options = ReflowOptions {
1196 line_length: 30,
1197 ..Default::default()
1198 };
1199
1200 let result = reflow_line("This line has `inline code` that should be preserved", &options);
1201 let joined = result.join(" ");
1203 assert!(joined.contains("`inline code`"));
1204 }
1205
1206 #[test]
1207 fn test_preserve_links() {
1208 let options = ReflowOptions {
1209 line_length: 40,
1210 ..Default::default()
1211 };
1212
1213 let text = "Check out [this link](https://example.com/very/long/url) for more info";
1214 let result = reflow_line(text, &options);
1215
1216 let joined = result.join(" ");
1218 assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1219 }
1220
#[test]
fn test_reference_link_patterns_fixed() {
    let options = ReflowOptions {
        line_length: 30,
        break_on_sentences: true,
        preserve_breaks: false,
        sentence_per_line: false,
    };

    // Each case pairs an input line with the link patterns that must survive reflow intact.
    let test_cases = vec![
        ("Check out [text][ref] for details", vec!["[text][ref]"]),
        ("See [text][] for info", vec!["[text][]"]),
        ("Visit [homepage] today", vec!["[homepage]"]),
        (
            "Links: [first][ref1] and [second][ref2] here",
            vec!["[first][ref1]", "[second][ref2]"],
        ),
        (
            "See [inline](url) and [reference][ref] links",
            vec!["[inline](url)", "[reference][ref]"],
        ),
    ];

    for (input, expected_patterns) in test_cases {
        println!("\nTesting: {input}");
        let result = reflow_line(input, &options);
        let joined = result.join(" ");
        println!("Result: {joined}");

        for expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }

        assert!(
            !joined.contains("[ ") || !joined.contains("] ["),
            "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
        );
    }
}
1271
1272 #[test]
1273 fn test_sentence_detection_basic() {
1274 assert!(is_sentence_boundary("Hello. World", 5));
1276 assert!(is_sentence_boundary("Test! Another", 4));
1277 assert!(is_sentence_boundary("Question? Answer", 8));
1278
1279 assert!(!is_sentence_boundary("Hello world", 5));
1281 assert!(!is_sentence_boundary("Test.com", 4));
1282 assert!(!is_sentence_boundary("3.14 pi", 1));
1283 }
1284
1285 #[test]
1286 fn test_sentence_detection_abbreviations() {
1287 assert!(!is_sentence_boundary("Mr. Smith", 2));
1289 assert!(!is_sentence_boundary("Dr. Jones", 2));
1290 assert!(!is_sentence_boundary("e.g. example", 3));
1291 assert!(!is_sentence_boundary("i.e. that is", 3));
1292 assert!(!is_sentence_boundary("etc. items", 3));
1293
1294 assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1296 }
1297
1298 #[test]
1299 fn test_split_into_sentences() {
1300 let text = "First sentence. Second sentence. Third one!";
1301 let sentences = split_into_sentences(text);
1302 assert_eq!(sentences.len(), 3);
1303 assert_eq!(sentences[0], "First sentence.");
1304 assert_eq!(sentences[1], "Second sentence.");
1305 assert_eq!(sentences[2], "Third one!");
1306
1307 let text2 = "Mr. Smith met Dr. Jones.";
1309 let sentences2 = split_into_sentences(text2);
1310 assert_eq!(sentences2.len(), 1);
1311 assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1312
1313 let text3 = "This is a single sentence.";
1315 let sentences3 = split_into_sentences(text3);
1316 assert_eq!(sentences3.len(), 1);
1317 assert_eq!(sentences3[0], "This is a single sentence.");
1318 }
1319
1320 #[test]
1321 fn test_sentence_per_line_reflow() {
1322 let options = ReflowOptions {
1323 line_length: 80,
1324 break_on_sentences: true,
1325 preserve_breaks: false,
1326 sentence_per_line: true,
1327 };
1328
1329 let input = "First sentence. Second sentence. Third sentence.";
1331 let result = reflow_line(input, &options);
1332 assert_eq!(result.len(), 3);
1333 assert_eq!(result[0], "First sentence.");
1334 assert_eq!(result[1], "Second sentence.");
1335 assert_eq!(result[2], "Third sentence.");
1336
1337 let input2 = "This has **bold**. And [a link](url).";
1339 let result2 = reflow_line(input2, &options);
1340 assert_eq!(result2.len(), 2);
1341 assert_eq!(result2[0], "This has **bold**.");
1342 assert_eq!(result2[1], "And [a link](url).");
1343 }
1344
1345 #[test]
1346 fn test_sentence_per_line_with_backticks() {
1347 let options = ReflowOptions {
1348 line_length: 80,
1349 break_on_sentences: true,
1350 preserve_breaks: false,
1351 sentence_per_line: true,
1352 };
1353
1354 let input = "This sentence has `code` in it. And this has `more code` too.";
1355 let result = reflow_line(input, &options);
1356 assert_eq!(result.len(), 2);
1357 assert_eq!(result[0], "This sentence has `code` in it.");
1358 assert_eq!(result[1], "And this has `more code` too.");
1359 }
1360
1361 #[test]
1362 fn test_sentence_per_line_with_backticks_in_parens() {
1363 let options = ReflowOptions {
1364 line_length: 80,
1365 break_on_sentences: true,
1366 preserve_breaks: false,
1367 sentence_per_line: true,
1368 };
1369
1370 let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1371 let result = reflow_line(input, &options);
1372 assert_eq!(result.len(), 2);
1373 assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1374 assert_eq!(result[1], "Next sentence.");
1375 }
1376
1377 #[test]
1378 fn test_sentence_per_line_with_questions_exclamations() {
1379 let options = ReflowOptions {
1380 line_length: 80,
1381 break_on_sentences: true,
1382 preserve_breaks: false,
1383 sentence_per_line: true,
1384 };
1385
1386 let input = "Is this a question? Yes it is! And a statement.";
1387 let result = reflow_line(input, &options);
1388 assert_eq!(result.len(), 3);
1389 assert_eq!(result[0], "Is this a question?");
1390 assert_eq!(result[1], "Yes it is!");
1391 assert_eq!(result[2], "And a statement.");
1392 }
1393
1394 #[test]
1395 fn test_reference_link_edge_cases() {
1396 let options = ReflowOptions {
1397 line_length: 40,
1398 break_on_sentences: true,
1399 preserve_breaks: false,
1400 sentence_per_line: false,
1401 };
1402
1403 let test_cases = vec![
1405 ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1407 (
1409 "Link [text with [nested] content][ref]",
1410 vec!["[text with [nested] content][ref]"],
1411 ),
1412 (
1414 "First [ref][link] then [inline](url)",
1415 vec!["[ref][link]", "[inline](url)"],
1416 ),
1417 ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1419 (
1421 "Complex [text with *emphasis*][] reference",
1422 vec!["[text with *emphasis*][]"],
1423 ),
1424 ];
1425
1426 for (input, expected_patterns) in test_cases {
1427 println!("\nTesting edge case: {input}");
1428 let result = reflow_line(input, &options);
1429 let joined = result.join(" ");
1430 println!("Result: {joined}");
1431
1432 for expected_pattern in expected_patterns {
1434 assert!(
1435 joined.contains(expected_pattern),
1436 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1437 );
1438 }
1439 }
1440 }
1441
1442 #[test]
1443 fn test_reflow_with_emphasis() {
1444 let options = ReflowOptions {
1445 line_length: 25,
1446 ..Default::default()
1447 };
1448
1449 let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1450
1451 let joined = result.join(" ");
1453 assert!(joined.contains("*emphasized*"));
1454 assert!(joined.contains("**strong**"));
1455 }
1456
1457 #[test]
1458 fn test_image_patterns_preserved() {
1459 let options = ReflowOptions {
1460 line_length: 30,
1461 ..Default::default()
1462 };
1463
1464 let test_cases = vec for details",
1469 vec"],
1470 ),
1471 ("See ![image][ref] for info", vec!["![image][ref]"]),
1473 ("Visit ![homepage][] today", vec!["![homepage][]"]),
1475 (
1477 "Images:  and ![second][ref2]",
1478 vec", "![second][ref2]"],
1479 ),
1480 ];
1481
1482 for (input, expected_patterns) in test_cases {
1483 println!("\nTesting: {input}");
1484 let result = reflow_line(input, &options);
1485 let joined = result.join(" ");
1486 println!("Result: {joined}");
1487
1488 for expected_pattern in expected_patterns {
1489 assert!(
1490 joined.contains(expected_pattern),
1491 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1492 );
1493 }
1494 }
1495 }
1496
1497 #[test]
1498 fn test_extended_markdown_patterns() {
1499 let options = ReflowOptions {
1500 line_length: 40,
1501 ..Default::default()
1502 };
1503
1504 let test_cases = vec![
1505 ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1507 (
1509 "Check [[wiki link]] and [[page|display]]",
1510 vec!["[[wiki link]]", "[[page|display]]"],
1511 ),
1512 (
1514 "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1515 vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1516 ),
1517 ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1519 (
1521 "Text with <span>tag</span> and <br/>",
1522 vec!["<span>", "</span>", "<br/>"],
1523 ),
1524 ("Non-breaking space and em—dash", vec![" ", "—"]),
1526 ];
1527
1528 for (input, expected_patterns) in test_cases {
1529 let result = reflow_line(input, &options);
1530 let joined = result.join(" ");
1531
1532 for pattern in expected_patterns {
1533 assert!(
1534 joined.contains(pattern),
1535 "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1536 );
1537 }
1538 }
1539 }
1540
1541 #[test]
1542 fn test_complex_mixed_patterns() {
1543 let options = ReflowOptions {
1544 line_length: 50,
1545 ..Default::default()
1546 };
1547
1548 let input = "Line with **bold**, `code`, [link](url), , ~~strike~~, $math$, :emoji:, and <tag> all together";
1550 let result = reflow_line(input, &options);
1551 let joined = result.join(" ");
1552
1553 assert!(joined.contains("**bold**"));
1555 assert!(joined.contains("`code`"));
1556 assert!(joined.contains("[link](url)"));
1557 assert!(joined.contains(""));
1558 assert!(joined.contains("~~strike~~"));
1559 assert!(joined.contains("$math$"));
1560 assert!(joined.contains(":emoji:"));
1561 assert!(joined.contains("<tag>"));
1562 }
1563
1564 #[test]
1565 fn test_footnote_patterns_preserved() {
1566 let options = ReflowOptions {
1567 line_length: 40,
1568 ..Default::default()
1569 };
1570
1571 let test_cases = vec![
1572 ("This has a footnote[^1] reference", vec!["[^1]"]),
1574 ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1576 ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1578 ];
1579
1580 for (input, expected_patterns) in test_cases {
1581 let result = reflow_line(input, &options);
1582 let joined = result.join(" ");
1583
1584 for expected_pattern in expected_patterns {
1585 assert!(
1586 joined.contains(expected_pattern),
1587 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1588 );
1589 }
1590 }
1591 }
1592
1593 #[test]
1594 fn test_reflow_markdown_numbered_lists() {
1595 let options = ReflowOptions {
1597 line_length: 50,
1598 ..Default::default()
1599 };
1600
1601 let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
16022. Short item
16033. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1604
1605 let result = reflow_markdown(content, &options);
1606
1607 let expected = r#"1. List `manifest` to find the manifest with the
1609 largest ID. Say it's
1610 `00000000000000000002.manifest` in this
1611 example.
16122. Short item
16133. Another long item that definitely exceeds the
1614 fifty character limit and needs wrapping"#;
1615
1616 assert_eq!(
1617 result, expected,
1618 "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1619 );
1620 }
1621
1622 #[test]
1623 fn test_reflow_markdown_bullet_lists() {
1624 let options = ReflowOptions {
1625 line_length: 40,
1626 ..Default::default()
1627 };
1628
1629 let content = r#"- First bullet point with a very long line that needs wrapping
1630* Second bullet using asterisk
1631+ Third bullet using plus sign
1632- Short one"#;
1633
1634 let result = reflow_markdown(content, &options);
1635
1636 let expected = r#"- First bullet point with a very long
1638 line that needs wrapping
1639* Second bullet using asterisk
1640+ Third bullet using plus sign
1641- Short one"#;
1642
1643 assert_eq!(
1644 result, expected,
1645 "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1646 );
1647 }
1648}