1use crate::utils::regex_cache::{
7 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options controlling how Markdown text is reflowed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target maximum line length, measured in characters.
    pub line_length: usize,
    /// Sentence-aware breaking flag.
    ///
    /// NOTE(review): not read by any code in this part of the file; presumably it asks for
    /// breaks at sentence boundaries. Confirm against the callers.
    pub break_on_sentences: bool,
    /// When set, existing line breaks are kept: paragraphs and list items are reflowed line by
    /// line instead of being joined first.
    pub preserve_breaks: bool,
    /// When set, every sentence is placed on its own line and `line_length` is not used for
    /// wrapping.
    pub sentence_per_line: bool,
}
23
24impl Default for ReflowOptions {
25 fn default() -> Self {
26 Self {
27 line_length: 80,
28 break_on_sentences: true,
29 preserve_breaks: false,
30 sentence_per_line: false,
31 }
32 }
33}
34
/// Reports whether the character at *character* index `pos` of `text` ends a sentence.
///
/// A boundary is a `.`, `!` or `?` that is immediately followed by a space and then an
/// uppercase character. Two exceptions return `false`:
///
/// * the word directly before the terminator is a known abbreviation (`Mr`, `e.g`, `etc`, ...),
///   compared case-insensitively against the whole word rather than as a suffix, so that
///   ordinary words such as "items" or "index" still end sentences;
/// * the terminator sits between two numeric characters.
///
/// Out-of-range positions return `false`. `pos` is converted to a byte offset before any
/// slicing, so non-ASCII text cannot trigger a char-boundary panic.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr", "Jr",
    ];

    // `pos` counts characters; slicing needs the matching byte offset.
    let Some((byte_pos, terminator)) = text.char_indices().nth(pos) else {
        return false;
    };
    if !matches!(terminator, '.' | '!' | '?') {
        return false;
    }

    // The terminator must be followed by exactly " X" where X is uppercase.
    let mut following = text[byte_pos + terminator.len_utf8()..].chars();
    if following.next() != Some(' ') {
        return false;
    }
    let Some(next_start) = following.next() else {
        return false;
    };
    if !next_start.is_uppercase() {
        return false;
    }

    // Last whitespace-delimited token before the terminator, without leading punctuation such
    // as an opening parenthesis or quote.
    let before = &text[..byte_pos];
    let last_word = before
        .rsplit(char::is_whitespace)
        .next()
        .unwrap_or("")
        .trim_start_matches(|c: char| !c.is_alphanumeric());
    if ABBREVIATIONS
        .iter()
        .any(|abbreviation| last_word.eq_ignore_ascii_case(abbreviation))
    {
        return false;
    }

    // A terminator wedged between two numeric characters is not a sentence end.
    let prev_is_numeric = before.chars().next_back().is_some_and(char::is_numeric);
    !(prev_is_numeric && next_start.is_numeric())
}
81
82pub fn split_into_sentences(text: &str) -> Vec<String> {
84 let mut sentences = Vec::new();
85 let mut current_sentence = String::new();
86 let mut chars = text.chars().peekable();
87 let mut pos = 0;
88
89 while let Some(c) = chars.next() {
90 current_sentence.push(c);
91
92 if is_sentence_boundary(text, pos) {
93 if chars.peek() == Some(&' ') {
95 chars.next();
96 pos += 1;
97 }
98
99 sentences.push(current_sentence.trim().to_string());
100 current_sentence.clear();
101 }
102
103 pos += 1;
104 }
105
106 if !current_sentence.trim().is_empty() {
108 sentences.push(current_sentence.trim().to_string());
109 }
110
111 sentences
112}
113
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_` or `*`, contains
/// nothing but that character and spaces, and has at least three of that character.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '_' | '*') {
        return false;
    }

    // Count marker characters, bailing out on anything that is neither marker nor space.
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
142
/// Returns `true` when `line` begins with one or more numeric characters followed by a `.`
/// that is either the end of the line or followed by a space.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first non-numeric character, i.e. the end of the leading number.
    match line.find(|c: char| !c.is_numeric()) {
        // Empty, entirely numeric, or not starting with a number at all.
        None | Some(0) => false,
        Some(number_end) => {
            let mut rest = line[number_end..].chars();
            rest.next() == Some('.') && matches!(rest.next(), None | Some(' '))
        }
    }
}
165
166pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
168 if options.sentence_per_line {
170 let elements = parse_markdown_elements(line);
171 return reflow_elements_sentence_per_line(&elements);
172 }
173
174 if line.chars().count() <= options.line_length {
176 return vec![line.to_string()];
177 }
178
179 let elements = parse_markdown_elements(line);
181
182 reflow_elements(&elements, options)
184}
185
/// One inline piece of a Markdown line, as produced by `parse_markdown_elements`.
///
/// Every variant other than `Text` is appended to the output as a single unit by the reflow
/// routines; only `Text` is split into words or sentences.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered as-is.
    Text(String),
    /// Inline link, rendered as `[text](url)`.
    Link { text: String, url: String },
    /// Reference link, rendered as `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Reference link with an empty label, rendered as `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference, rendered as `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image with alt text and URL; `len` counts five characters of markup around them.
    InlineImage { alt: String, url: String },
    /// Reference image, rendered as `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Reference image with an empty label, rendered as `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Footnote reference, rendered as `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough text, rendered as `~~text~~`.
    Strikethrough(String),
    /// Wiki link, rendered as `[[target]]`.
    WikiLink(String),
    /// Inline math, rendered as `$math$`.
    InlineMath(String),
    /// Display math, rendered as `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode, rendered as `:name:`.
    EmojiShortcode(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Inline code span, rendered between single backticks.
    Code(String),
    /// Bold text, rendered as `**text**`.
    Bold(String),
    /// Italic text, rendered as `*text*`.
    Italic(String),
}
228
229impl std::fmt::Display for Element {
230 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231 match self {
232 Element::Text(s) => write!(f, "{s}"),
233 Element::Link { text, url } => write!(f, "[{text}]({url})"),
234 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
235 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
236 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
237 Element::InlineImage { alt, url } => write!(f, ""),
238 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
239 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
240 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
241 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
242 Element::WikiLink(s) => write!(f, "[[{s}]]"),
243 Element::InlineMath(s) => write!(f, "${s}$"),
244 Element::DisplayMath(s) => write!(f, "$${s}$$"),
245 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
246 Element::HtmlTag(s) => write!(f, "{s}"),
247 Element::HtmlEntity(s) => write!(f, "{s}"),
248 Element::Code(s) => write!(f, "`{s}`"),
249 Element::Bold(s) => write!(f, "**{s}**"),
250 Element::Italic(s) => write!(f, "*{s}*"),
251 }
252 }
253}
254
255impl Element {
256 fn len(&self) -> usize {
257 match self {
258 Element::Text(s) => s.chars().count(),
259 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold(s) => s.chars().count() + 4, Element::Italic(s) => s.chars().count() + 2, }
278 }
279}
280
281fn parse_markdown_elements(text: &str) -> Vec<Element> {
290 let mut elements = Vec::new();
291 let mut remaining = text;
292
293 while !remaining.is_empty() {
294 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
296
297 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
300 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
301 {
302 earliest_match = Some((m.start(), "inline_image", m));
303 }
304
305 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
307 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
308 {
309 earliest_match = Some((m.start(), "ref_image", m));
310 }
311
312 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
314 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
315 {
316 earliest_match = Some((m.start(), "footnote_ref", m));
317 }
318
319 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
321 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
322 {
323 earliest_match = Some((m.start(), "inline_link", m));
324 }
325
326 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
328 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
329 {
330 earliest_match = Some((m.start(), "ref_link", m));
331 }
332
333 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
336 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
337 {
338 earliest_match = Some((m.start(), "shortcut_ref", m));
339 }
340
341 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
343 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
344 {
345 earliest_match = Some((m.start(), "wiki_link", m));
346 }
347
348 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
350 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
351 {
352 earliest_match = Some((m.start(), "display_math", m));
353 }
354
355 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
357 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
358 {
359 earliest_match = Some((m.start(), "inline_math", m));
360 }
361
362 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
364 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
365 {
366 earliest_match = Some((m.start(), "strikethrough", m));
367 }
368
369 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
371 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
372 {
373 earliest_match = Some((m.start(), "emoji", m));
374 }
375
376 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
378 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
379 {
380 earliest_match = Some((m.start(), "html_entity", m));
381 }
382
383 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
385 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
386 {
387 earliest_match = Some((m.start(), "html_tag", m));
388 }
389
390 let mut next_special = remaining.len();
392 let mut special_type = "";
393
394 if let Some(pos) = remaining.find('`')
395 && pos < next_special
396 {
397 next_special = pos;
398 special_type = "code";
399 }
400 if let Some(pos) = remaining.find("**")
401 && pos < next_special
402 {
403 next_special = pos;
404 special_type = "bold";
405 }
406 if let Some(pos) = remaining.find('*')
407 && pos < next_special
408 && !remaining[pos..].starts_with("**")
409 {
410 next_special = pos;
411 special_type = "italic";
412 }
413
414 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
416 pos < next_special
417 } else {
418 false
419 };
420
421 if should_process_markdown_link {
422 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
423
424 if pos > 0 {
426 elements.push(Element::Text(remaining[..pos].to_string()));
427 }
428
429 match pattern_type {
431 "inline_image" => {
432 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
433 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
434 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
435 elements.push(Element::InlineImage {
436 alt: alt.to_string(),
437 url: url.to_string(),
438 });
439 remaining = &remaining[match_obj.end()..];
440 } else {
441 elements.push(Element::Text("!".to_string()));
442 remaining = &remaining[1..];
443 }
444 }
445 "ref_image" => {
446 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
447 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
448 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
449
450 if reference.is_empty() {
451 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
452 } else {
453 elements.push(Element::ReferenceImage {
454 alt: alt.to_string(),
455 reference: reference.to_string(),
456 });
457 }
458 remaining = &remaining[match_obj.end()..];
459 } else {
460 elements.push(Element::Text("!".to_string()));
461 remaining = &remaining[1..];
462 }
463 }
464 "footnote_ref" => {
465 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
466 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
467 elements.push(Element::FootnoteReference { note: note.to_string() });
468 remaining = &remaining[match_obj.end()..];
469 } else {
470 elements.push(Element::Text("[".to_string()));
471 remaining = &remaining[1..];
472 }
473 }
474 "inline_link" => {
475 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
476 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
477 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
478 elements.push(Element::Link {
479 text: text.to_string(),
480 url: url.to_string(),
481 });
482 remaining = &remaining[match_obj.end()..];
483 } else {
484 elements.push(Element::Text("[".to_string()));
486 remaining = &remaining[1..];
487 }
488 }
489 "ref_link" => {
490 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
491 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
492 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
493
494 if reference.is_empty() {
495 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
497 } else {
498 elements.push(Element::ReferenceLink {
500 text: text.to_string(),
501 reference: reference.to_string(),
502 });
503 }
504 remaining = &remaining[match_obj.end()..];
505 } else {
506 elements.push(Element::Text("[".to_string()));
508 remaining = &remaining[1..];
509 }
510 }
511 "shortcut_ref" => {
512 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
513 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
514 elements.push(Element::ShortcutReference {
515 reference: reference.to_string(),
516 });
517 remaining = &remaining[match_obj.end()..];
518 } else {
519 elements.push(Element::Text("[".to_string()));
521 remaining = &remaining[1..];
522 }
523 }
524 "wiki_link" => {
525 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
526 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
527 elements.push(Element::WikiLink(content.to_string()));
528 remaining = &remaining[match_obj.end()..];
529 } else {
530 elements.push(Element::Text("[[".to_string()));
531 remaining = &remaining[2..];
532 }
533 }
534 "display_math" => {
535 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
536 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
537 elements.push(Element::DisplayMath(math.to_string()));
538 remaining = &remaining[match_obj.end()..];
539 } else {
540 elements.push(Element::Text("$$".to_string()));
541 remaining = &remaining[2..];
542 }
543 }
544 "inline_math" => {
545 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
546 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
547 elements.push(Element::InlineMath(math.to_string()));
548 remaining = &remaining[match_obj.end()..];
549 } else {
550 elements.push(Element::Text("$".to_string()));
551 remaining = &remaining[1..];
552 }
553 }
554 "strikethrough" => {
555 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
556 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
557 elements.push(Element::Strikethrough(text.to_string()));
558 remaining = &remaining[match_obj.end()..];
559 } else {
560 elements.push(Element::Text("~~".to_string()));
561 remaining = &remaining[2..];
562 }
563 }
564 "emoji" => {
565 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
566 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
567 elements.push(Element::EmojiShortcode(emoji.to_string()));
568 remaining = &remaining[match_obj.end()..];
569 } else {
570 elements.push(Element::Text(":".to_string()));
571 remaining = &remaining[1..];
572 }
573 }
574 "html_entity" => {
575 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
577 remaining = &remaining[match_obj.end()..];
578 }
579 "html_tag" => {
580 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
582 remaining = &remaining[match_obj.end()..];
583 }
584 _ => {
585 elements.push(Element::Text("[".to_string()));
587 remaining = &remaining[1..];
588 }
589 }
590 } else {
591 if next_special > 0 && next_special < remaining.len() {
595 elements.push(Element::Text(remaining[..next_special].to_string()));
596 remaining = &remaining[next_special..];
597 }
598
599 match special_type {
601 "code" => {
602 if let Some(code_end) = remaining[1..].find('`') {
604 let code = &remaining[1..1 + code_end];
605 elements.push(Element::Code(code.to_string()));
606 remaining = &remaining[1 + code_end + 1..];
607 } else {
608 elements.push(Element::Text(remaining.to_string()));
610 break;
611 }
612 }
613 "bold" => {
614 if let Some(bold_end) = remaining[2..].find("**") {
616 let bold_text = &remaining[2..2 + bold_end];
617 elements.push(Element::Bold(bold_text.to_string()));
618 remaining = &remaining[2 + bold_end + 2..];
619 } else {
620 elements.push(Element::Text("**".to_string()));
622 remaining = &remaining[2..];
623 }
624 }
625 "italic" => {
626 if let Some(italic_end) = remaining[1..].find('*') {
628 let italic_text = &remaining[1..1 + italic_end];
629 elements.push(Element::Italic(italic_text.to_string()));
630 remaining = &remaining[1 + italic_end + 1..];
631 } else {
632 elements.push(Element::Text("*".to_string()));
634 remaining = &remaining[1..];
635 }
636 }
637 _ => {
638 elements.push(Element::Text(remaining.to_string()));
640 break;
641 }
642 }
643 }
644 }
645
646 elements
647}
648
649fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
651 let mut lines = Vec::new();
652 let mut current_line = String::new();
653
654 for element in elements {
655 let element_str = format!("{element}");
656
657 if let Element::Text(text) = element {
659 let combined = format!("{current_line}{text}");
661 let sentences = split_into_sentences(&combined);
662
663 if sentences.len() > 1 {
664 for (i, sentence) in sentences.iter().enumerate() {
666 if i == 0 {
667 lines.push(sentence.to_string());
669 } else if i == sentences.len() - 1 {
670 current_line = sentence.to_string();
672 } else {
673 lines.push(sentence.to_string());
675 }
676 }
677 } else {
678 current_line = combined;
680 }
681 } else {
682 if !current_line.is_empty()
685 && !current_line.ends_with(' ')
686 && !current_line.ends_with('(')
687 && !current_line.ends_with('[')
688 {
689 current_line.push(' ');
690 }
691 current_line.push_str(&element_str);
692 }
693 }
694
695 if !current_line.is_empty() {
697 lines.push(current_line.trim().to_string());
698 }
699
700 lines
701}
702
703fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
705 let mut lines = Vec::new();
706 let mut current_line = String::new();
707 let mut current_length = 0;
708
709 for element in elements {
710 let element_str = format!("{element}");
711 let element_len = element.len();
712
713 if let Element::Text(text) = element {
715 let words: Vec<&str> = text.split_whitespace().collect();
717
718 for word in words {
719 let word_len = word.chars().count();
720 if current_length > 0 && current_length + 1 + word_len > options.line_length {
721 lines.push(current_line.trim().to_string());
723 current_line = word.to_string();
724 current_length = word_len;
725 } else {
726 if current_length > 0 {
728 current_line.push(' ');
729 current_length += 1;
730 }
731 current_line.push_str(word);
732 current_length += word_len;
733 }
734 }
735 } else {
736 if current_length > 0 && current_length + 1 + element_len > options.line_length {
739 lines.push(current_line.trim().to_string());
741 current_line = element_str;
742 current_length = element_len;
743 } else {
744 if current_length > 0 {
746 current_line.push(' ');
747 current_length += 1;
748 }
749 current_line.push_str(&element_str);
750 current_length += element_len;
751 }
752 }
753 }
754
755 if !current_line.is_empty() {
757 lines.push(current_line.trim_end().to_string());
758 }
759
760 lines
761}
762
763pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
765 let lines: Vec<&str> = content.lines().collect();
766 let mut result = Vec::new();
767 let mut i = 0;
768
769 while i < lines.len() {
770 let line = lines[i];
771 let trimmed = line.trim();
772
773 if trimmed.is_empty() {
775 result.push(String::new());
776 i += 1;
777 continue;
778 }
779
780 if trimmed.starts_with('#') {
782 result.push(line.to_string());
783 i += 1;
784 continue;
785 }
786
787 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
789 result.push(line.to_string());
790 i += 1;
791 while i < lines.len() {
793 result.push(lines[i].to_string());
794 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
795 i += 1;
796 break;
797 }
798 i += 1;
799 }
800 continue;
801 }
802
803 if line.starts_with(" ") || line.starts_with("\t") {
805 result.push(line.to_string());
807 i += 1;
808 while i < lines.len() {
809 let next_line = lines[i];
810 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
812 result.push(next_line.to_string());
813 i += 1;
814 } else {
815 break;
816 }
817 }
818 continue;
819 }
820
821 if trimmed.starts_with('>') {
823 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
824 let quote_content = &line[quote_prefix.len()..].trim_start();
825
826 let reflowed = reflow_line(quote_content, options);
827 for reflowed_line in reflowed.iter() {
828 result.push(format!("{quote_prefix} {reflowed_line}"));
829 }
830 i += 1;
831 continue;
832 }
833
834 if is_horizontal_rule(trimmed) {
836 result.push(line.to_string());
837 i += 1;
838 continue;
839 }
840
841 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
843 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
844 || trimmed.starts_with('+')
845 || is_numbered_list_item(trimmed)
846 {
847 let indent = line.len() - line.trim_start().len();
849 let indent_str = " ".repeat(indent);
850
851 let mut marker_end = indent;
854 let mut content_start = indent;
855
856 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
857 if let Some(period_pos) = line[indent..].find('.') {
859 marker_end = indent + period_pos + 1; content_start = marker_end;
861 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
863 content_start += 1;
864 }
865 }
866 } else {
867 marker_end = indent + 1; content_start = marker_end;
870 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
872 content_start += 1;
873 }
874 }
875
876 let marker = &line[indent..marker_end];
877
878 let mut list_content = vec![line[content_start..].to_string()];
880 i += 1;
881
882 while i < lines.len() {
884 let next_line = lines[i];
885 let next_trimmed = next_line.trim();
886
887 if next_trimmed.is_empty()
889 || next_trimmed.starts_with('#')
890 || next_trimmed.starts_with("```")
891 || next_trimmed.starts_with("~~~")
892 || next_trimmed.starts_with('>')
893 || next_trimmed.starts_with('|')
894 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
895 || is_horizontal_rule(next_trimmed)
896 || (next_trimmed.starts_with('-')
897 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
898 || (next_trimmed.starts_with('*')
899 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
900 || (next_trimmed.starts_with('+')
901 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
902 || is_numbered_list_item(next_trimmed)
903 {
904 break;
905 }
906
907 let next_indent = next_line.len() - next_line.trim_start().len();
909 if next_indent >= content_start {
910 list_content.push(next_line.trim_start().to_string());
913 i += 1;
914 } else {
915 break;
917 }
918 }
919
920 let combined_content = if options.preserve_breaks {
922 list_content[0].clone()
923 } else {
924 list_content.join(" ")
925 };
926
927 let trimmed_marker = marker;
929 let continuation_spaces = content_start;
930
931 let prefix_length = indent + trimmed_marker.len() + 1;
933
934 let adjusted_options = ReflowOptions {
936 line_length: options.line_length.saturating_sub(prefix_length),
937 ..options.clone()
938 };
939
940 let reflowed = reflow_line(&combined_content, &adjusted_options);
941 for (j, reflowed_line) in reflowed.iter().enumerate() {
942 if j == 0 {
943 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
944 } else {
945 let continuation_indent = " ".repeat(continuation_spaces);
947 result.push(format!("{continuation_indent}{reflowed_line}"));
948 }
949 }
950 continue;
951 }
952
953 if trimmed.contains('|') {
955 result.push(line.to_string());
956 i += 1;
957 continue;
958 }
959
960 if trimmed.starts_with('[') && line.contains("]:") {
962 result.push(line.to_string());
963 i += 1;
964 continue;
965 }
966
967 let mut is_single_line_paragraph = true;
969 if i + 1 < lines.len() {
970 let next_line = lines[i + 1];
971 let next_trimmed = next_line.trim();
972 if !next_trimmed.is_empty()
974 && !next_trimmed.starts_with('#')
975 && !next_trimmed.starts_with("```")
976 && !next_trimmed.starts_with("~~~")
977 && !next_trimmed.starts_with('>')
978 && !next_trimmed.starts_with('|')
979 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
980 && !is_horizontal_rule(next_trimmed)
981 && !(next_trimmed.starts_with('-')
982 && !is_horizontal_rule(next_trimmed)
983 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
984 && !(next_trimmed.starts_with('*')
985 && !is_horizontal_rule(next_trimmed)
986 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
987 && !(next_trimmed.starts_with('+')
988 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
989 && !is_numbered_list_item(next_trimmed)
990 {
991 is_single_line_paragraph = false;
992 }
993 }
994
995 if is_single_line_paragraph && line.chars().count() <= options.line_length {
997 result.push(line.to_string());
998 i += 1;
999 continue;
1000 }
1001
1002 let mut paragraph_parts = Vec::new();
1004 let mut current_part = vec![line];
1005 i += 1;
1006
1007 if options.preserve_breaks {
1009 let has_hard_break = line.ends_with(" ");
1011 let reflowed = reflow_line(line, options);
1012
1013 if has_hard_break && !reflowed.is_empty() {
1015 let mut reflowed_with_break = reflowed;
1016 let last_idx = reflowed_with_break.len() - 1;
1017 if !reflowed_with_break[last_idx].ends_with(" ") {
1018 reflowed_with_break[last_idx].push_str(" ");
1019 }
1020 result.extend(reflowed_with_break);
1021 } else {
1022 result.extend(reflowed);
1023 }
1024 } else {
1025 while i < lines.len() {
1027 let prev_line = if !current_part.is_empty() {
1028 current_part.last().unwrap()
1029 } else {
1030 ""
1031 };
1032 let next_line = lines[i];
1033 let next_trimmed = next_line.trim();
1034
1035 if next_trimmed.is_empty()
1037 || next_trimmed.starts_with('#')
1038 || next_trimmed.starts_with("```")
1039 || next_trimmed.starts_with("~~~")
1040 || next_trimmed.starts_with('>')
1041 || next_trimmed.starts_with('|')
1042 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1043 || is_horizontal_rule(next_trimmed)
1044 || (next_trimmed.starts_with('-')
1045 && !is_horizontal_rule(next_trimmed)
1046 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1047 || (next_trimmed.starts_with('*')
1048 && !is_horizontal_rule(next_trimmed)
1049 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1050 || (next_trimmed.starts_with('+')
1051 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1052 || is_numbered_list_item(next_trimmed)
1053 {
1054 break;
1055 }
1056
1057 if prev_line.ends_with(" ") {
1059 paragraph_parts.push(current_part.join(" "));
1061 current_part = vec![next_line];
1062 } else {
1063 current_part.push(next_line);
1064 }
1065 i += 1;
1066 }
1067
1068 if !current_part.is_empty() {
1070 if current_part.len() == 1 {
1071 paragraph_parts.push(current_part[0].to_string());
1073 } else {
1074 paragraph_parts.push(current_part.join(" "));
1075 }
1076 }
1077
1078 for (j, part) in paragraph_parts.iter().enumerate() {
1080 let reflowed = reflow_line(part, options);
1081 result.extend(reflowed);
1082
1083 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1085 let last_idx = result.len() - 1;
1086 if !result[last_idx].ends_with(" ") {
1087 result[last_idx].push_str(" ");
1088 }
1089 }
1090 }
1091 }
1092 }
1093
1094 let result_text = result.join("\n");
1096 if content.ends_with('\n') && !result_text.ends_with('\n') {
1097 format!("{result_text}\n")
1098 } else {
1099 result_text
1100 }
1101}
1102
1103#[cfg(test)]
1104mod tests {
1105 use super::*;
1106
1107 #[test]
1108 fn test_reflow_simple_text() {
1109 let options = ReflowOptions {
1110 line_length: 20,
1111 ..Default::default()
1112 };
1113
1114 let input = "This is a very long line that needs to be wrapped";
1115 let result = reflow_line(input, &options);
1116
1117 assert_eq!(result.len(), 3);
1118 assert!(result[0].chars().count() <= 20);
1119 assert!(result[1].chars().count() <= 20);
1120 assert!(result[2].chars().count() <= 20);
1121 }
1122
1123 #[test]
1124 fn test_preserve_inline_code() {
1125 let options = ReflowOptions {
1126 line_length: 30,
1127 ..Default::default()
1128 };
1129
1130 let result = reflow_line("This line has `inline code` that should be preserved", &options);
1131 let joined = result.join(" ");
1133 assert!(joined.contains("`inline code`"));
1134 }
1135
1136 #[test]
1137 fn test_preserve_links() {
1138 let options = ReflowOptions {
1139 line_length: 40,
1140 ..Default::default()
1141 };
1142
1143 let text = "Check out [this link](https://example.com/very/long/url) for more info";
1144 let result = reflow_line(text, &options);
1145
1146 let joined = result.join(" ");
1148 assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1149 }
1150
1151 #[test]
1152 fn test_reference_link_patterns_fixed() {
1153 let options = ReflowOptions {
1154 line_length: 30,
1155 break_on_sentences: true,
1156 preserve_breaks: false,
1157 sentence_per_line: false,
1158 };
1159
1160 let test_cases = vec![
1162 ("Check out [text][ref] for details", vec!["[text][ref]"]),
1164 ("See [text][] for info", vec!["[text][]"]),
1166 ("Visit [homepage] today", vec!["[homepage]"]),
1168 (
1170 "Links: [first][ref1] and [second][ref2] here",
1171 vec!["[first][ref1]", "[second][ref2]"],
1172 ),
1173 (
1175 "See [inline](url) and [reference][ref] links",
1176 vec", "[reference][ref]"],
1177 ),
1178 ];
1179
1180 for (input, expected_patterns) in test_cases {
1181 println!("\nTesting: {input}");
1182 let result = reflow_line(input, &options);
1183 let joined = result.join(" ");
1184 println!("Result: {joined}");
1185
1186 for expected_pattern in expected_patterns {
1188 assert!(
1189 joined.contains(expected_pattern),
1190 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1191 );
1192 }
1193
1194 assert!(
1196 !joined.contains("[ ") || !joined.contains("] ["),
1197 "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
1198 );
1199 }
1200 }
1201
1202 #[test]
1203 fn test_sentence_detection_basic() {
1204 assert!(is_sentence_boundary("Hello. World", 5));
1206 assert!(is_sentence_boundary("Test! Another", 4));
1207 assert!(is_sentence_boundary("Question? Answer", 8));
1208
1209 assert!(!is_sentence_boundary("Hello world", 5));
1211 assert!(!is_sentence_boundary("Test.com", 4));
1212 assert!(!is_sentence_boundary("3.14 pi", 1));
1213 }
1214
1215 #[test]
1216 fn test_sentence_detection_abbreviations() {
1217 assert!(!is_sentence_boundary("Mr. Smith", 2));
1219 assert!(!is_sentence_boundary("Dr. Jones", 2));
1220 assert!(!is_sentence_boundary("e.g. example", 3));
1221 assert!(!is_sentence_boundary("i.e. that is", 3));
1222 assert!(!is_sentence_boundary("etc. items", 3));
1223
1224 assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1226 }
1227
1228 #[test]
1229 fn test_split_into_sentences() {
1230 let text = "First sentence. Second sentence. Third one!";
1231 let sentences = split_into_sentences(text);
1232 assert_eq!(sentences.len(), 3);
1233 assert_eq!(sentences[0], "First sentence.");
1234 assert_eq!(sentences[1], "Second sentence.");
1235 assert_eq!(sentences[2], "Third one!");
1236
1237 let text2 = "Mr. Smith met Dr. Jones.";
1239 let sentences2 = split_into_sentences(text2);
1240 assert_eq!(sentences2.len(), 1);
1241 assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1242
1243 let text3 = "This is a single sentence.";
1245 let sentences3 = split_into_sentences(text3);
1246 assert_eq!(sentences3.len(), 1);
1247 assert_eq!(sentences3[0], "This is a single sentence.");
1248 }
1249
1250 #[test]
1251 fn test_sentence_per_line_reflow() {
1252 let options = ReflowOptions {
1253 line_length: 80,
1254 break_on_sentences: true,
1255 preserve_breaks: false,
1256 sentence_per_line: true,
1257 };
1258
1259 let input = "First sentence. Second sentence. Third sentence.";
1261 let result = reflow_line(input, &options);
1262 assert_eq!(result.len(), 3);
1263 assert_eq!(result[0], "First sentence.");
1264 assert_eq!(result[1], "Second sentence.");
1265 assert_eq!(result[2], "Third sentence.");
1266
1267 let input2 = "This has **bold**. And [a link](url).";
1269 let result2 = reflow_line(input2, &options);
1270 assert_eq!(result2.len(), 2);
1271 assert_eq!(result2[0], "This has **bold**.");
1272 assert_eq!(result2[1], "And [a link](url).");
1273 }
1274
1275 #[test]
1276 fn test_sentence_per_line_with_backticks() {
1277 let options = ReflowOptions {
1278 line_length: 80,
1279 break_on_sentences: true,
1280 preserve_breaks: false,
1281 sentence_per_line: true,
1282 };
1283
1284 let input = "This sentence has `code` in it. And this has `more code` too.";
1285 let result = reflow_line(input, &options);
1286 assert_eq!(result.len(), 2);
1287 assert_eq!(result[0], "This sentence has `code` in it.");
1288 assert_eq!(result[1], "And this has `more code` too.");
1289 }
1290
1291 #[test]
1292 fn test_sentence_per_line_with_backticks_in_parens() {
1293 let options = ReflowOptions {
1294 line_length: 80,
1295 break_on_sentences: true,
1296 preserve_breaks: false,
1297 sentence_per_line: true,
1298 };
1299
1300 let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1301 let result = reflow_line(input, &options);
1302 assert_eq!(result.len(), 2);
1303 assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1304 assert_eq!(result[1], "Next sentence.");
1305 }
1306
1307 #[test]
1308 fn test_sentence_per_line_with_questions_exclamations() {
1309 let options = ReflowOptions {
1310 line_length: 80,
1311 break_on_sentences: true,
1312 preserve_breaks: false,
1313 sentence_per_line: true,
1314 };
1315
1316 let input = "Is this a question? Yes it is! And a statement.";
1317 let result = reflow_line(input, &options);
1318 assert_eq!(result.len(), 3);
1319 assert_eq!(result[0], "Is this a question?");
1320 assert_eq!(result[1], "Yes it is!");
1321 assert_eq!(result[2], "And a statement.");
1322 }
1323
1324 #[test]
1325 fn test_reference_link_edge_cases() {
1326 let options = ReflowOptions {
1327 line_length: 40,
1328 break_on_sentences: true,
1329 preserve_breaks: false,
1330 sentence_per_line: false,
1331 };
1332
1333 let test_cases = vec![
1335 ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1337 (
1339 "Link [text with [nested] content][ref]",
1340 vec!["[text with [nested] content][ref]"],
1341 ),
1342 (
1344 "First [ref][link] then [inline](url)",
1345 vec!["[ref][link]", "[inline](url)"],
1346 ),
1347 ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1349 (
1351 "Complex [text with *emphasis*][] reference",
1352 vec!["[text with *emphasis*][]"],
1353 ),
1354 ];
1355
1356 for (input, expected_patterns) in test_cases {
1357 println!("\nTesting edge case: {input}");
1358 let result = reflow_line(input, &options);
1359 let joined = result.join(" ");
1360 println!("Result: {joined}");
1361
1362 for expected_pattern in expected_patterns {
1364 assert!(
1365 joined.contains(expected_pattern),
1366 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1367 );
1368 }
1369 }
1370 }
1371
1372 #[test]
1373 fn test_reflow_with_emphasis() {
1374 let options = ReflowOptions {
1375 line_length: 25,
1376 ..Default::default()
1377 };
1378
1379 let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1380
1381 let joined = result.join(" ");
1383 assert!(joined.contains("*emphasized*"));
1384 assert!(joined.contains("**strong**"));
1385 }
1386
1387 #[test]
1388 fn test_image_patterns_preserved() {
1389 let options = ReflowOptions {
1390 line_length: 30,
1391 ..Default::default()
1392 };
1393
1394 let test_cases = vec for details",
1399 vec"],
1400 ),
1401 ("See ![image][ref] for info", vec!["![image][ref]"]),
1403 ("Visit ![homepage][] today", vec!["![homepage][]"]),
1405 (
1407 "Images:  and ![second][ref2]",
1408 vec", "![second][ref2]"],
1409 ),
1410 ];
1411
1412 for (input, expected_patterns) in test_cases {
1413 println!("\nTesting: {input}");
1414 let result = reflow_line(input, &options);
1415 let joined = result.join(" ");
1416 println!("Result: {joined}");
1417
1418 for expected_pattern in expected_patterns {
1419 assert!(
1420 joined.contains(expected_pattern),
1421 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1422 );
1423 }
1424 }
1425 }
1426
1427 #[test]
1428 fn test_extended_markdown_patterns() {
1429 let options = ReflowOptions {
1430 line_length: 40,
1431 ..Default::default()
1432 };
1433
1434 let test_cases = vec![
1435 ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1437 (
1439 "Check [[wiki link]] and [[page|display]]",
1440 vec!["[[wiki link]]", "[[page|display]]"],
1441 ),
1442 (
1444 "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1445 vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1446 ),
1447 ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1449 (
1451 "Text with <span>tag</span> and <br/>",
1452 vec!["<span>", "</span>", "<br/>"],
1453 ),
1454 ("Non-breaking space and em—dash", vec![" ", "—"]),
1456 ];
1457
1458 for (input, expected_patterns) in test_cases {
1459 let result = reflow_line(input, &options);
1460 let joined = result.join(" ");
1461
1462 for pattern in expected_patterns {
1463 assert!(
1464 joined.contains(pattern),
1465 "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1466 );
1467 }
1468 }
1469 }
1470
1471 #[test]
1472 fn test_complex_mixed_patterns() {
1473 let options = ReflowOptions {
1474 line_length: 50,
1475 ..Default::default()
1476 };
1477
1478 let input = "Line with **bold**, `code`, [link](url), , ~~strike~~, $math$, :emoji:, and <tag> all together";
1480 let result = reflow_line(input, &options);
1481 let joined = result.join(" ");
1482
1483 assert!(joined.contains("**bold**"));
1485 assert!(joined.contains("`code`"));
1486 assert!(joined.contains("[link](url)"));
1487 assert!(joined.contains(""));
1488 assert!(joined.contains("~~strike~~"));
1489 assert!(joined.contains("$math$"));
1490 assert!(joined.contains(":emoji:"));
1491 assert!(joined.contains("<tag>"));
1492 }
1493
1494 #[test]
1495 fn test_footnote_patterns_preserved() {
1496 let options = ReflowOptions {
1497 line_length: 40,
1498 ..Default::default()
1499 };
1500
1501 let test_cases = vec![
1502 ("This has a footnote[^1] reference", vec!["[^1]"]),
1504 ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1506 ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1508 ];
1509
1510 for (input, expected_patterns) in test_cases {
1511 let result = reflow_line(input, &options);
1512 let joined = result.join(" ");
1513
1514 for expected_pattern in expected_patterns {
1515 assert!(
1516 joined.contains(expected_pattern),
1517 "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1518 );
1519 }
1520 }
1521 }
1522
1523 #[test]
1524 fn test_reflow_markdown_numbered_lists() {
1525 let options = ReflowOptions {
1527 line_length: 50,
1528 ..Default::default()
1529 };
1530
1531 let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
15322. Short item
15333. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1534
1535 let result = reflow_markdown(content, &options);
1536
1537 let expected = r#"1. List `manifest` to find the manifest with the
1539 largest ID. Say it's
1540 `00000000000000000002.manifest` in this
1541 example.
15422. Short item
15433. Another long item that definitely exceeds the
1544 fifty character limit and needs wrapping"#;
1545
1546 assert_eq!(
1547 result, expected,
1548 "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1549 );
1550 }
1551
1552 #[test]
1553 fn test_reflow_markdown_bullet_lists() {
1554 let options = ReflowOptions {
1555 line_length: 40,
1556 ..Default::default()
1557 };
1558
1559 let content = r#"- First bullet point with a very long line that needs wrapping
1560* Second bullet using asterisk
1561+ Third bullet using plus sign
1562- Short one"#;
1563
1564 let result = reflow_markdown(content, &options);
1565
1566 let expected = r#"- First bullet point with a very long
1568 line that needs wrapping
1569* Second bullet using asterisk
1570+ Third bullet using plus sign
1571- Short one"#;
1572
1573 assert_eq!(
1574 result, expected,
1575 "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1576 );
1577 }
1578}