1use crate::utils::is_definition_list_item;
7use crate::utils::regex_cache::{
8 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
9 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
10 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
11};
/// Settings that control how Markdown text is reflowed.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Maximum width of a reflowed line, counted in `char`s.
    pub line_length: usize,
    /// Sentence-aware breaking switch.
    ///
    /// NOTE(review): nothing in the visible part of this file reads this flag —
    /// confirm where (or whether) it is consumed.
    pub break_on_sentences: bool,
    /// When `true`, `reflow_markdown` reflows each paragraph line on its own
    /// instead of first joining consecutive lines into a single paragraph.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` emits one sentence per output line and does not
    /// apply `line_length`.
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// 80-character lines, `break_on_sentences` enabled, `preserve_breaks` and
    /// `sentence_per_line` disabled.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
35
/// Reports whether `text` ends with a period that belongs to a known abbreviation
/// (such as `etc.`, `e.g.` or `Dr.`) rather than terminating a sentence.
///
/// All trailing periods are stripped, then the last whitespace-separated word is
/// compared ASCII-case-insensitively against the abbreviation list. Text that does
/// not end in `.` is never treated as an abbreviation.
fn text_ends_with_abbreviation(text: &str) -> bool {
    const ABBREVIATIONS: &[&str] = &[
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr", "Jr",
    ];

    if !text.ends_with('.') {
        return false;
    }

    // Only the final word matters, so read it from the back instead of walking
    // the whole string.
    text.trim_end_matches('.')
        .split_whitespace()
        .next_back()
        .is_some_and(|last_word| ABBREVIATIONS.iter().any(|abbr| last_word.eq_ignore_ascii_case(abbr)))
}

/// Decides whether the character at *character index* `pos` of `text` ends a
/// sentence.
///
/// A boundary is `.`, `!` or `?` followed by a space, optional further
/// whitespace, and then an uppercase character. A period is rejected when the
/// text up to and including it ends with a known abbreviation, or when it sits
/// between two numeric characters.
///
/// `pos` counts `char`s, not bytes. It is converted to a byte offset before any
/// slicing so that multi-byte characters earlier in `text` can neither shift the
/// slice nor split a character (which would panic).
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    let mut rest = text.char_indices().skip(pos);

    let Some((byte_pos, punctuation)) = rest.next() else {
        return false;
    };
    if !matches!(punctuation, '.' | '!' | '?') {
        return false;
    }

    // The punctuation must be followed directly by a plain space.
    if !matches!(rest.next(), Some((_, ' '))) {
        return false;
    }

    // First non-whitespace character after the space; none means end of text.
    let Some((_, next_char)) = rest.find(|(_, ch)| !ch.is_whitespace()) else {
        return false;
    };
    if !next_char.is_uppercase() {
        return false;
    }

    if pos > 0 && punctuation == '.' {
        let sentence_end = byte_pos + punctuation.len_utf8();
        if text_ends_with_abbreviation(&text[..sentence_end]) {
            return false;
        }

        let follows_digit = text[..byte_pos].chars().next_back().is_some_and(char::is_numeric);
        if follows_digit && next_char.is_numeric() {
            return false;
        }
    }

    true
}

/// Splits `text` into sentences using [`is_sentence_boundary`].
///
/// Each returned sentence is trimmed and keeps its closing punctuation. Text
/// after the last boundary is returned as a final entry unless it is blank.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    let mut sentences = Vec::new();
    // Byte offset at which the sentence currently being collected starts.
    let mut sentence_start = 0;

    for (char_pos, (byte_pos, ch)) in text.char_indices().enumerate() {
        // Cheap pre-filter: only sentence punctuation can be a boundary.
        if matches!(ch, '.' | '!' | '?') && is_sentence_boundary(text, char_pos) {
            let sentence_end = byte_pos + ch.len_utf8();
            sentences.push(text[sentence_start..sentence_end].trim().to_string());
            // A boundary guarantees exactly one ASCII space right after the
            // punctuation; skip it so it does not lead the next sentence.
            sentence_start = sentence_end + 1;
        }
    }

    let tail = text[sentence_start..].trim();
    if !tail.is_empty() {
        sentences.push(tail.to_string());
    }
    sentences
}
158
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_` or
/// `*`, contains nothing but that marker and spaces, and has at least three
/// markers.
fn is_horizontal_rule(line: &str) -> bool {
    if line.len() < 3 {
        return false;
    }

    let marker = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    // Count markers while rejecting anything that is neither marker nor space.
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
187
/// Returns `true` when `line` starts with an ordered-list marker: one or more
/// numeric characters, a period, and then either the end of the line or a space.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first character that is not numeric. A line that is
    // empty, entirely numeric, or does not start with a numeric character
    // cannot be a numbered item.
    let digits_end = match line.find(|c: char| !c.is_numeric()) {
        Some(0) | None => return false,
        Some(offset) => offset,
    };

    match line[digits_end..].strip_prefix('.') {
        Some(after_period) => after_period.is_empty() || after_period.starts_with(' '),
        None => false,
    }
}
210
/// Returns `true` when `line`, ignoring one trailing `\r`, ends with a hard line
/// break marker: a trailing space sequence or a backslash.
///
/// NOTE(review): the space literal below is reproduced exactly as it appears in
/// the source. CommonMark requires two or more trailing spaces for a hard break —
/// confirm the literal's intended width.
fn has_hard_break(line: &str) -> bool {
    let without_cr = match line.strip_suffix('\r') {
        Some(stripped) => stripped,
        None => line,
    };

    if without_cr.ends_with('\\') {
        return true;
    }
    without_cr.ends_with(" ")
}
220
/// Trims trailing whitespace from `s` while keeping a hard-break marker.
///
/// One trailing `\r` is dropped first. A line ending in a backslash is returned
/// unchanged. A line ending in the space marker is trimmed and the marker is
/// re-appended, unless nothing but whitespace remains, in which case the result
/// is empty. Any other line is simply right-trimmed.
///
/// NOTE(review): the space literals are reproduced exactly as they appear in the
/// source; confirm their width against the two-space CommonMark hard break.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    if !line.ends_with(" ") {
        return content.to_owned();
    }
    if content.is_empty() {
        return String::new();
    }

    let mut kept = String::from(content);
    kept.push_str(" ");
    kept
}
251
252pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
253 if options.sentence_per_line {
255 let elements = parse_markdown_elements(line);
256 return reflow_elements_sentence_per_line(&elements);
257 }
258
259 if line.chars().count() <= options.line_length {
261 return vec![line.to_string()];
262 }
263
264 let elements = parse_markdown_elements(line);
266
267 reflow_elements(&elements, options)
269}
270
/// An inline piece of a Markdown line, as produced by `parse_markdown_elements`.
///
/// `Display` renders each variant back to its Markdown source form and
/// [`Element::len`] reports the character width of that rendering.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, stored verbatim.
    Text(String),
    /// Inline link, rendered as `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link, rendered as `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link, rendered as `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference, rendered as `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image, rendered as `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Full reference image, rendered as `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image, rendered as `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Footnote reference, rendered as `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough text, rendered as `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link, rendered as `[[target]]`.
    WikiLink(String),
    /// Inline math, rendered as `$math$`.
    InlineMath(String),
    /// Display math, rendered as `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode, rendered as `:name:`.
    EmojiShortcode(String),
    /// Raw HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Inline code span, rendered between single backticks.
    Code(String),
    /// Strong emphasis, rendered as `**text**`.
    Bold(String),
    /// Emphasis, rendered as `*text*`.
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Writes the Markdown source form of the element.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax: an empty rendering would silently
            // delete the image and disagree with `len()` (alt + url + 5).
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Italic(s) => write!(f, "*{s}*"),
        }
    }
}

impl Element {
    /// Width of the rendered element in `char`s: the payload length plus the
    /// number of markup characters that `Display` adds around it.
    fn len(&self) -> usize {
        match self {
            Element::Text(s) => s.chars().count(),
            Element::Link { text, url } => text.chars().count() + url.chars().count() + 4,
            Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4,
            Element::EmptyReferenceLink { text } => text.chars().count() + 4,
            Element::ShortcutReference { reference } => reference.chars().count() + 2,
            Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5,
            Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5,
            Element::EmptyReferenceImage { alt } => alt.chars().count() + 5,
            Element::FootnoteReference { note } => note.chars().count() + 3,
            Element::Strikethrough(s) => s.chars().count() + 4,
            Element::WikiLink(s) => s.chars().count() + 4,
            Element::InlineMath(s) => s.chars().count() + 2,
            Element::DisplayMath(s) => s.chars().count() + 4,
            Element::EmojiShortcode(s) => s.chars().count() + 2,
            Element::HtmlTag(s) => s.chars().count(),
            Element::HtmlEntity(s) => s.chars().count(),
            Element::Code(s) => s.chars().count() + 2,
            Element::Bold(s) => s.chars().count() + 4,
            Element::Italic(s) => s.chars().count() + 2,
        }
    }
}
365
366fn parse_markdown_elements(text: &str) -> Vec<Element> {
375 let mut elements = Vec::new();
376 let mut remaining = text;
377
378 while !remaining.is_empty() {
379 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
381
382 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
385 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
386 {
387 earliest_match = Some((m.start(), "inline_image", m));
388 }
389
390 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
392 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
393 {
394 earliest_match = Some((m.start(), "ref_image", m));
395 }
396
397 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
399 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
400 {
401 earliest_match = Some((m.start(), "footnote_ref", m));
402 }
403
404 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
406 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
407 {
408 earliest_match = Some((m.start(), "inline_link", m));
409 }
410
411 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
413 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
414 {
415 earliest_match = Some((m.start(), "ref_link", m));
416 }
417
418 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
421 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
422 {
423 earliest_match = Some((m.start(), "shortcut_ref", m));
424 }
425
426 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
428 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
429 {
430 earliest_match = Some((m.start(), "wiki_link", m));
431 }
432
433 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
435 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
436 {
437 earliest_match = Some((m.start(), "display_math", m));
438 }
439
440 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
442 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
443 {
444 earliest_match = Some((m.start(), "inline_math", m));
445 }
446
447 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
449 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
450 {
451 earliest_match = Some((m.start(), "strikethrough", m));
452 }
453
454 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
456 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
457 {
458 earliest_match = Some((m.start(), "emoji", m));
459 }
460
461 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
463 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
464 {
465 earliest_match = Some((m.start(), "html_entity", m));
466 }
467
468 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
471 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
472 {
473 let matched_text = &remaining[m.start()..m.end()];
475 let is_autolink = matched_text.starts_with("<http://")
476 || matched_text.starts_with("<https://")
477 || matched_text.starts_with("<mailto:")
478 || matched_text.starts_with("<ftp://")
479 || matched_text.starts_with("<ftps://");
480
481 if !is_autolink {
482 earliest_match = Some((m.start(), "html_tag", m));
483 }
484 }
485
486 let mut next_special = remaining.len();
488 let mut special_type = "";
489
490 if let Some(pos) = remaining.find('`')
491 && pos < next_special
492 {
493 next_special = pos;
494 special_type = "code";
495 }
496 if let Some(pos) = remaining.find("**")
497 && pos < next_special
498 {
499 next_special = pos;
500 special_type = "bold";
501 }
502 if let Some(pos) = remaining.find('*')
503 && pos < next_special
504 && !remaining[pos..].starts_with("**")
505 {
506 next_special = pos;
507 special_type = "italic";
508 }
509
510 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
512 pos < next_special
513 } else {
514 false
515 };
516
517 if should_process_markdown_link {
518 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
519
520 if pos > 0 {
522 elements.push(Element::Text(remaining[..pos].to_string()));
523 }
524
525 match pattern_type {
527 "inline_image" => {
528 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
529 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
530 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
531 elements.push(Element::InlineImage {
532 alt: alt.to_string(),
533 url: url.to_string(),
534 });
535 remaining = &remaining[match_obj.end()..];
536 } else {
537 elements.push(Element::Text("!".to_string()));
538 remaining = &remaining[1..];
539 }
540 }
541 "ref_image" => {
542 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
543 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
544 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
545
546 if reference.is_empty() {
547 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
548 } else {
549 elements.push(Element::ReferenceImage {
550 alt: alt.to_string(),
551 reference: reference.to_string(),
552 });
553 }
554 remaining = &remaining[match_obj.end()..];
555 } else {
556 elements.push(Element::Text("!".to_string()));
557 remaining = &remaining[1..];
558 }
559 }
560 "footnote_ref" => {
561 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
562 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
563 elements.push(Element::FootnoteReference { note: note.to_string() });
564 remaining = &remaining[match_obj.end()..];
565 } else {
566 elements.push(Element::Text("[".to_string()));
567 remaining = &remaining[1..];
568 }
569 }
570 "inline_link" => {
571 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
572 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
573 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
574 elements.push(Element::Link {
575 text: text.to_string(),
576 url: url.to_string(),
577 });
578 remaining = &remaining[match_obj.end()..];
579 } else {
580 elements.push(Element::Text("[".to_string()));
582 remaining = &remaining[1..];
583 }
584 }
585 "ref_link" => {
586 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
587 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
588 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
589
590 if reference.is_empty() {
591 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
593 } else {
594 elements.push(Element::ReferenceLink {
596 text: text.to_string(),
597 reference: reference.to_string(),
598 });
599 }
600 remaining = &remaining[match_obj.end()..];
601 } else {
602 elements.push(Element::Text("[".to_string()));
604 remaining = &remaining[1..];
605 }
606 }
607 "shortcut_ref" => {
608 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
609 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
610 elements.push(Element::ShortcutReference {
611 reference: reference.to_string(),
612 });
613 remaining = &remaining[match_obj.end()..];
614 } else {
615 elements.push(Element::Text("[".to_string()));
617 remaining = &remaining[1..];
618 }
619 }
620 "wiki_link" => {
621 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
622 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
623 elements.push(Element::WikiLink(content.to_string()));
624 remaining = &remaining[match_obj.end()..];
625 } else {
626 elements.push(Element::Text("[[".to_string()));
627 remaining = &remaining[2..];
628 }
629 }
630 "display_math" => {
631 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
632 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
633 elements.push(Element::DisplayMath(math.to_string()));
634 remaining = &remaining[match_obj.end()..];
635 } else {
636 elements.push(Element::Text("$$".to_string()));
637 remaining = &remaining[2..];
638 }
639 }
640 "inline_math" => {
641 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
642 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
643 elements.push(Element::InlineMath(math.to_string()));
644 remaining = &remaining[match_obj.end()..];
645 } else {
646 elements.push(Element::Text("$".to_string()));
647 remaining = &remaining[1..];
648 }
649 }
650 "strikethrough" => {
651 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
652 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
653 elements.push(Element::Strikethrough(text.to_string()));
654 remaining = &remaining[match_obj.end()..];
655 } else {
656 elements.push(Element::Text("~~".to_string()));
657 remaining = &remaining[2..];
658 }
659 }
660 "emoji" => {
661 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
662 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
663 elements.push(Element::EmojiShortcode(emoji.to_string()));
664 remaining = &remaining[match_obj.end()..];
665 } else {
666 elements.push(Element::Text(":".to_string()));
667 remaining = &remaining[1..];
668 }
669 }
670 "html_entity" => {
671 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
673 remaining = &remaining[match_obj.end()..];
674 }
675 "html_tag" => {
676 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
678 remaining = &remaining[match_obj.end()..];
679 }
680 _ => {
681 elements.push(Element::Text("[".to_string()));
683 remaining = &remaining[1..];
684 }
685 }
686 } else {
687 if next_special > 0 && next_special < remaining.len() {
691 elements.push(Element::Text(remaining[..next_special].to_string()));
692 remaining = &remaining[next_special..];
693 }
694
695 match special_type {
697 "code" => {
698 if let Some(code_end) = remaining[1..].find('`') {
700 let code = &remaining[1..1 + code_end];
701 elements.push(Element::Code(code.to_string()));
702 remaining = &remaining[1 + code_end + 1..];
703 } else {
704 elements.push(Element::Text(remaining.to_string()));
706 break;
707 }
708 }
709 "bold" => {
710 if let Some(bold_end) = remaining[2..].find("**") {
712 let bold_text = &remaining[2..2 + bold_end];
713 elements.push(Element::Bold(bold_text.to_string()));
714 remaining = &remaining[2 + bold_end + 2..];
715 } else {
716 elements.push(Element::Text("**".to_string()));
718 remaining = &remaining[2..];
719 }
720 }
721 "italic" => {
722 if let Some(italic_end) = remaining[1..].find('*') {
724 let italic_text = &remaining[1..1 + italic_end];
725 elements.push(Element::Italic(italic_text.to_string()));
726 remaining = &remaining[1 + italic_end + 1..];
727 } else {
728 elements.push(Element::Text("*".to_string()));
730 remaining = &remaining[1..];
731 }
732 }
733 _ => {
734 elements.push(Element::Text(remaining.to_string()));
736 break;
737 }
738 }
739 }
740 }
741
742 elements
743}
744
745fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
747 let mut lines = Vec::new();
748 let mut current_line = String::new();
749
750 for element in elements.iter() {
751 let element_str = format!("{element}");
752
753 if let Element::Text(text) = element {
755 let combined = format!("{current_line}{text}");
757 let sentences = split_into_sentences(&combined);
758
759 if sentences.len() > 1 {
760 for (i, sentence) in sentences.iter().enumerate() {
762 if i == 0 {
763 let trimmed = sentence.trim();
766
767 if text_ends_with_abbreviation(trimmed) {
768 current_line = sentence.to_string();
770 } else {
771 lines.push(sentence.to_string());
773 current_line.clear();
774 }
775 } else if i == sentences.len() - 1 {
776 let trimmed = sentence.trim();
778 let ends_with_sentence_punct =
779 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
780
781 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed) {
782 lines.push(sentence.to_string());
784 current_line.clear();
785 } else {
786 current_line = sentence.to_string();
788 }
789 } else {
790 lines.push(sentence.to_string());
792 }
793 }
794 } else {
795 current_line = combined;
797 }
798 } else {
799 if !current_line.is_empty()
802 && !current_line.ends_with(' ')
803 && !current_line.ends_with('(')
804 && !current_line.ends_with('[')
805 {
806 current_line.push(' ');
807 }
808 current_line.push_str(&element_str);
809 }
810 }
811
812 if !current_line.is_empty() {
814 lines.push(current_line.trim().to_string());
815 }
816 lines
817}
818
819fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
821 let mut lines = Vec::new();
822 let mut current_line = String::new();
823 let mut current_length = 0;
824
825 for element in elements {
826 let element_str = format!("{element}");
827 let element_len = element.len();
828
829 if let Element::Text(text) = element {
831 let words: Vec<&str> = text.split_whitespace().collect();
833
834 for word in words {
835 let word_len = word.chars().count();
836 if current_length > 0 && current_length + 1 + word_len > options.line_length {
837 lines.push(current_line.trim().to_string());
839 current_line = word.to_string();
840 current_length = word_len;
841 } else {
842 if current_length > 0 {
844 current_line.push(' ');
845 current_length += 1;
846 }
847 current_line.push_str(word);
848 current_length += word_len;
849 }
850 }
851 } else {
852 if current_length > 0 && current_length + 1 + element_len > options.line_length {
855 lines.push(current_line.trim().to_string());
857 current_line = element_str;
858 current_length = element_len;
859 } else {
860 if current_length > 0 {
862 current_line.push(' ');
863 current_length += 1;
864 }
865 current_line.push_str(&element_str);
866 current_length += element_len;
867 }
868 }
869 }
870
871 if !current_line.is_empty() {
873 lines.push(current_line.trim_end().to_string());
874 }
875
876 lines
877}
878
879pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
881 let lines: Vec<&str> = content.lines().collect();
882 let mut result = Vec::new();
883 let mut i = 0;
884
885 while i < lines.len() {
886 let line = lines[i];
887 let trimmed = line.trim();
888
889 if trimmed.is_empty() {
891 result.push(String::new());
892 i += 1;
893 continue;
894 }
895
896 if trimmed.starts_with('#') {
898 result.push(line.to_string());
899 i += 1;
900 continue;
901 }
902
903 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
905 result.push(line.to_string());
906 i += 1;
907 while i < lines.len() {
909 result.push(lines[i].to_string());
910 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
911 i += 1;
912 break;
913 }
914 i += 1;
915 }
916 continue;
917 }
918
919 if line.starts_with(" ") || line.starts_with("\t") {
921 result.push(line.to_string());
923 i += 1;
924 while i < lines.len() {
925 let next_line = lines[i];
926 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
928 result.push(next_line.to_string());
929 i += 1;
930 } else {
931 break;
932 }
933 }
934 continue;
935 }
936
937 if trimmed.starts_with('>') {
939 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
940 let quote_content = &line[quote_prefix.len()..].trim_start();
941
942 let reflowed = reflow_line(quote_content, options);
943 for reflowed_line in reflowed.iter() {
944 result.push(format!("{quote_prefix} {reflowed_line}"));
945 }
946 i += 1;
947 continue;
948 }
949
950 if is_horizontal_rule(trimmed) {
952 result.push(line.to_string());
953 i += 1;
954 continue;
955 }
956
957 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
959 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
960 || trimmed.starts_with('+')
961 || is_numbered_list_item(trimmed)
962 {
963 let indent = line.len() - line.trim_start().len();
965 let indent_str = " ".repeat(indent);
966
967 let mut marker_end = indent;
970 let mut content_start = indent;
971
972 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
973 if let Some(period_pos) = line[indent..].find('.') {
975 marker_end = indent + period_pos + 1; content_start = marker_end;
977 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
979 content_start += 1;
980 }
981 }
982 } else {
983 marker_end = indent + 1; content_start = marker_end;
986 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
988 content_start += 1;
989 }
990 }
991
992 let marker = &line[indent..marker_end];
993
994 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
997 i += 1;
998
999 while i < lines.len() {
1001 let next_line = lines[i];
1002 let next_trimmed = next_line.trim();
1003
1004 if next_trimmed.is_empty()
1006 || next_trimmed.starts_with('#')
1007 || next_trimmed.starts_with("```")
1008 || next_trimmed.starts_with("~~~")
1009 || next_trimmed.starts_with('>')
1010 || next_trimmed.starts_with('|')
1011 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1012 || is_horizontal_rule(next_trimmed)
1013 || (next_trimmed.starts_with('-')
1014 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1015 || (next_trimmed.starts_with('*')
1016 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017 || (next_trimmed.starts_with('+')
1018 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1019 || is_numbered_list_item(next_trimmed)
1020 || is_definition_list_item(next_trimmed)
1021 {
1022 break;
1023 }
1024
1025 let next_indent = next_line.len() - next_line.trim_start().len();
1027 if next_indent >= content_start {
1028 let trimmed_start = next_line.trim_start();
1031 list_content.push(trim_preserving_hard_break(trimmed_start));
1032 i += 1;
1033 } else {
1034 break;
1036 }
1037 }
1038
1039 let combined_content = if options.preserve_breaks {
1042 list_content[0].clone()
1043 } else {
1044 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1046 if has_hard_breaks {
1047 list_content.join("\n")
1049 } else {
1050 list_content.join(" ")
1052 }
1053 };
1054
1055 let trimmed_marker = marker;
1057 let continuation_spaces = content_start;
1058
1059 let prefix_length = indent + trimmed_marker.len() + 1;
1061
1062 let adjusted_options = ReflowOptions {
1064 line_length: options.line_length.saturating_sub(prefix_length),
1065 ..options.clone()
1066 };
1067
1068 let reflowed = reflow_line(&combined_content, &adjusted_options);
1069 for (j, reflowed_line) in reflowed.iter().enumerate() {
1070 if j == 0 {
1071 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1072 } else {
1073 let continuation_indent = " ".repeat(continuation_spaces);
1075 result.push(format!("{continuation_indent}{reflowed_line}"));
1076 }
1077 }
1078 continue;
1079 }
1080
1081 if trimmed.contains('|') {
1083 result.push(line.to_string());
1084 i += 1;
1085 continue;
1086 }
1087
1088 if trimmed.starts_with('[') && line.contains("]:") {
1090 result.push(line.to_string());
1091 i += 1;
1092 continue;
1093 }
1094
1095 if is_definition_list_item(trimmed) {
1097 result.push(line.to_string());
1098 i += 1;
1099 continue;
1100 }
1101
1102 let mut is_single_line_paragraph = true;
1104 if i + 1 < lines.len() {
1105 let next_line = lines[i + 1];
1106 let next_trimmed = next_line.trim();
1107 if !next_trimmed.is_empty()
1109 && !next_trimmed.starts_with('#')
1110 && !next_trimmed.starts_with("```")
1111 && !next_trimmed.starts_with("~~~")
1112 && !next_trimmed.starts_with('>')
1113 && !next_trimmed.starts_with('|')
1114 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1115 && !is_horizontal_rule(next_trimmed)
1116 && !(next_trimmed.starts_with('-')
1117 && !is_horizontal_rule(next_trimmed)
1118 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1119 && !(next_trimmed.starts_with('*')
1120 && !is_horizontal_rule(next_trimmed)
1121 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1122 && !(next_trimmed.starts_with('+')
1123 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1124 && !is_numbered_list_item(next_trimmed)
1125 {
1126 is_single_line_paragraph = false;
1127 }
1128 }
1129
1130 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1132 result.push(line.to_string());
1133 i += 1;
1134 continue;
1135 }
1136
1137 let mut paragraph_parts = Vec::new();
1139 let mut current_part = vec![line];
1140 i += 1;
1141
1142 if options.preserve_breaks {
1144 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1146 Some("\\")
1147 } else if line.ends_with(" ") {
1148 Some(" ")
1149 } else {
1150 None
1151 };
1152 let reflowed = reflow_line(line, options);
1153
1154 if let Some(break_marker) = hard_break_type {
1156 if !reflowed.is_empty() {
1157 let mut reflowed_with_break = reflowed;
1158 let last_idx = reflowed_with_break.len() - 1;
1159 if !has_hard_break(&reflowed_with_break[last_idx]) {
1160 reflowed_with_break[last_idx].push_str(break_marker);
1161 }
1162 result.extend(reflowed_with_break);
1163 }
1164 } else {
1165 result.extend(reflowed);
1166 }
1167 } else {
1168 while i < lines.len() {
1170 let prev_line = if !current_part.is_empty() {
1171 current_part.last().unwrap()
1172 } else {
1173 ""
1174 };
1175 let next_line = lines[i];
1176 let next_trimmed = next_line.trim();
1177
1178 if next_trimmed.is_empty()
1180 || next_trimmed.starts_with('#')
1181 || next_trimmed.starts_with("```")
1182 || next_trimmed.starts_with("~~~")
1183 || next_trimmed.starts_with('>')
1184 || next_trimmed.starts_with('|')
1185 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1186 || is_horizontal_rule(next_trimmed)
1187 || (next_trimmed.starts_with('-')
1188 && !is_horizontal_rule(next_trimmed)
1189 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1190 || (next_trimmed.starts_with('*')
1191 && !is_horizontal_rule(next_trimmed)
1192 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1193 || (next_trimmed.starts_with('+')
1194 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1195 || is_numbered_list_item(next_trimmed)
1196 || is_definition_list_item(next_trimmed)
1197 {
1198 break;
1199 }
1200
1201 if has_hard_break(prev_line) {
1203 paragraph_parts.push(current_part.join(" "));
1205 current_part = vec![next_line];
1206 } else {
1207 current_part.push(next_line);
1208 }
1209 i += 1;
1210 }
1211
1212 if !current_part.is_empty() {
1214 if current_part.len() == 1 {
1215 paragraph_parts.push(current_part[0].to_string());
1217 } else {
1218 paragraph_parts.push(current_part.join(" "));
1219 }
1220 }
1221
1222 for (j, part) in paragraph_parts.iter().enumerate() {
1224 let reflowed = reflow_line(part, options);
1225 result.extend(reflowed);
1226
1227 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1230 let last_idx = result.len() - 1;
1231 if !has_hard_break(&result[last_idx]) {
1232 result[last_idx].push_str(" ");
1233 }
1234 }
1235 }
1236 }
1237 }
1238
1239 let result_text = result.join("\n");
1241 if content.ends_with('\n') && !result_text.ends_with('\n') {
1242 format!("{result_text}\n")
1243 } else {
1244 result_text
1245 }
1246}
1247
/// Describes a reflowed paragraph as a byte range plus replacement text.
///
/// NOTE(review): the code that fills these fields lies outside the visible part
/// of this file, so the field descriptions below are inferred from the names and
/// should be confirmed against `reflow_paragraph_at_line`.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Presumably the byte offset in the original content where the paragraph
    /// begins — TODO confirm.
    pub start_byte: usize,
    /// Presumably the byte offset in the original content where the paragraph
    /// ends; whether it is inclusive or exclusive is not visible here — TODO confirm.
    pub end_byte: usize,
    /// Presumably the reflowed text meant to replace that byte range — TODO confirm.
    pub reflowed_text: String,
}
1258
1259pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1277 if line_number == 0 {
1278 return None;
1279 }
1280
1281 let lines: Vec<&str> = content.lines().collect();
1282
1283 if line_number > lines.len() {
1285 return None;
1286 }
1287
1288 let target_idx = line_number - 1; let target_line = lines[target_idx];
1290 let trimmed = target_line.trim();
1291
1292 if trimmed.is_empty()
1294 || trimmed.starts_with('#')
1295 || trimmed.starts_with("```")
1296 || trimmed.starts_with("~~~")
1297 || target_line.starts_with(" ")
1298 || target_line.starts_with('\t')
1299 || trimmed.starts_with('>')
1300 || trimmed.contains('|') || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1303 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1304 && !is_horizontal_rule(trimmed)
1305 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1306 || is_numbered_list_item(trimmed)
1307 || is_definition_list_item(trimmed)
1308 {
1309 return None;
1310 }
1311
1312 let mut para_start = target_idx;
1314 while para_start > 0 {
1315 let prev_idx = para_start - 1;
1316 let prev_line = lines[prev_idx];
1317 let prev_trimmed = prev_line.trim();
1318
1319 if prev_trimmed.is_empty()
1321 || prev_trimmed.starts_with('#')
1322 || prev_trimmed.starts_with("```")
1323 || prev_trimmed.starts_with("~~~")
1324 || prev_line.starts_with(" ")
1325 || prev_line.starts_with('\t')
1326 || prev_trimmed.starts_with('>')
1327 || prev_trimmed.contains('|')
1328 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1329 || is_horizontal_rule(prev_trimmed)
1330 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1331 && !is_horizontal_rule(prev_trimmed)
1332 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1333 || is_numbered_list_item(prev_trimmed)
1334 || is_definition_list_item(prev_trimmed)
1335 {
1336 break;
1337 }
1338
1339 para_start = prev_idx;
1340 }
1341
1342 let mut para_end = target_idx;
1344 while para_end + 1 < lines.len() {
1345 let next_idx = para_end + 1;
1346 let next_line = lines[next_idx];
1347 let next_trimmed = next_line.trim();
1348
1349 if next_trimmed.is_empty()
1351 || next_trimmed.starts_with('#')
1352 || next_trimmed.starts_with("```")
1353 || next_trimmed.starts_with("~~~")
1354 || next_line.starts_with(" ")
1355 || next_line.starts_with('\t')
1356 || next_trimmed.starts_with('>')
1357 || next_trimmed.contains('|')
1358 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1359 || is_horizontal_rule(next_trimmed)
1360 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1361 && !is_horizontal_rule(next_trimmed)
1362 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1363 || is_numbered_list_item(next_trimmed)
1364 || is_definition_list_item(next_trimmed)
1365 {
1366 break;
1367 }
1368
1369 para_end = next_idx;
1370 }
1371
1372 let paragraph_lines = &lines[para_start..=para_end];
1374
1375 let mut start_byte = 0;
1377 for line in lines.iter().take(para_start) {
1378 start_byte += line.len() + 1; }
1380
1381 let mut end_byte = start_byte;
1382 for line in paragraph_lines.iter() {
1383 end_byte += line.len() + 1; }
1385
1386 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1389
1390 if !includes_trailing_newline {
1392 end_byte -= 1;
1393 }
1394
1395 let paragraph_text = paragraph_lines.join("\n");
1397
1398 let options = ReflowOptions {
1400 line_length,
1401 break_on_sentences: true,
1402 preserve_breaks: false,
1403 sentence_per_line: false,
1404 };
1405
1406 let reflowed = reflow_markdown(¶graph_text, &options);
1408
1409 let reflowed_text = if includes_trailing_newline {
1413 if reflowed.ends_with('\n') {
1415 reflowed
1416 } else {
1417 format!("{reflowed}\n")
1418 }
1419 } else {
1420 if reflowed.ends_with('\n') {
1422 reflowed.trim_end_matches('\n').to_string()
1423 } else {
1424 reflowed
1425 }
1426 };
1427
1428 Some(ParagraphReflow {
1429 start_byte,
1430 end_byte,
1431 reflowed_text,
1432 })
1433}
1434
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation` against inputs that end with a known
    /// abbreviation plus a period, and against inputs that must not be treated as one.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        // Inputs whose last word is a listed abbreviation followed by a period.
        const ABBREVIATED: [&str; 9] = [
            "Dr.", "word Dr.", "e.g.", "i.e.", "etc.", "Mr.", "Mrs.", "Ms.", "Prof.",
        ];
        // Ordinary words ending in a period, abbreviations followed by other
        // punctuation, text without a period, and the empty string.
        const NOT_ABBREVIATED: [&str; 9] = [
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];

        for &text in &ABBREVIATED {
            assert!(
                text_ends_with_abbreviation(text),
                "expected {text:?} to end with an abbreviation"
            );
        }

        for &text in &NOT_ABBREVIATED {
            assert!(
                !text_ends_with_abbreviation(text),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }
}