1use crate::utils::is_definition_list_item;
7use crate::utils::regex_cache::{
8 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
9 INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
10 SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
11};
12use std::collections::HashSet;
13
/// Settings that control how Markdown text is re-wrapped.
///
/// `Debug` is derived so the options can be logged and used in assertion
/// messages.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Maximum number of characters (not bytes) allowed on an output line.
    pub line_length: usize,
    /// Whether sentence boundaries are preferred break points.
    /// NOTE(review): not consulted by the reflow code in this part of the file.
    pub break_on_sentences: bool,
    /// When set, each source line is reflowed on its own instead of being
    /// merged with its neighbours.
    pub preserve_breaks: bool,
    /// When set, every sentence is emitted on its own line and
    /// `line_length` is not used to decide where to break.
    pub sentence_per_line: bool,
    /// Extra abbreviations (a trailing period is optional, case is ignored)
    /// that must not be treated as the end of a sentence.
    pub abbreviations: Option<Vec<String>>,
}

impl Default for ReflowOptions {
    /// 80-column wrapping that merges lines, with no custom abbreviations.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
            abbreviations: None,
        }
    }
}
42
/// Builds the lower-cased abbreviation set used for sentence detection.
///
/// The set always contains the built-in titles and Latin abbreviations.
/// Entries from `custom` are added after stripping trailing periods and
/// lower-casing; entries that become empty are ignored.
fn get_abbreviations(custom: &Option<Vec<String>>) -> HashSet<String> {
    const BUILT_IN: [&str; 9] = ["Mr", "Mrs", "Ms", "Dr", "Prof", "Sr", "Jr", "i.e", "e.g"];

    let user_supplied = custom
        .iter()
        .flatten()
        .map(|entry| entry.trim_end_matches('.').to_lowercase())
        .filter(|entry| !entry.is_empty());

    BUILT_IN
        .iter()
        .map(|entry| entry.to_lowercase())
        .chain(user_supplied)
        .collect()
}
77
/// Returns `true` when `text` ends with a period whose preceding word is a
/// known abbreviation.
///
/// All trailing periods are stripped before the last whitespace-separated
/// word is compared, lower-cased, against `abbreviations`.
fn text_ends_with_abbreviation(text: &str, abbreviations: &HashSet<String>) -> bool {
    if !text.ends_with('.') {
        return false;
    }

    text.trim_end_matches('.')
        .split_whitespace()
        .last()
        .map_or(false, |word| abbreviations.contains(&word.to_lowercase()))
}

/// Decides whether the character at `pos` terminates a sentence.
///
/// `pos` is a *character* index into `text`. A boundary requires `.`, `!`
/// or `?` at `pos`, a space right after it, and an uppercase character as
/// the next non-whitespace character. A period is rejected when the text up
/// to it ends with an abbreviation, or when it sits between two numeric
/// characters.
fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
    let chars: Vec<char> = text.chars().collect();

    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    if !matches!(c, '.' | '!' | '?') || chars[pos + 1] != ' ' {
        return false;
    }

    // First non-whitespace character after the punctuation, if any.
    let next = match chars[pos + 2..].iter().copied().find(|ch| !ch.is_whitespace()) {
        Some(ch) => ch,
        None => return false,
    };

    if !next.is_uppercase() {
        return false;
    }

    if pos > 0 && c == '.' {
        // `pos` counts characters, so translate it to a byte offset before
        // slicing; slicing with `pos` directly panics or cuts the wrong
        // prefix as soon as a multi-byte character precedes the period.
        let prefix_end: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..prefix_end], abbreviations) {
            return false;
        }

        if chars[pos - 1].is_numeric() && next.is_numeric() {
            return false;
        }
    }

    true
}
164
165pub fn split_into_sentences(text: &str) -> Vec<String> {
167 split_into_sentences_custom(text, &None)
168}
169
170pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
172 let abbreviations = get_abbreviations(custom_abbreviations);
173 split_into_sentences_with_set(text, &abbreviations)
174}
175
176fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
179 let mut sentences = Vec::new();
180 let mut current_sentence = String::new();
181 let mut chars = text.chars().peekable();
182 let mut pos = 0;
183
184 while let Some(c) = chars.next() {
185 current_sentence.push(c);
186
187 if is_sentence_boundary(text, pos, abbreviations) {
188 if chars.peek() == Some(&' ') {
190 chars.next();
191 pos += 1;
192 }
193 sentences.push(current_sentence.trim().to_string());
194 current_sentence.clear();
195 }
196
197 pos += 1;
198 }
199
200 if !current_sentence.trim().is_empty() {
202 sentences.push(current_sentence.trim().to_string());
203 }
204 sentences
205}
206
/// Reports whether `line` is a thematic break: it must start with `-`, `_`
/// or `*`, contain nothing but that character and spaces, and use the
/// character at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    let rule_char = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    let mut marks = 0usize;
    for c in line.chars() {
        if c == rule_char {
            marks += 1;
        } else if c != ' ' {
            return false;
        }
    }

    marks >= 3
}
235
/// Reports whether `line` opens with an ordered-list marker: one or more
/// numeric characters, a period, and then either a space or the end of the
/// line.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.chars().take_while(|c| c.is_numeric()).count();
    if digit_count == 0 {
        return false;
    }

    let mut after_digits = line.chars().skip(digit_count);
    match after_digits.next() {
        Some('.') => matches!(after_digits.next(), None | Some(' ')),
        _ => false,
    }
}
258
/// Reports whether `line` ends with a Markdown hard line break.
///
/// A hard break is two or more trailing spaces or a trailing backslash; a
/// single trailing space is ordinary whitespace and must not count. One
/// trailing `\r` is ignored so CRLF input behaves like LF input.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
268
/// Trims trailing whitespace from `s` while keeping a hard-break marker.
///
/// * A trailing backslash is returned untouched.
/// * Two or more trailing spaces are normalised to exactly two, so the
///   result is still recognised as a hard break.
/// * Anything else, including a single trailing space, is simply trimmed.
///
/// A whitespace-only input yields an empty string. One trailing `\r` is
/// dropped first.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
299
300pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
301 if options.sentence_per_line {
303 let elements = parse_markdown_elements(line);
304 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
305 }
306
307 if line.chars().count() <= options.line_length {
309 return vec![line.to_string()];
310 }
311
312 let elements = parse_markdown_elements(line);
314
315 reflow_elements(&elements, options)
317}
318
/// An inline Markdown construct that is kept intact while reflowing.
///
/// `Display` renders the element back to Markdown source and [`Element::len`]
/// reports the character width of that rendering.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text; the only variant that may be split between words.
    Text(String),
    /// `[text](url)`
    Link { text: String, url: String },
    /// `[text][reference]`
    ReferenceLink { text: String, reference: String },
    /// `[text][]`
    EmptyReferenceLink { text: String },
    /// `[reference]`
    ShortcutReference { reference: String },
    /// `![alt](url)`
    InlineImage { alt: String, url: String },
    /// `![alt][reference]`
    ReferenceImage { alt: String, reference: String },
    /// `![alt][]`
    EmptyReferenceImage { alt: String },
    /// `[^note]`
    FootnoteReference { note: String },
    /// `~~text~~`
    Strikethrough(String),
    /// `[[target]]`
    WikiLink(String),
    /// `$math$`
    InlineMath(String),
    /// `$$math$$`
    DisplayMath(String),
    /// `:name:`
    EmojiShortcode(String),
    /// A raw HTML tag, stored verbatim.
    HtmlTag(String),
    /// An HTML entity, stored verbatim.
    HtmlEntity(String),
    /// `` `code` ``
    Code(String),
    /// `**text**`
    Bold(String),
    /// `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Writes the Markdown source form of the element.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax: an empty rendering drops the
            // image from the reflowed output and disagrees with `len()`.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Italic(s) => write!(f, "*{s}*"),
        }
    }
}

impl Element {
    /// Width in characters of the element's `Display` rendering: the
    /// character count of the payload plus the fixed delimiter characters.
    fn len(&self) -> usize {
        let width = |s: &str| s.chars().count();
        match self {
            Element::Text(s) => width(s),
            Element::Link { text, url } => width(text) + width(url) + 4,
            Element::ReferenceLink { text, reference } => width(text) + width(reference) + 4,
            Element::EmptyReferenceLink { text } => width(text) + 4,
            Element::ShortcutReference { reference } => width(reference) + 2,
            Element::InlineImage { alt, url } => width(alt) + width(url) + 5,
            Element::ReferenceImage { alt, reference } => width(alt) + width(reference) + 5,
            Element::EmptyReferenceImage { alt } => width(alt) + 5,
            Element::FootnoteReference { note } => width(note) + 3,
            Element::Strikethrough(s) => width(s) + 4,
            Element::WikiLink(s) => width(s) + 4,
            Element::InlineMath(s) => width(s) + 2,
            Element::DisplayMath(s) => width(s) + 4,
            Element::EmojiShortcode(s) => width(s) + 2,
            Element::HtmlTag(s) => width(s),
            Element::HtmlEntity(s) => width(s),
            Element::Code(s) => width(s) + 2,
            Element::Bold(s) => width(s) + 4,
            Element::Italic(s) => width(s) + 2,
        }
    }
}
413
414fn parse_markdown_elements(text: &str) -> Vec<Element> {
423 let mut elements = Vec::new();
424 let mut remaining = text;
425
426 while !remaining.is_empty() {
427 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
429
430 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
433 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
434 {
435 earliest_match = Some((m.start(), "inline_image", m));
436 }
437
438 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
440 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
441 {
442 earliest_match = Some((m.start(), "ref_image", m));
443 }
444
445 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
447 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
448 {
449 earliest_match = Some((m.start(), "footnote_ref", m));
450 }
451
452 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
454 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
455 {
456 earliest_match = Some((m.start(), "inline_link", m));
457 }
458
459 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
461 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
462 {
463 earliest_match = Some((m.start(), "ref_link", m));
464 }
465
466 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
469 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
470 {
471 earliest_match = Some((m.start(), "shortcut_ref", m));
472 }
473
474 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
476 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
477 {
478 earliest_match = Some((m.start(), "wiki_link", m));
479 }
480
481 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
483 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
484 {
485 earliest_match = Some((m.start(), "display_math", m));
486 }
487
488 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
490 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
491 {
492 earliest_match = Some((m.start(), "inline_math", m));
493 }
494
495 if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
497 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
498 {
499 earliest_match = Some((m.start(), "strikethrough", m));
500 }
501
502 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
504 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
505 {
506 earliest_match = Some((m.start(), "emoji", m));
507 }
508
509 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
511 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
512 {
513 earliest_match = Some((m.start(), "html_entity", m));
514 }
515
516 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
519 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
520 {
521 let matched_text = &remaining[m.start()..m.end()];
523 let is_autolink = matched_text.starts_with("<http://")
524 || matched_text.starts_with("<https://")
525 || matched_text.starts_with("<mailto:")
526 || matched_text.starts_with("<ftp://")
527 || matched_text.starts_with("<ftps://");
528
529 if !is_autolink {
530 earliest_match = Some((m.start(), "html_tag", m));
531 }
532 }
533
534 let mut next_special = remaining.len();
536 let mut special_type = "";
537
538 if let Some(pos) = remaining.find('`')
539 && pos < next_special
540 {
541 next_special = pos;
542 special_type = "code";
543 }
544 if let Some(pos) = remaining.find("**")
545 && pos < next_special
546 {
547 next_special = pos;
548 special_type = "bold";
549 }
550 if let Some(pos) = remaining.find('*')
551 && pos < next_special
552 && !remaining[pos..].starts_with("**")
553 {
554 next_special = pos;
555 special_type = "italic";
556 }
557
558 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
560 pos < next_special
561 } else {
562 false
563 };
564
565 if should_process_markdown_link {
566 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
567
568 if pos > 0 {
570 elements.push(Element::Text(remaining[..pos].to_string()));
571 }
572
573 match pattern_type {
575 "inline_image" => {
576 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
577 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
578 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
579 elements.push(Element::InlineImage {
580 alt: alt.to_string(),
581 url: url.to_string(),
582 });
583 remaining = &remaining[match_obj.end()..];
584 } else {
585 elements.push(Element::Text("!".to_string()));
586 remaining = &remaining[1..];
587 }
588 }
589 "ref_image" => {
590 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
591 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
592 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
593
594 if reference.is_empty() {
595 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
596 } else {
597 elements.push(Element::ReferenceImage {
598 alt: alt.to_string(),
599 reference: reference.to_string(),
600 });
601 }
602 remaining = &remaining[match_obj.end()..];
603 } else {
604 elements.push(Element::Text("!".to_string()));
605 remaining = &remaining[1..];
606 }
607 }
608 "footnote_ref" => {
609 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
610 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
611 elements.push(Element::FootnoteReference { note: note.to_string() });
612 remaining = &remaining[match_obj.end()..];
613 } else {
614 elements.push(Element::Text("[".to_string()));
615 remaining = &remaining[1..];
616 }
617 }
618 "inline_link" => {
619 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
620 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
621 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
622 elements.push(Element::Link {
623 text: text.to_string(),
624 url: url.to_string(),
625 });
626 remaining = &remaining[match_obj.end()..];
627 } else {
628 elements.push(Element::Text("[".to_string()));
630 remaining = &remaining[1..];
631 }
632 }
633 "ref_link" => {
634 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
635 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
636 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
637
638 if reference.is_empty() {
639 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
641 } else {
642 elements.push(Element::ReferenceLink {
644 text: text.to_string(),
645 reference: reference.to_string(),
646 });
647 }
648 remaining = &remaining[match_obj.end()..];
649 } else {
650 elements.push(Element::Text("[".to_string()));
652 remaining = &remaining[1..];
653 }
654 }
655 "shortcut_ref" => {
656 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
657 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
658 elements.push(Element::ShortcutReference {
659 reference: reference.to_string(),
660 });
661 remaining = &remaining[match_obj.end()..];
662 } else {
663 elements.push(Element::Text("[".to_string()));
665 remaining = &remaining[1..];
666 }
667 }
668 "wiki_link" => {
669 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
670 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
671 elements.push(Element::WikiLink(content.to_string()));
672 remaining = &remaining[match_obj.end()..];
673 } else {
674 elements.push(Element::Text("[[".to_string()));
675 remaining = &remaining[2..];
676 }
677 }
678 "display_math" => {
679 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
680 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
681 elements.push(Element::DisplayMath(math.to_string()));
682 remaining = &remaining[match_obj.end()..];
683 } else {
684 elements.push(Element::Text("$$".to_string()));
685 remaining = &remaining[2..];
686 }
687 }
688 "inline_math" => {
689 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
690 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
691 elements.push(Element::InlineMath(math.to_string()));
692 remaining = &remaining[match_obj.end()..];
693 } else {
694 elements.push(Element::Text("$".to_string()));
695 remaining = &remaining[1..];
696 }
697 }
698 "strikethrough" => {
699 if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
700 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
701 elements.push(Element::Strikethrough(text.to_string()));
702 remaining = &remaining[match_obj.end()..];
703 } else {
704 elements.push(Element::Text("~~".to_string()));
705 remaining = &remaining[2..];
706 }
707 }
708 "emoji" => {
709 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
710 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
711 elements.push(Element::EmojiShortcode(emoji.to_string()));
712 remaining = &remaining[match_obj.end()..];
713 } else {
714 elements.push(Element::Text(":".to_string()));
715 remaining = &remaining[1..];
716 }
717 }
718 "html_entity" => {
719 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
721 remaining = &remaining[match_obj.end()..];
722 }
723 "html_tag" => {
724 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
726 remaining = &remaining[match_obj.end()..];
727 }
728 _ => {
729 elements.push(Element::Text("[".to_string()));
731 remaining = &remaining[1..];
732 }
733 }
734 } else {
735 if next_special > 0 && next_special < remaining.len() {
739 elements.push(Element::Text(remaining[..next_special].to_string()));
740 remaining = &remaining[next_special..];
741 }
742
743 match special_type {
745 "code" => {
746 if let Some(code_end) = remaining[1..].find('`') {
748 let code = &remaining[1..1 + code_end];
749 elements.push(Element::Code(code.to_string()));
750 remaining = &remaining[1 + code_end + 1..];
751 } else {
752 elements.push(Element::Text(remaining.to_string()));
754 break;
755 }
756 }
757 "bold" => {
758 if let Some(bold_end) = remaining[2..].find("**") {
760 let bold_text = &remaining[2..2 + bold_end];
761 elements.push(Element::Bold(bold_text.to_string()));
762 remaining = &remaining[2 + bold_end + 2..];
763 } else {
764 elements.push(Element::Text("**".to_string()));
766 remaining = &remaining[2..];
767 }
768 }
769 "italic" => {
770 if let Some(italic_end) = remaining[1..].find('*') {
772 let italic_text = &remaining[1..1 + italic_end];
773 elements.push(Element::Italic(italic_text.to_string()));
774 remaining = &remaining[1 + italic_end + 1..];
775 } else {
776 elements.push(Element::Text("*".to_string()));
778 remaining = &remaining[1..];
779 }
780 }
781 _ => {
782 elements.push(Element::Text(remaining.to_string()));
784 break;
785 }
786 }
787 }
788 }
789
790 elements
791}
792
793fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
795 let abbreviations = get_abbreviations(custom_abbreviations);
796 let mut lines = Vec::new();
797 let mut current_line = String::new();
798
799 for element in elements.iter() {
800 let element_str = format!("{element}");
801
802 if let Element::Text(text) = element {
804 let combined = format!("{current_line}{text}");
806 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
808
809 if sentences.len() > 1 {
810 for (i, sentence) in sentences.iter().enumerate() {
812 if i == 0 {
813 let trimmed = sentence.trim();
816
817 if text_ends_with_abbreviation(trimmed, &abbreviations) {
818 current_line = sentence.to_string();
820 } else {
821 lines.push(sentence.to_string());
823 current_line.clear();
824 }
825 } else if i == sentences.len() - 1 {
826 let trimmed = sentence.trim();
828 let ends_with_sentence_punct =
829 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
830
831 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
832 lines.push(sentence.to_string());
834 current_line.clear();
835 } else {
836 current_line = sentence.to_string();
838 }
839 } else {
840 lines.push(sentence.to_string());
842 }
843 }
844 } else {
845 current_line = combined;
847 }
848 } else {
849 if !current_line.is_empty()
852 && !current_line.ends_with(' ')
853 && !current_line.ends_with('(')
854 && !current_line.ends_with('[')
855 {
856 current_line.push(' ');
857 }
858 current_line.push_str(&element_str);
859 }
860 }
861
862 if !current_line.is_empty() {
864 lines.push(current_line.trim().to_string());
865 }
866 lines
867}
868
869fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
871 let mut lines = Vec::new();
872 let mut current_line = String::new();
873 let mut current_length = 0;
874
875 for element in elements {
876 let element_str = format!("{element}");
877 let element_len = element.len();
878
879 if let Element::Text(text) = element {
881 let has_leading_space = text.starts_with(char::is_whitespace);
883 let words: Vec<&str> = text.split_whitespace().collect();
885
886 for (i, word) in words.iter().enumerate() {
887 let word_len = word.chars().count();
888 let is_trailing_punct = word
890 .chars()
891 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
892
893 if current_length > 0 && current_length + 1 + word_len > options.line_length && !is_trailing_punct {
894 lines.push(current_line.trim().to_string());
896 current_line = word.to_string();
897 current_length = word_len;
898 } else {
899 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
903 current_line.push(' ');
904 current_length += 1;
905 }
906 current_line.push_str(word);
907 current_length += word_len;
908 }
909 }
910 } else {
911 if current_length > 0 && current_length + 1 + element_len > options.line_length {
914 lines.push(current_line.trim().to_string());
916 current_line = element_str;
917 current_length = element_len;
918 } else {
919 let ends_with_opener =
922 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
923 if current_length > 0 && !ends_with_opener {
924 current_line.push(' ');
925 current_length += 1;
926 }
927 current_line.push_str(&element_str);
928 current_length += element_len;
929 }
930 }
931 }
932
933 if !current_line.is_empty() {
935 lines.push(current_line.trim_end().to_string());
936 }
937
938 lines
939}
940
941pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
943 let lines: Vec<&str> = content.lines().collect();
944 let mut result = Vec::new();
945 let mut i = 0;
946
947 while i < lines.len() {
948 let line = lines[i];
949 let trimmed = line.trim();
950
951 if trimmed.is_empty() {
953 result.push(String::new());
954 i += 1;
955 continue;
956 }
957
958 if trimmed.starts_with('#') {
960 result.push(line.to_string());
961 i += 1;
962 continue;
963 }
964
965 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
967 result.push(line.to_string());
968 i += 1;
969 while i < lines.len() {
971 result.push(lines[i].to_string());
972 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
973 i += 1;
974 break;
975 }
976 i += 1;
977 }
978 continue;
979 }
980
981 if line.starts_with(" ") || line.starts_with("\t") {
983 result.push(line.to_string());
985 i += 1;
986 while i < lines.len() {
987 let next_line = lines[i];
988 if next_line.starts_with(" ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
990 result.push(next_line.to_string());
991 i += 1;
992 } else {
993 break;
994 }
995 }
996 continue;
997 }
998
999 if trimmed.starts_with('>') {
1001 let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
1002 let quote_content = &line[quote_prefix.len()..].trim_start();
1003
1004 let reflowed = reflow_line(quote_content, options);
1005 for reflowed_line in reflowed.iter() {
1006 result.push(format!("{quote_prefix} {reflowed_line}"));
1007 }
1008 i += 1;
1009 continue;
1010 }
1011
1012 if is_horizontal_rule(trimmed) {
1014 result.push(line.to_string());
1015 i += 1;
1016 continue;
1017 }
1018
1019 if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
1021 || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
1022 || trimmed.starts_with('+')
1023 || is_numbered_list_item(trimmed)
1024 {
1025 let indent = line.len() - line.trim_start().len();
1027 let indent_str = " ".repeat(indent);
1028
1029 let mut marker_end = indent;
1032 let mut content_start = indent;
1033
1034 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1035 if let Some(period_pos) = line[indent..].find('.') {
1037 marker_end = indent + period_pos + 1; content_start = marker_end;
1039 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
1041 content_start += 1;
1042 }
1043 }
1044 } else {
1045 marker_end = indent + 1; content_start = marker_end;
1048 while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
1050 content_start += 1;
1051 }
1052 }
1053
1054 let marker = &line[indent..marker_end];
1055
1056 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1059 i += 1;
1060
1061 while i < lines.len() {
1063 let next_line = lines[i];
1064 let next_trimmed = next_line.trim();
1065
1066 if next_trimmed.is_empty()
1068 || next_trimmed.starts_with('#')
1069 || next_trimmed.starts_with("```")
1070 || next_trimmed.starts_with("~~~")
1071 || next_trimmed.starts_with('>')
1072 || next_trimmed.starts_with('|')
1073 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1074 || is_horizontal_rule(next_trimmed)
1075 || (next_trimmed.starts_with('-')
1076 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1077 || (next_trimmed.starts_with('*')
1078 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1079 || (next_trimmed.starts_with('+')
1080 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1081 || is_numbered_list_item(next_trimmed)
1082 || is_definition_list_item(next_trimmed)
1083 {
1084 break;
1085 }
1086
1087 let next_indent = next_line.len() - next_line.trim_start().len();
1089 if next_indent >= content_start {
1090 let trimmed_start = next_line.trim_start();
1093 list_content.push(trim_preserving_hard_break(trimmed_start));
1094 i += 1;
1095 } else {
1096 break;
1098 }
1099 }
1100
1101 let combined_content = if options.preserve_breaks {
1104 list_content[0].clone()
1105 } else {
1106 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1108 if has_hard_breaks {
1109 list_content.join("\n")
1111 } else {
1112 list_content.join(" ")
1114 }
1115 };
1116
1117 let trimmed_marker = marker;
1119 let continuation_spaces = content_start;
1120
1121 let prefix_length = indent + trimmed_marker.len() + 1;
1123
1124 let adjusted_options = ReflowOptions {
1126 line_length: options.line_length.saturating_sub(prefix_length),
1127 ..options.clone()
1128 };
1129
1130 let reflowed = reflow_line(&combined_content, &adjusted_options);
1131 for (j, reflowed_line) in reflowed.iter().enumerate() {
1132 if j == 0 {
1133 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1134 } else {
1135 let continuation_indent = " ".repeat(continuation_spaces);
1137 result.push(format!("{continuation_indent}{reflowed_line}"));
1138 }
1139 }
1140 continue;
1141 }
1142
1143 if trimmed.contains('|') {
1145 result.push(line.to_string());
1146 i += 1;
1147 continue;
1148 }
1149
1150 if trimmed.starts_with('[') && line.contains("]:") {
1152 result.push(line.to_string());
1153 i += 1;
1154 continue;
1155 }
1156
1157 if is_definition_list_item(trimmed) {
1159 result.push(line.to_string());
1160 i += 1;
1161 continue;
1162 }
1163
1164 let mut is_single_line_paragraph = true;
1166 if i + 1 < lines.len() {
1167 let next_line = lines[i + 1];
1168 let next_trimmed = next_line.trim();
1169 if !next_trimmed.is_empty()
1171 && !next_trimmed.starts_with('#')
1172 && !next_trimmed.starts_with("```")
1173 && !next_trimmed.starts_with("~~~")
1174 && !next_trimmed.starts_with('>')
1175 && !next_trimmed.starts_with('|')
1176 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1177 && !is_horizontal_rule(next_trimmed)
1178 && !(next_trimmed.starts_with('-')
1179 && !is_horizontal_rule(next_trimmed)
1180 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1181 && !(next_trimmed.starts_with('*')
1182 && !is_horizontal_rule(next_trimmed)
1183 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1184 && !(next_trimmed.starts_with('+')
1185 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1186 && !is_numbered_list_item(next_trimmed)
1187 {
1188 is_single_line_paragraph = false;
1189 }
1190 }
1191
1192 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1194 result.push(line.to_string());
1195 i += 1;
1196 continue;
1197 }
1198
1199 let mut paragraph_parts = Vec::new();
1201 let mut current_part = vec![line];
1202 i += 1;
1203
1204 if options.preserve_breaks {
1206 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1208 Some("\\")
1209 } else if line.ends_with(" ") {
1210 Some(" ")
1211 } else {
1212 None
1213 };
1214 let reflowed = reflow_line(line, options);
1215
1216 if let Some(break_marker) = hard_break_type {
1218 if !reflowed.is_empty() {
1219 let mut reflowed_with_break = reflowed;
1220 let last_idx = reflowed_with_break.len() - 1;
1221 if !has_hard_break(&reflowed_with_break[last_idx]) {
1222 reflowed_with_break[last_idx].push_str(break_marker);
1223 }
1224 result.extend(reflowed_with_break);
1225 }
1226 } else {
1227 result.extend(reflowed);
1228 }
1229 } else {
1230 while i < lines.len() {
1232 let prev_line = if !current_part.is_empty() {
1233 current_part.last().unwrap()
1234 } else {
1235 ""
1236 };
1237 let next_line = lines[i];
1238 let next_trimmed = next_line.trim();
1239
1240 if next_trimmed.is_empty()
1242 || next_trimmed.starts_with('#')
1243 || next_trimmed.starts_with("```")
1244 || next_trimmed.starts_with("~~~")
1245 || next_trimmed.starts_with('>')
1246 || next_trimmed.starts_with('|')
1247 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1248 || is_horizontal_rule(next_trimmed)
1249 || (next_trimmed.starts_with('-')
1250 && !is_horizontal_rule(next_trimmed)
1251 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1252 || (next_trimmed.starts_with('*')
1253 && !is_horizontal_rule(next_trimmed)
1254 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1255 || (next_trimmed.starts_with('+')
1256 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1257 || is_numbered_list_item(next_trimmed)
1258 || is_definition_list_item(next_trimmed)
1259 {
1260 break;
1261 }
1262
1263 if has_hard_break(prev_line) {
1265 paragraph_parts.push(current_part.join(" "));
1267 current_part = vec![next_line];
1268 } else {
1269 current_part.push(next_line);
1270 }
1271 i += 1;
1272 }
1273
1274 if !current_part.is_empty() {
1276 if current_part.len() == 1 {
1277 paragraph_parts.push(current_part[0].to_string());
1279 } else {
1280 paragraph_parts.push(current_part.join(" "));
1281 }
1282 }
1283
1284 for (j, part) in paragraph_parts.iter().enumerate() {
1286 let reflowed = reflow_line(part, options);
1287 result.extend(reflowed);
1288
1289 if j < paragraph_parts.len() - 1 && !result.is_empty() {
1292 let last_idx = result.len() - 1;
1293 if !has_hard_break(&result[last_idx]) {
1294 result[last_idx].push_str(" ");
1295 }
1296 }
1297 }
1298 }
1299 }
1300
1301 let result_text = result.join("\n");
1303 if content.ends_with('\n') && !result_text.ends_with('\n') {
1304 format!("{result_text}\n")
1305 } else {
1306 result_text
1307 }
1308}
1309
/// Result of reflowing a single paragraph: the byte range of the original
/// paragraph inside the source content and the text that should replace it.
///
/// `PartialEq`/`Eq` are derived so results can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset in the source content at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset one past the end of the range to replace (exclusive).
    /// The range covers the newline terminating the paragraph's last line
    /// when there is one.
    pub end_byte: usize,
    /// Replacement text for the `start_byte..end_byte` range.
    pub reflowed_text: String,
}
1320
1321pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1339 if line_number == 0 {
1340 return None;
1341 }
1342
1343 let lines: Vec<&str> = content.lines().collect();
1344
1345 if line_number > lines.len() {
1347 return None;
1348 }
1349
1350 let target_idx = line_number - 1; let target_line = lines[target_idx];
1352 let trimmed = target_line.trim();
1353
1354 if trimmed.is_empty()
1356 || trimmed.starts_with('#')
1357 || trimmed.starts_with("```")
1358 || trimmed.starts_with("~~~")
1359 || target_line.starts_with(" ")
1360 || target_line.starts_with('\t')
1361 || trimmed.starts_with('>')
1362 || trimmed.contains('|') || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1365 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1366 && !is_horizontal_rule(trimmed)
1367 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1368 || is_numbered_list_item(trimmed)
1369 || is_definition_list_item(trimmed)
1370 {
1371 return None;
1372 }
1373
1374 let mut para_start = target_idx;
1376 while para_start > 0 {
1377 let prev_idx = para_start - 1;
1378 let prev_line = lines[prev_idx];
1379 let prev_trimmed = prev_line.trim();
1380
1381 if prev_trimmed.is_empty()
1383 || prev_trimmed.starts_with('#')
1384 || prev_trimmed.starts_with("```")
1385 || prev_trimmed.starts_with("~~~")
1386 || prev_line.starts_with(" ")
1387 || prev_line.starts_with('\t')
1388 || prev_trimmed.starts_with('>')
1389 || prev_trimmed.contains('|')
1390 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1391 || is_horizontal_rule(prev_trimmed)
1392 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1393 && !is_horizontal_rule(prev_trimmed)
1394 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1395 || is_numbered_list_item(prev_trimmed)
1396 || is_definition_list_item(prev_trimmed)
1397 {
1398 break;
1399 }
1400
1401 para_start = prev_idx;
1402 }
1403
1404 let mut para_end = target_idx;
1406 while para_end + 1 < lines.len() {
1407 let next_idx = para_end + 1;
1408 let next_line = lines[next_idx];
1409 let next_trimmed = next_line.trim();
1410
1411 if next_trimmed.is_empty()
1413 || next_trimmed.starts_with('#')
1414 || next_trimmed.starts_with("```")
1415 || next_trimmed.starts_with("~~~")
1416 || next_line.starts_with(" ")
1417 || next_line.starts_with('\t')
1418 || next_trimmed.starts_with('>')
1419 || next_trimmed.contains('|')
1420 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1421 || is_horizontal_rule(next_trimmed)
1422 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1423 && !is_horizontal_rule(next_trimmed)
1424 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1425 || is_numbered_list_item(next_trimmed)
1426 || is_definition_list_item(next_trimmed)
1427 {
1428 break;
1429 }
1430
1431 para_end = next_idx;
1432 }
1433
1434 let paragraph_lines = &lines[para_start..=para_end];
1436
1437 let mut start_byte = 0;
1439 for line in lines.iter().take(para_start) {
1440 start_byte += line.len() + 1; }
1442
1443 let mut end_byte = start_byte;
1444 for line in paragraph_lines.iter() {
1445 end_byte += line.len() + 1; }
1447
1448 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1451
1452 if !includes_trailing_newline {
1454 end_byte -= 1;
1455 }
1456
1457 let paragraph_text = paragraph_lines.join("\n");
1459
1460 let options = ReflowOptions {
1462 line_length,
1463 break_on_sentences: true,
1464 preserve_breaks: false,
1465 sentence_per_line: false,
1466 abbreviations: None,
1467 };
1468
1469 let reflowed = reflow_markdown(¶graph_text, &options);
1471
1472 let reflowed_text = if includes_trailing_newline {
1476 if reflowed.ends_with('\n') {
1478 reflowed
1479 } else {
1480 format!("{reflowed}\n")
1481 }
1482 } else {
1483 if reflowed.ends_with('\n') {
1485 reflowed.trim_end_matches('\n').to_string()
1486 } else {
1487 reflowed
1488 }
1489 };
1490
1491 Some(ParagraphReflow {
1492 start_byte,
1493 end_byte,
1494 reflowed_text,
1495 })
1496}
1497
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `text_ends_with_abbreviation` against the default
    /// abbreviation set (no custom abbreviations supplied).
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);
        let ends_with_abbr = |text: &str| text_ends_with_abbreviation(text, &abbreviations);

        // Default abbreviations followed by a period are recognised,
        // on their own or as the last word of a longer text.
        assert!(ends_with_abbr("Dr."));
        assert!(ends_with_abbr("word Dr."));
        assert!(ends_with_abbr("e.g."));
        assert!(ends_with_abbr("i.e."));
        assert!(ends_with_abbr("Mr."));
        assert!(ends_with_abbr("Mrs."));
        assert!(ends_with_abbr("Ms."));
        assert!(ends_with_abbr("Prof."));

        // Words that are not in the default set are rejected even with a period.
        assert!(!ends_with_abbr("etc."));
        assert!(!ends_with_abbr("paradigms."));
        assert!(!ends_with_abbr("programs."));
        assert!(!ends_with_abbr("items."));
        assert!(!ends_with_abbr("systems."));

        // Anything that does not end with a period is never an abbreviation match.
        assert!(!ends_with_abbr("Dr?"));
        assert!(!ends_with_abbr("Mr!"));
        assert!(!ends_with_abbr("paradigms?"));
        assert!(!ends_with_abbr("word"));
        assert!(!ends_with_abbr(""));
    }
}