1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
15use std::collections::HashSet;
16
/// Options controlling how Markdown text is reflowed.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Target line length in characters. `reflow_line` returns the line
    /// untouched when this is `0` or the line already fits.
    pub line_length: usize,
    /// Whether sentence ends are preferred as break points.
    /// NOTE(review): not consulted in the visible part of this file - confirm usage.
    pub break_on_sentences: bool,
    /// Whether existing line breaks should be kept.
    /// NOTE(review): not consulted in the visible part of this file - confirm usage.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` emits one sentence per output line and
    /// ignores `line_length`.
    pub sentence_per_line: bool,
    /// Extra abbreviations (trailing periods optional, case-insensitive) that
    /// are added to the built-in list when detecting sentence boundaries.
    pub abbreviations: Option<Vec<String>>,
}
33
34impl Default for ReflowOptions {
35 fn default() -> Self {
36 Self {
37 line_length: 80,
38 break_on_sentences: true,
39 preserve_breaks: false,
40 sentence_per_line: false,
41 abbreviations: None,
42 }
43 }
44}
45
/// Builds the lowercase abbreviation set used for sentence-boundary detection.
///
/// The set always contains the built-in titles and Latin abbreviations
/// (stored without their trailing period). Entries from `custom` are added
/// after stripping trailing periods and lowercasing; entries that become
/// empty are ignored.
fn get_abbreviations(custom: &Option<Vec<String>>) -> HashSet<String> {
    const BUILT_IN: [&str; 9] = ["Mr", "Mrs", "Ms", "Dr", "Prof", "Sr", "Jr", "i.e", "e.g"];

    let built_in = BUILT_IN.iter().map(|abbr| abbr.to_lowercase());
    let user_defined = custom
        .iter()
        .flatten()
        .map(|abbr| abbr.trim_end_matches('.').to_lowercase())
        .filter(|abbr| !abbr.is_empty());

    built_in.chain(user_defined).collect()
}
80
/// Returns `true` when `text` ends with a period and its last
/// whitespace-separated word (all trailing periods removed, lowercased)
/// is a known abbreviation.
fn text_ends_with_abbreviation(text: &str, abbreviations: &HashSet<String>) -> bool {
    text.ends_with('.')
        && text
            .trim_end_matches('.')
            .split_whitespace()
            .next_back()
            .is_some_and(|word| abbreviations.contains(&word.to_lowercase()))
}
114
/// Returns `true` for CJK sentence-ending punctuation: the ideographic full
/// stop (U+3002), the full-width exclamation mark (U+FF01) and the full-width
/// question mark (U+FF1F).
///
/// ASCII `!` and `?` are deliberately NOT matched here: they must go through
/// the Latin branch of `is_sentence_boundary`, which requires a following
/// space and an uppercase (or CJK) sentence start.
fn is_cjk_sentence_ending(c: char) -> bool {
    // Escapes are used so the full-width forms cannot be confused with (or
    // normalized into) their ASCII look-alikes.
    matches!(c, '\u{3002}' | '\u{FF01}' | '\u{FF1F}')
}
120
/// Returns `true` when `c` lies in one of the CJK ranges recognised here:
/// CJK Unified Ideographs, CJK Extension A, Hiragana, Katakana, or Hangul
/// Syllables.
fn is_cjk_char(c: char) -> bool {
    const CJK_RANGES: [(char, char); 5] = [
        ('\u{4E00}', '\u{9FFF}'), // CJK Unified Ideographs
        ('\u{3400}', '\u{4DBF}'), // CJK Unified Ideographs Extension A
        ('\u{3040}', '\u{309F}'), // Hiragana
        ('\u{30A0}', '\u{30FF}'), // Katakana
        ('\u{AC00}', '\u{D7AF}'), // Hangul Syllables
    ];

    CJK_RANGES.iter().any(|&(low, high)| low <= c && c <= high)
}
132
133fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
137 let chars: Vec<char> = text.chars().collect();
138
139 if pos + 1 >= chars.len() {
140 return false;
141 }
142
143 let c = chars[pos];
144 let next_char = chars[pos + 1];
145
146 if is_cjk_sentence_ending(c) {
149 let mut after_punct_pos = pos + 1;
151 while after_punct_pos < chars.len()
152 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
153 {
154 after_punct_pos += 1;
155 }
156
157 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
159 after_punct_pos += 1;
160 }
161
162 if after_punct_pos >= chars.len() {
164 return false;
165 }
166
167 while after_punct_pos < chars.len()
169 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
170 {
171 after_punct_pos += 1;
172 }
173
174 if after_punct_pos >= chars.len() {
175 return false;
176 }
177
178 return true;
181 }
182
183 if c != '.' && c != '!' && c != '?' {
185 return false;
186 }
187
188 let (_space_pos, after_space_pos) = if next_char == ' ' {
190 (pos + 1, pos + 2)
192 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
193 (pos + 2, pos + 3)
195 } else if (next_char == '*' || next_char == '_')
196 && pos + 3 < chars.len()
197 && chars[pos + 2] == next_char
198 && chars[pos + 3] == ' '
199 {
200 (pos + 3, pos + 4)
202 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
203 (pos + 3, pos + 4)
205 } else {
206 return false;
207 };
208
209 let mut next_char_pos = after_space_pos;
211 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
212 next_char_pos += 1;
213 }
214
215 if next_char_pos >= chars.len() {
217 return false;
218 }
219
220 let mut first_letter_pos = next_char_pos;
222 while first_letter_pos < chars.len()
223 && (chars[first_letter_pos] == '*' || chars[first_letter_pos] == '_' || chars[first_letter_pos] == '~')
224 {
225 first_letter_pos += 1;
226 }
227
228 if first_letter_pos >= chars.len() {
230 return false;
231 }
232
233 let first_char = chars[first_letter_pos];
235 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
236 return false;
237 }
238
239 if pos > 0 && c == '.' {
241 if text_ends_with_abbreviation(&text[..=pos], abbreviations) {
244 return false;
245 }
246
247 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
250 return false;
251 }
252 }
253 true
254}
255
256pub fn split_into_sentences(text: &str) -> Vec<String> {
258 split_into_sentences_custom(text, &None)
259}
260
261pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
263 let abbreviations = get_abbreviations(custom_abbreviations);
264 split_into_sentences_with_set(text, &abbreviations)
265}
266
267fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
270 let mut sentences = Vec::new();
271 let mut current_sentence = String::new();
272 let mut chars = text.chars().peekable();
273 let mut pos = 0;
274
275 while let Some(c) = chars.next() {
276 current_sentence.push(c);
277
278 if is_sentence_boundary(text, pos, abbreviations) {
279 while chars.peek() == Some(&'*') || chars.peek() == Some(&'_') || chars.peek() == Some(&'~') {
281 current_sentence.push(chars.next().unwrap());
282 pos += 1;
283 }
284
285 if chars.peek() == Some(&' ') {
287 chars.next();
288 pos += 1;
289 }
290
291 sentences.push(current_sentence.trim().to_string());
292 current_sentence.clear();
293 }
294
295 pos += 1;
296 }
297
298 if !current_sentence.trim().is_empty() {
300 sentences.push(current_sentence.trim().to_string());
301 }
302 sentences
303}
304
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_`
/// or `*`, contains only that marker and spaces, and has at least three markers.
fn is_horizontal_rule(line: &str) -> bool {
    if line.len() < 3 {
        return false;
    }

    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
333
/// Returns `true` when `line` starts with one or more numeric characters
/// followed by a `.` that is either the end of the line or followed by a space.
fn is_numbered_list_item(line: &str) -> bool {
    let after_digits = line.trim_start_matches(char::is_numeric);

    // Nothing was stripped: the line does not begin with a numeric character.
    if after_digits.len() == line.len() {
        return false;
    }

    match after_digits.strip_prefix('.') {
        Some(after_dot) => after_dot.chars().next().is_none_or(|c| c == ' '),
        None => false,
    }
}
356
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash.
///
/// A single trailing `\r` (from CRLF input) is ignored before checking. One
/// trailing space alone is NOT a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
366
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A single trailing `\r` is dropped first.
/// * A line ending in `\` is returned unchanged (backslash hard break).
/// * A line ending in two or more spaces is normalized to its content followed
///   by exactly two spaces; a whitespace-only line becomes empty.
/// * Anything else is simply right-trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Keep the hard break, normalized to the canonical two spaces.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
397
398pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
399 if options.sentence_per_line {
401 let elements = parse_markdown_elements(line);
402 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
403 }
404
405 if options.line_length == 0 || line.chars().count() <= options.line_length {
408 return vec![line.to_string()];
409 }
410
411 let elements = parse_markdown_elements(line);
413
414 reflow_elements(&elements, options)
416}
417
/// Where the image inside a linked image (badge) gets its source from.
#[derive(Clone, Debug)]
enum LinkedImageSource {
    /// Image given inline by URL.
    Inline(String),
    /// Image given by reference label.
    Reference(String),
}
426
/// Where the link wrapped around a linked image (badge) points to.
#[derive(Clone, Debug)]
enum LinkedImageTarget {
    /// Link target given inline by URL.
    Inline(String),
    /// Link target given by reference label.
    Reference(String),
}
435
436#[derive(Debug, Clone)]
438enum Element {
439 Text(String),
441 Link { text: String, url: String },
443 ReferenceLink { text: String, reference: String },
445 EmptyReferenceLink { text: String },
447 ShortcutReference { reference: String },
449 InlineImage { alt: String, url: String },
451 ReferenceImage { alt: String, reference: String },
453 EmptyReferenceImage { alt: String },
455 LinkedImage {
461 alt: String,
462 img_source: LinkedImageSource,
463 link_target: LinkedImageTarget,
464 },
465 FootnoteReference { note: String },
467 Strikethrough(String),
469 WikiLink(String),
471 InlineMath(String),
473 DisplayMath(String),
475 EmojiShortcode(String),
477 HtmlTag(String),
479 HtmlEntity(String),
481 HugoShortcode(String),
483 Code(String),
485 Bold {
487 content: String,
488 underscore: bool,
490 },
491 Italic {
493 content: String,
494 underscore: bool,
496 },
497}
498
499impl std::fmt::Display for Element {
500 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
501 match self {
502 Element::Text(s) => write!(f, "{s}"),
503 Element::Link { text, url } => write!(f, "[{text}]({url})"),
504 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
505 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
506 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
507 Element::InlineImage { alt, url } => write!(f, ""),
508 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
509 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
510 Element::LinkedImage {
511 alt,
512 img_source,
513 link_target,
514 } => {
515 let img_part = match img_source {
517 LinkedImageSource::Inline(url) => format!(""),
518 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
519 };
520 match link_target {
522 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
523 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
524 }
525 }
526 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
527 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
528 Element::WikiLink(s) => write!(f, "[[{s}]]"),
529 Element::InlineMath(s) => write!(f, "${s}$"),
530 Element::DisplayMath(s) => write!(f, "$${s}$$"),
531 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
532 Element::HtmlTag(s) => write!(f, "{s}"),
533 Element::HtmlEntity(s) => write!(f, "{s}"),
534 Element::HugoShortcode(s) => write!(f, "{s}"),
535 Element::Code(s) => write!(f, "`{s}`"),
536 Element::Bold { content, underscore } => {
537 if *underscore {
538 write!(f, "__{content}__")
539 } else {
540 write!(f, "**{content}**")
541 }
542 }
543 Element::Italic { content, underscore } => {
544 if *underscore {
545 write!(f, "_{content}_")
546 } else {
547 write!(f, "*{content}*")
548 }
549 }
550 }
551 }
552}
553
554impl Element {
555 fn len(&self) -> usize {
556 match self {
557 Element::Text(s) => s.chars().count(),
558 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::LinkedImage {
566 alt,
567 img_source,
568 link_target,
569 } => {
570 let alt_len = alt.chars().count();
573 let img_len = match img_source {
574 LinkedImageSource::Inline(url) => url.chars().count() + 2, LinkedImageSource::Reference(r) => r.chars().count() + 2, };
577 let link_len = match link_target {
578 LinkedImageTarget::Inline(url) => url.chars().count() + 2, LinkedImageTarget::Reference(r) => r.chars().count() + 2, };
581 5 + alt_len + img_len + link_len
584 }
585 Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::HugoShortcode(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold { content, .. } => content.chars().count() + 4, Element::Italic { content, .. } => content.chars().count() + 2, }
598 }
599}
600
/// An emphasis, strong or strikethrough span located in the source text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset of the opening delimiter.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong (bold) spans.
    is_strong: bool,
    /// `true` for strikethrough spans.
    is_strikethrough: bool,
    /// `true` when the span is delimited with `_` / `__` rather than `*` / `**`.
    uses_underscore: bool,
}
617
618fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
628 let mut spans = Vec::new();
629 let mut options = Options::empty();
630 options.insert(Options::ENABLE_STRIKETHROUGH);
631
632 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
635 let mut strikethrough_stack: Vec<usize> = Vec::new();
636
637 let parser = Parser::new_ext(text, options).into_offset_iter();
638
639 for (event, range) in parser {
640 match event {
641 Event::Start(Tag::Emphasis) => {
642 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
644 emphasis_stack.push((range.start, uses_underscore));
645 }
646 Event::End(TagEnd::Emphasis) => {
647 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
648 let content_start = start_byte + 1;
650 let content_end = range.end - 1;
651 if content_end > content_start
652 && let Some(content) = text.get(content_start..content_end)
653 {
654 spans.push(EmphasisSpan {
655 start: start_byte,
656 end: range.end,
657 content: content.to_string(),
658 is_strong: false,
659 is_strikethrough: false,
660 uses_underscore,
661 });
662 }
663 }
664 }
665 Event::Start(Tag::Strong) => {
666 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
668 strong_stack.push((range.start, uses_underscore));
669 }
670 Event::End(TagEnd::Strong) => {
671 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
672 let content_start = start_byte + 2;
674 let content_end = range.end - 2;
675 if content_end > content_start
676 && let Some(content) = text.get(content_start..content_end)
677 {
678 spans.push(EmphasisSpan {
679 start: start_byte,
680 end: range.end,
681 content: content.to_string(),
682 is_strong: true,
683 is_strikethrough: false,
684 uses_underscore,
685 });
686 }
687 }
688 }
689 Event::Start(Tag::Strikethrough) => {
690 strikethrough_stack.push(range.start);
691 }
692 Event::End(TagEnd::Strikethrough) => {
693 if let Some(start_byte) = strikethrough_stack.pop() {
694 let content_start = start_byte + 2;
696 let content_end = range.end - 2;
697 if content_end > content_start
698 && let Some(content) = text.get(content_start..content_end)
699 {
700 spans.push(EmphasisSpan {
701 start: start_byte,
702 end: range.end,
703 content: content.to_string(),
704 is_strong: false,
705 is_strikethrough: true,
706 uses_underscore: false,
707 });
708 }
709 }
710 }
711 _ => {}
712 }
713 }
714
715 spans.sort_by_key(|s| s.start);
717 spans
718}
719
720fn parse_markdown_elements(text: &str) -> Vec<Element> {
731 let mut elements = Vec::new();
732 let mut remaining = text;
733
734 let emphasis_spans = extract_emphasis_spans(text);
736
737 while !remaining.is_empty() {
738 let current_offset = text.len() - remaining.len();
740 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
742
743 if remaining.contains("[!") {
747 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
749 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
750 {
751 earliest_match = Some((m.start(), "linked_image_ii", m));
752 }
753
754 if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
756 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
757 {
758 earliest_match = Some((m.start(), "linked_image_ri", m));
759 }
760
761 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
763 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
764 {
765 earliest_match = Some((m.start(), "linked_image_ir", m));
766 }
767
768 if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
770 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
771 {
772 earliest_match = Some((m.start(), "linked_image_rr", m));
773 }
774 }
775
776 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
779 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
780 {
781 earliest_match = Some((m.start(), "inline_image", m));
782 }
783
784 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
786 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
787 {
788 earliest_match = Some((m.start(), "ref_image", m));
789 }
790
791 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
793 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
794 {
795 earliest_match = Some((m.start(), "footnote_ref", m));
796 }
797
798 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
800 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
801 {
802 earliest_match = Some((m.start(), "inline_link", m));
803 }
804
805 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
807 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
808 {
809 earliest_match = Some((m.start(), "ref_link", m));
810 }
811
812 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
815 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
816 {
817 earliest_match = Some((m.start(), "shortcut_ref", m));
818 }
819
820 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
822 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
823 {
824 earliest_match = Some((m.start(), "wiki_link", m));
825 }
826
827 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
829 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
830 {
831 earliest_match = Some((m.start(), "display_math", m));
832 }
833
834 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
836 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
837 {
838 earliest_match = Some((m.start(), "inline_math", m));
839 }
840
841 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
845 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
846 {
847 earliest_match = Some((m.start(), "emoji", m));
848 }
849
850 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
852 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
853 {
854 earliest_match = Some((m.start(), "html_entity", m));
855 }
856
857 if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
860 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
861 {
862 earliest_match = Some((m.start(), "hugo_shortcode", m));
863 }
864
865 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
868 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
869 {
870 let matched_text = &remaining[m.start()..m.end()];
872 let is_autolink = matched_text.starts_with("<http://")
873 || matched_text.starts_with("<https://")
874 || matched_text.starts_with("<mailto:")
875 || matched_text.starts_with("<ftp://")
876 || matched_text.starts_with("<ftps://");
877
878 if !is_autolink {
879 earliest_match = Some((m.start(), "html_tag", m));
880 }
881 }
882
883 let mut next_special = remaining.len();
885 let mut special_type = "";
886 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
887
888 if let Some(pos) = remaining.find('`')
890 && pos < next_special
891 {
892 next_special = pos;
893 special_type = "code";
894 }
895
896 for span in &emphasis_spans {
899 if span.start >= current_offset && span.start < current_offset + remaining.len() {
900 let pos_in_remaining = span.start - current_offset;
901 if pos_in_remaining < next_special {
902 next_special = pos_in_remaining;
903 special_type = "pulldown_emphasis";
904 pulldown_emphasis = Some(span);
905 }
906 break; }
908 }
909
910 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
912 pos < next_special
913 } else {
914 false
915 };
916
917 if should_process_markdown_link {
918 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
919
920 if pos > 0 {
922 elements.push(Element::Text(remaining[..pos].to_string()));
923 }
924
925 match pattern_type {
927 "linked_image_ii" => {
929 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
930 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
931 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
932 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
933 elements.push(Element::LinkedImage {
934 alt: alt.to_string(),
935 img_source: LinkedImageSource::Inline(img_url.to_string()),
936 link_target: LinkedImageTarget::Inline(link_url.to_string()),
937 });
938 remaining = &remaining[match_obj.end()..];
939 } else {
940 elements.push(Element::Text("[".to_string()));
941 remaining = &remaining[1..];
942 }
943 }
944 "linked_image_ri" => {
946 if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
947 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
948 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
949 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
950 elements.push(Element::LinkedImage {
951 alt: alt.to_string(),
952 img_source: LinkedImageSource::Reference(img_ref.to_string()),
953 link_target: LinkedImageTarget::Inline(link_url.to_string()),
954 });
955 remaining = &remaining[match_obj.end()..];
956 } else {
957 elements.push(Element::Text("[".to_string()));
958 remaining = &remaining[1..];
959 }
960 }
961 "linked_image_ir" => {
963 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
964 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
965 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
966 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
967 elements.push(Element::LinkedImage {
968 alt: alt.to_string(),
969 img_source: LinkedImageSource::Inline(img_url.to_string()),
970 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
971 });
972 remaining = &remaining[match_obj.end()..];
973 } else {
974 elements.push(Element::Text("[".to_string()));
975 remaining = &remaining[1..];
976 }
977 }
978 "linked_image_rr" => {
980 if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
981 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
982 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
983 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
984 elements.push(Element::LinkedImage {
985 alt: alt.to_string(),
986 img_source: LinkedImageSource::Reference(img_ref.to_string()),
987 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
988 });
989 remaining = &remaining[match_obj.end()..];
990 } else {
991 elements.push(Element::Text("[".to_string()));
992 remaining = &remaining[1..];
993 }
994 }
995 "inline_image" => {
996 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
997 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
998 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
999 elements.push(Element::InlineImage {
1000 alt: alt.to_string(),
1001 url: url.to_string(),
1002 });
1003 remaining = &remaining[match_obj.end()..];
1004 } else {
1005 elements.push(Element::Text("!".to_string()));
1006 remaining = &remaining[1..];
1007 }
1008 }
1009 "ref_image" => {
1010 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1011 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1012 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1013
1014 if reference.is_empty() {
1015 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1016 } else {
1017 elements.push(Element::ReferenceImage {
1018 alt: alt.to_string(),
1019 reference: reference.to_string(),
1020 });
1021 }
1022 remaining = &remaining[match_obj.end()..];
1023 } else {
1024 elements.push(Element::Text("!".to_string()));
1025 remaining = &remaining[1..];
1026 }
1027 }
1028 "footnote_ref" => {
1029 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1030 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1031 elements.push(Element::FootnoteReference { note: note.to_string() });
1032 remaining = &remaining[match_obj.end()..];
1033 } else {
1034 elements.push(Element::Text("[".to_string()));
1035 remaining = &remaining[1..];
1036 }
1037 }
1038 "inline_link" => {
1039 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1040 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1041 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1042 elements.push(Element::Link {
1043 text: text.to_string(),
1044 url: url.to_string(),
1045 });
1046 remaining = &remaining[match_obj.end()..];
1047 } else {
1048 elements.push(Element::Text("[".to_string()));
1050 remaining = &remaining[1..];
1051 }
1052 }
1053 "ref_link" => {
1054 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1055 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1056 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1057
1058 if reference.is_empty() {
1059 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1061 } else {
1062 elements.push(Element::ReferenceLink {
1064 text: text.to_string(),
1065 reference: reference.to_string(),
1066 });
1067 }
1068 remaining = &remaining[match_obj.end()..];
1069 } else {
1070 elements.push(Element::Text("[".to_string()));
1072 remaining = &remaining[1..];
1073 }
1074 }
1075 "shortcut_ref" => {
1076 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1077 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1078 elements.push(Element::ShortcutReference {
1079 reference: reference.to_string(),
1080 });
1081 remaining = &remaining[match_obj.end()..];
1082 } else {
1083 elements.push(Element::Text("[".to_string()));
1085 remaining = &remaining[1..];
1086 }
1087 }
1088 "wiki_link" => {
1089 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1090 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1091 elements.push(Element::WikiLink(content.to_string()));
1092 remaining = &remaining[match_obj.end()..];
1093 } else {
1094 elements.push(Element::Text("[[".to_string()));
1095 remaining = &remaining[2..];
1096 }
1097 }
1098 "display_math" => {
1099 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1100 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1101 elements.push(Element::DisplayMath(math.to_string()));
1102 remaining = &remaining[match_obj.end()..];
1103 } else {
1104 elements.push(Element::Text("$$".to_string()));
1105 remaining = &remaining[2..];
1106 }
1107 }
1108 "inline_math" => {
1109 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1110 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1111 elements.push(Element::InlineMath(math.to_string()));
1112 remaining = &remaining[match_obj.end()..];
1113 } else {
1114 elements.push(Element::Text("$".to_string()));
1115 remaining = &remaining[1..];
1116 }
1117 }
1118 "emoji" => {
1120 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1121 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1122 elements.push(Element::EmojiShortcode(emoji.to_string()));
1123 remaining = &remaining[match_obj.end()..];
1124 } else {
1125 elements.push(Element::Text(":".to_string()));
1126 remaining = &remaining[1..];
1127 }
1128 }
1129 "html_entity" => {
1130 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
1132 remaining = &remaining[match_obj.end()..];
1133 }
1134 "hugo_shortcode" => {
1135 elements.push(Element::HugoShortcode(remaining[..match_obj.end()].to_string()));
1137 remaining = &remaining[match_obj.end()..];
1138 }
1139 "html_tag" => {
1140 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
1142 remaining = &remaining[match_obj.end()..];
1143 }
1144 _ => {
1145 elements.push(Element::Text("[".to_string()));
1147 remaining = &remaining[1..];
1148 }
1149 }
1150 } else {
1151 if next_special > 0 && next_special < remaining.len() {
1155 elements.push(Element::Text(remaining[..next_special].to_string()));
1156 remaining = &remaining[next_special..];
1157 }
1158
1159 match special_type {
1161 "code" => {
1162 if let Some(code_end) = remaining[1..].find('`') {
1164 let code = &remaining[1..1 + code_end];
1165 elements.push(Element::Code(code.to_string()));
1166 remaining = &remaining[1 + code_end + 1..];
1167 } else {
1168 elements.push(Element::Text(remaining.to_string()));
1170 break;
1171 }
1172 }
1173 "pulldown_emphasis" => {
1174 if let Some(span) = pulldown_emphasis {
1176 let span_len = span.end - span.start;
1177 if span.is_strikethrough {
1178 elements.push(Element::Strikethrough(span.content.clone()));
1179 } else if span.is_strong {
1180 elements.push(Element::Bold {
1181 content: span.content.clone(),
1182 underscore: span.uses_underscore,
1183 });
1184 } else {
1185 elements.push(Element::Italic {
1186 content: span.content.clone(),
1187 underscore: span.uses_underscore,
1188 });
1189 }
1190 remaining = &remaining[span_len..];
1191 } else {
1192 elements.push(Element::Text(remaining[..1].to_string()));
1194 remaining = &remaining[1..];
1195 }
1196 }
1197 _ => {
1198 elements.push(Element::Text(remaining.to_string()));
1200 break;
1201 }
1202 }
1203 }
1204 }
1205
1206 elements
1207}
1208
1209fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1211 let abbreviations = get_abbreviations(custom_abbreviations);
1212 let mut lines = Vec::new();
1213 let mut current_line = String::new();
1214
1215 for element in elements.iter() {
1216 let element_str = format!("{element}");
1217
1218 if let Element::Text(text) = element {
1220 let combined = format!("{current_line}{text}");
1222 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1224
1225 if sentences.len() > 1 {
1226 for (i, sentence) in sentences.iter().enumerate() {
1228 if i == 0 {
1229 let trimmed = sentence.trim();
1232
1233 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1234 current_line = sentence.to_string();
1236 } else {
1237 lines.push(sentence.to_string());
1239 current_line.clear();
1240 }
1241 } else if i == sentences.len() - 1 {
1242 let trimmed = sentence.trim();
1244 let ends_with_sentence_punct =
1245 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1246
1247 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1248 lines.push(sentence.to_string());
1250 current_line.clear();
1251 } else {
1252 current_line = sentence.to_string();
1254 }
1255 } else {
1256 lines.push(sentence.to_string());
1258 }
1259 }
1260 } else {
1261 let trimmed = combined.trim();
1263 let ends_with_sentence_punct =
1264 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1265
1266 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1267 lines.push(trimmed.to_string());
1269 current_line.clear();
1270 } else {
1271 current_line = combined;
1273 }
1274 }
1275 } else if let Element::Italic { content, underscore } = element {
1276 let marker = if *underscore { "_" } else { "*" };
1278 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1279 } else if let Element::Bold { content, underscore } = element {
1280 let marker = if *underscore { "__" } else { "**" };
1282 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1283 } else if let Element::Strikethrough(content) = element {
1284 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1286 } else {
1287 if !current_line.is_empty()
1290 && !current_line.ends_with(' ')
1291 && !current_line.ends_with('(')
1292 && !current_line.ends_with('[')
1293 {
1294 current_line.push(' ');
1295 }
1296 current_line.push_str(&element_str);
1297 }
1298 }
1299
1300 if !current_line.is_empty() {
1302 lines.push(current_line.trim().to_string());
1303 }
1304 lines
1305}
1306
1307fn handle_emphasis_sentence_split(
1309 content: &str,
1310 marker: &str,
1311 abbreviations: &HashSet<String>,
1312 current_line: &mut String,
1313 lines: &mut Vec<String>,
1314) {
1315 let sentences = split_into_sentences_with_set(content, abbreviations);
1317
1318 if sentences.len() <= 1 {
1319 if !current_line.is_empty()
1321 && !current_line.ends_with(' ')
1322 && !current_line.ends_with('(')
1323 && !current_line.ends_with('[')
1324 {
1325 current_line.push(' ');
1326 }
1327 current_line.push_str(marker);
1328 current_line.push_str(content);
1329 current_line.push_str(marker);
1330
1331 let trimmed = content.trim();
1333 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1334 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1335 lines.push(current_line.clone());
1336 current_line.clear();
1337 }
1338 } else {
1339 for (i, sentence) in sentences.iter().enumerate() {
1341 let trimmed = sentence.trim();
1342 if trimmed.is_empty() {
1343 continue;
1344 }
1345
1346 if i == 0 {
1347 if !current_line.is_empty()
1349 && !current_line.ends_with(' ')
1350 && !current_line.ends_with('(')
1351 && !current_line.ends_with('[')
1352 {
1353 current_line.push(' ');
1354 }
1355 current_line.push_str(marker);
1356 current_line.push_str(trimmed);
1357 current_line.push_str(marker);
1358
1359 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1361 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1362 lines.push(current_line.clone());
1363 current_line.clear();
1364 }
1365 } else if i == sentences.len() - 1 {
1366 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1368
1369 let mut line = String::new();
1370 line.push_str(marker);
1371 line.push_str(trimmed);
1372 line.push_str(marker);
1373
1374 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1375 lines.push(line);
1376 } else {
1377 *current_line = line;
1379 }
1380 } else {
1381 let mut line = String::new();
1383 line.push_str(marker);
1384 line.push_str(trimmed);
1385 line.push_str(marker);
1386 lines.push(line);
1387 }
1388 }
1389 }
1390}
1391
1392fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1394 let mut lines = Vec::new();
1395 let mut current_line = String::new();
1396 let mut current_length = 0;
1397
1398 for element in elements {
1399 let element_str = format!("{element}");
1400 let element_len = element.len();
1401
1402 if let Element::Text(text) = element {
1404 let has_leading_space = text.starts_with(char::is_whitespace);
1406 let words: Vec<&str> = text.split_whitespace().collect();
1408
1409 for (i, word) in words.iter().enumerate() {
1410 let word_len = word.chars().count();
1411 let is_trailing_punct = word
1413 .chars()
1414 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1415
1416 if current_length > 0 && current_length + 1 + word_len > options.line_length && !is_trailing_punct {
1417 lines.push(current_line.trim().to_string());
1419 current_line = word.to_string();
1420 current_length = word_len;
1421 } else {
1422 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1426 current_line.push(' ');
1427 current_length += 1;
1428 }
1429 current_line.push_str(word);
1430 current_length += word_len;
1431 }
1432 }
1433 } else {
1434 if current_length > 0 && current_length + 1 + element_len > options.line_length {
1437 lines.push(current_line.trim().to_string());
1439 current_line = element_str;
1440 current_length = element_len;
1441 } else {
1442 let ends_with_opener =
1445 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1446 if current_length > 0 && !ends_with_opener {
1447 current_line.push(' ');
1448 current_length += 1;
1449 }
1450 current_line.push_str(&element_str);
1451 current_length += element_len;
1452 }
1453 }
1454 }
1455
1456 if !current_line.is_empty() {
1458 lines.push(current_line.trim_end().to_string());
1459 }
1460
1461 lines
1462}
1463
1464pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1466 let lines: Vec<&str> = content.lines().collect();
1467 let mut result = Vec::new();
1468 let mut i = 0;
1469
1470 while i < lines.len() {
1471 let line = lines[i];
1472 let trimmed = line.trim();
1473
1474 if trimmed.is_empty() {
1476 result.push(String::new());
1477 i += 1;
1478 continue;
1479 }
1480
1481 if trimmed.starts_with('#') {
1483 result.push(line.to_string());
1484 i += 1;
1485 continue;
1486 }
1487
1488 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1490 result.push(line.to_string());
1491 i += 1;
1492 while i < lines.len() {
1494 result.push(lines[i].to_string());
1495 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1496 i += 1;
1497 break;
1498 }
1499 i += 1;
1500 }
1501 continue;
1502 }
1503
1504 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1506 result.push(line.to_string());
1508 i += 1;
1509 while i < lines.len() {
1510 let next_line = lines[i];
1511 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1513 result.push(next_line.to_string());
1514 i += 1;
1515 } else {
1516 break;
1517 }
1518 }
1519 continue;
1520 }
1521
1522 if trimmed.starts_with('>') {
1524 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1527 let quote_prefix = line[0..gt_pos + 1].to_string();
1528 let quote_content = &line[quote_prefix.len()..].trim_start();
1529
1530 let reflowed = reflow_line(quote_content, options);
1531 for reflowed_line in reflowed.iter() {
1532 result.push(format!("{quote_prefix} {reflowed_line}"));
1533 }
1534 i += 1;
1535 continue;
1536 }
1537
1538 if is_horizontal_rule(trimmed) {
1540 result.push(line.to_string());
1541 i += 1;
1542 continue;
1543 }
1544
1545 let is_unordered_list = |s: &str, marker: char| -> bool {
1549 s.starts_with(marker) && !is_horizontal_rule(s) && (s.len() == 1 || s.chars().nth(1) == Some(' '))
1550 };
1551 if is_unordered_list(trimmed, '-')
1552 || is_unordered_list(trimmed, '*')
1553 || is_unordered_list(trimmed, '+')
1554 || is_numbered_list_item(trimmed)
1555 {
1556 let indent = line.len() - line.trim_start().len();
1558 let indent_str = " ".repeat(indent);
1559
1560 let mut marker_end = indent;
1563 let mut content_start = indent;
1564
1565 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1566 if let Some(period_pos) = line[indent..].find('.') {
1568 marker_end = indent + period_pos + 1; content_start = marker_end;
1570 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1574 content_start += 1;
1575 }
1576 }
1577 } else {
1578 marker_end = indent + 1; content_start = marker_end;
1581 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1585 content_start += 1;
1586 }
1587 }
1588
1589 let marker = &line[indent..marker_end];
1590
1591 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1594 i += 1;
1595
1596 while i < lines.len() {
1598 let next_line = lines[i];
1599 let next_trimmed = next_line.trim();
1600
1601 if next_trimmed.is_empty()
1603 || next_trimmed.starts_with('#')
1604 || next_trimmed.starts_with("```")
1605 || next_trimmed.starts_with("~~~")
1606 || next_trimmed.starts_with('>')
1607 || next_trimmed.starts_with('|')
1608 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1609 || is_horizontal_rule(next_trimmed)
1610 || (next_trimmed.starts_with('-')
1611 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1612 || (next_trimmed.starts_with('*')
1613 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1614 || (next_trimmed.starts_with('+')
1615 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1616 || is_numbered_list_item(next_trimmed)
1617 || is_definition_list_item(next_trimmed)
1618 {
1619 break;
1620 }
1621
1622 let next_indent = next_line.len() - next_line.trim_start().len();
1624 if next_indent >= content_start {
1625 let trimmed_start = next_line.trim_start();
1628 list_content.push(trim_preserving_hard_break(trimmed_start));
1629 i += 1;
1630 } else {
1631 break;
1633 }
1634 }
1635
1636 let combined_content = if options.preserve_breaks {
1639 list_content[0].clone()
1640 } else {
1641 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1643 if has_hard_breaks {
1644 list_content.join("\n")
1646 } else {
1647 list_content.join(" ")
1649 }
1650 };
1651
1652 let trimmed_marker = marker;
1654 let continuation_spaces = content_start;
1655
1656 let prefix_length = indent + trimmed_marker.len() + 1;
1658
1659 let adjusted_options = ReflowOptions {
1661 line_length: options.line_length.saturating_sub(prefix_length),
1662 ..options.clone()
1663 };
1664
1665 let reflowed = reflow_line(&combined_content, &adjusted_options);
1666 for (j, reflowed_line) in reflowed.iter().enumerate() {
1667 if j == 0 {
1668 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1669 } else {
1670 let continuation_indent = " ".repeat(continuation_spaces);
1672 result.push(format!("{continuation_indent}{reflowed_line}"));
1673 }
1674 }
1675 continue;
1676 }
1677
1678 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
1680 result.push(line.to_string());
1681 i += 1;
1682 continue;
1683 }
1684
1685 if trimmed.starts_with('[') && line.contains("]:") {
1687 result.push(line.to_string());
1688 i += 1;
1689 continue;
1690 }
1691
1692 if is_definition_list_item(trimmed) {
1694 result.push(line.to_string());
1695 i += 1;
1696 continue;
1697 }
1698
1699 let mut is_single_line_paragraph = true;
1701 if i + 1 < lines.len() {
1702 let next_line = lines[i + 1];
1703 let next_trimmed = next_line.trim();
1704 if !next_trimmed.is_empty()
1706 && !next_trimmed.starts_with('#')
1707 && !next_trimmed.starts_with("```")
1708 && !next_trimmed.starts_with("~~~")
1709 && !next_trimmed.starts_with('>')
1710 && !next_trimmed.starts_with('|')
1711 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1712 && !is_horizontal_rule(next_trimmed)
1713 && !(next_trimmed.starts_with('-')
1714 && !is_horizontal_rule(next_trimmed)
1715 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1716 && !(next_trimmed.starts_with('*')
1717 && !is_horizontal_rule(next_trimmed)
1718 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1719 && !(next_trimmed.starts_with('+')
1720 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1721 && !is_numbered_list_item(next_trimmed)
1722 {
1723 is_single_line_paragraph = false;
1724 }
1725 }
1726
1727 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1729 result.push(line.to_string());
1730 i += 1;
1731 continue;
1732 }
1733
1734 let mut paragraph_parts = Vec::new();
1736 let mut current_part = vec![line];
1737 i += 1;
1738
1739 if options.preserve_breaks {
1741 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1743 Some("\\")
1744 } else if line.ends_with(" ") {
1745 Some(" ")
1746 } else {
1747 None
1748 };
1749 let reflowed = reflow_line(line, options);
1750
1751 if let Some(break_marker) = hard_break_type {
1753 if !reflowed.is_empty() {
1754 let mut reflowed_with_break = reflowed;
1755 let last_idx = reflowed_with_break.len() - 1;
1756 if !has_hard_break(&reflowed_with_break[last_idx]) {
1757 reflowed_with_break[last_idx].push_str(break_marker);
1758 }
1759 result.extend(reflowed_with_break);
1760 }
1761 } else {
1762 result.extend(reflowed);
1763 }
1764 } else {
1765 while i < lines.len() {
1767 let prev_line = if !current_part.is_empty() {
1768 current_part.last().unwrap()
1769 } else {
1770 ""
1771 };
1772 let next_line = lines[i];
1773 let next_trimmed = next_line.trim();
1774
1775 if next_trimmed.is_empty()
1777 || next_trimmed.starts_with('#')
1778 || next_trimmed.starts_with("```")
1779 || next_trimmed.starts_with("~~~")
1780 || next_trimmed.starts_with('>')
1781 || next_trimmed.starts_with('|')
1782 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1783 || is_horizontal_rule(next_trimmed)
1784 || (next_trimmed.starts_with('-')
1785 && !is_horizontal_rule(next_trimmed)
1786 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1787 || (next_trimmed.starts_with('*')
1788 && !is_horizontal_rule(next_trimmed)
1789 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1790 || (next_trimmed.starts_with('+')
1791 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1792 || is_numbered_list_item(next_trimmed)
1793 || is_definition_list_item(next_trimmed)
1794 {
1795 break;
1796 }
1797
1798 let prev_trimmed = prev_line.trim();
1801 let abbreviations = get_abbreviations(&options.abbreviations);
1802 let ends_with_sentence = (prev_trimmed.ends_with('.')
1803 || prev_trimmed.ends_with('!')
1804 || prev_trimmed.ends_with('?')
1805 || prev_trimmed.ends_with(".*")
1806 || prev_trimmed.ends_with("!*")
1807 || prev_trimmed.ends_with("?*")
1808 || prev_trimmed.ends_with("._")
1809 || prev_trimmed.ends_with("!_")
1810 || prev_trimmed.ends_with("?_"))
1811 && !text_ends_with_abbreviation(prev_trimmed.trim_end_matches(['*', '_']), &abbreviations);
1812
1813 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
1814 paragraph_parts.push(current_part.join(" "));
1816 current_part = vec![next_line];
1817 } else {
1818 current_part.push(next_line);
1819 }
1820 i += 1;
1821 }
1822
1823 if !current_part.is_empty() {
1825 if current_part.len() == 1 {
1826 paragraph_parts.push(current_part[0].to_string());
1828 } else {
1829 paragraph_parts.push(current_part.join(" "));
1830 }
1831 }
1832
1833 for (j, part) in paragraph_parts.iter().enumerate() {
1835 let reflowed = reflow_line(part, options);
1836 result.extend(reflowed);
1837
1838 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
1842 let last_idx = result.len() - 1;
1843 if !has_hard_break(&result[last_idx]) {
1844 result[last_idx].push_str(" ");
1845 }
1846 }
1847 }
1848 }
1849 }
1850
1851 let result_text = result.join("\n");
1853 if content.ends_with('\n') && !result_text.ends_with('\n') {
1854 format!("{result_text}\n")
1855 } else {
1856 result_text
1857 }
1858}
1859
/// Replacement produced by `reflow_paragraph_at_line`: a byte range of the original
/// content together with the text that should take its place.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset just past the paragraph; the paragraph's trailing line terminator is
    /// inside the range when the paragraph has one.
    pub end_byte: usize,
    /// Reflowed paragraph text that replaces `start_byte..end_byte`.
    pub reflowed_text: String,
}
1870
1871pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1889 if line_number == 0 {
1890 return None;
1891 }
1892
1893 let lines: Vec<&str> = content.lines().collect();
1894
1895 if line_number > lines.len() {
1897 return None;
1898 }
1899
1900 let target_idx = line_number - 1; let target_line = lines[target_idx];
1902 let trimmed = target_line.trim();
1903
1904 if trimmed.is_empty()
1906 || trimmed.starts_with('#')
1907 || trimmed.starts_with("```")
1908 || trimmed.starts_with("~~~")
1909 || ElementCache::calculate_indentation_width_default(target_line) >= 4
1910 || trimmed.starts_with('>')
1911 || crate::utils::table_utils::TableUtils::is_potential_table_row(target_line) || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1914 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1915 && !is_horizontal_rule(trimmed)
1916 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1917 || is_numbered_list_item(trimmed)
1918 || is_definition_list_item(trimmed)
1919 {
1920 return None;
1921 }
1922
1923 let mut para_start = target_idx;
1925 while para_start > 0 {
1926 let prev_idx = para_start - 1;
1927 let prev_line = lines[prev_idx];
1928 let prev_trimmed = prev_line.trim();
1929
1930 if prev_trimmed.is_empty()
1932 || prev_trimmed.starts_with('#')
1933 || prev_trimmed.starts_with("```")
1934 || prev_trimmed.starts_with("~~~")
1935 || ElementCache::calculate_indentation_width_default(prev_line) >= 4
1936 || prev_trimmed.starts_with('>')
1937 || crate::utils::table_utils::TableUtils::is_potential_table_row(prev_line)
1938 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1939 || is_horizontal_rule(prev_trimmed)
1940 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1941 && !is_horizontal_rule(prev_trimmed)
1942 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1943 || is_numbered_list_item(prev_trimmed)
1944 || is_definition_list_item(prev_trimmed)
1945 {
1946 break;
1947 }
1948
1949 para_start = prev_idx;
1950 }
1951
1952 let mut para_end = target_idx;
1954 while para_end + 1 < lines.len() {
1955 let next_idx = para_end + 1;
1956 let next_line = lines[next_idx];
1957 let next_trimmed = next_line.trim();
1958
1959 if next_trimmed.is_empty()
1961 || next_trimmed.starts_with('#')
1962 || next_trimmed.starts_with("```")
1963 || next_trimmed.starts_with("~~~")
1964 || ElementCache::calculate_indentation_width_default(next_line) >= 4
1965 || next_trimmed.starts_with('>')
1966 || crate::utils::table_utils::TableUtils::is_potential_table_row(next_line)
1967 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1968 || is_horizontal_rule(next_trimmed)
1969 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1970 && !is_horizontal_rule(next_trimmed)
1971 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1972 || is_numbered_list_item(next_trimmed)
1973 || is_definition_list_item(next_trimmed)
1974 {
1975 break;
1976 }
1977
1978 para_end = next_idx;
1979 }
1980
1981 let paragraph_lines = &lines[para_start..=para_end];
1983
1984 let mut start_byte = 0;
1986 for line in lines.iter().take(para_start) {
1987 start_byte += line.len() + 1; }
1989
1990 let mut end_byte = start_byte;
1991 for line in paragraph_lines.iter() {
1992 end_byte += line.len() + 1; }
1994
1995 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1998
1999 if !includes_trailing_newline {
2001 end_byte -= 1;
2002 }
2003
2004 let paragraph_text = paragraph_lines.join("\n");
2006
2007 let options = ReflowOptions {
2009 line_length,
2010 break_on_sentences: true,
2011 preserve_breaks: false,
2012 sentence_per_line: false,
2013 abbreviations: None,
2014 };
2015
2016 let reflowed = reflow_markdown(¶graph_text, &options);
2018
2019 let reflowed_text = if includes_trailing_newline {
2023 if reflowed.ends_with('\n') {
2025 reflowed
2026 } else {
2027 format!("{reflowed}\n")
2028 }
2029 } else {
2030 if reflowed.ends_with('\n') {
2032 reflowed.trim_end_matches('\n').to_string()
2033 } else {
2034 reflowed
2035 }
2036 };
2037
2038 Some(ParagraphReflow {
2039 start_byte,
2040 end_byte,
2041 reflowed_text,
2042 })
2043}
2044
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` accepts text whose last word is a built-in
    /// abbreviation followed by a period, and rejects everything else.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let known = get_abbreviations(&None);

        // Last word is a built-in abbreviation and the text ends with a period.
        let accepted = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in &accepted {
            assert!(
                text_ends_with_abbreviation(text, &known),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Ordinary words, abbreviations followed by `?`/`!`, text without a period and
        // the empty string are all rejected; "etc" is not among the built-ins.
        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for text in &rejected {
            assert!(
                !text_ends_with_abbreviation(text, &known),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }
}