1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20
/// Options controlling how a line of Markdown text is reflowed.
#[derive(Clone, Debug)]
pub struct ReflowOptions {
    /// Maximum line width, counted in characters. With `0`, `reflow_line`
    /// returns the line unchanged (unless `sentence_per_line` is set).
    pub line_length: usize,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// asks the wrapper to prefer sentence ends as break points — confirm.
    pub break_on_sentences: bool,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// keeps the author's existing line breaks — confirm.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` ignores `line_length` and emits one
    /// sentence per output line.
    pub sentence_per_line: bool,
    /// Caller-supplied abbreviations handed to `get_abbreviations` when
    /// sentences are split. `None` means no custom list was provided.
    pub abbreviations: Option<Vec<String>>,
}
37
38impl Default for ReflowOptions {
39 fn default() -> Self {
40 Self {
41 line_length: 80,
42 break_on_sentences: true,
43 preserve_breaks: false,
44 sentence_per_line: false,
45 abbreviations: None,
46 }
47 }
48}
49
50fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
54 let chars: Vec<char> = text.chars().collect();
55
56 if pos + 1 >= chars.len() {
57 return false;
58 }
59
60 let c = chars[pos];
61 let next_char = chars[pos + 1];
62
63 if is_cjk_sentence_ending(c) {
66 let mut after_punct_pos = pos + 1;
68 while after_punct_pos < chars.len()
69 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
70 {
71 after_punct_pos += 1;
72 }
73
74 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
76 after_punct_pos += 1;
77 }
78
79 if after_punct_pos >= chars.len() {
81 return false;
82 }
83
84 while after_punct_pos < chars.len()
86 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
87 {
88 after_punct_pos += 1;
89 }
90
91 if after_punct_pos >= chars.len() {
92 return false;
93 }
94
95 return true;
98 }
99
100 if c != '.' && c != '!' && c != '?' {
102 return false;
103 }
104
105 let (_space_pos, after_space_pos) = if next_char == ' ' {
107 (pos + 1, pos + 2)
109 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
110 if chars[pos + 2] == ' ' {
112 (pos + 2, pos + 3)
114 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
115 (pos + 3, pos + 4)
117 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
118 && pos + 4 < chars.len()
119 && chars[pos + 3] == chars[pos + 2]
120 && chars[pos + 4] == ' '
121 {
122 (pos + 4, pos + 5)
124 } else {
125 return false;
126 }
127 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
128 (pos + 2, pos + 3)
130 } else if (next_char == '*' || next_char == '_')
131 && pos + 3 < chars.len()
132 && chars[pos + 2] == next_char
133 && chars[pos + 3] == ' '
134 {
135 (pos + 3, pos + 4)
137 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
138 (pos + 3, pos + 4)
140 } else {
141 return false;
142 };
143
144 let mut next_char_pos = after_space_pos;
146 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
147 next_char_pos += 1;
148 }
149
150 if next_char_pos >= chars.len() {
152 return false;
153 }
154
155 let mut first_letter_pos = next_char_pos;
157 while first_letter_pos < chars.len()
158 && (chars[first_letter_pos] == '*'
159 || chars[first_letter_pos] == '_'
160 || chars[first_letter_pos] == '~'
161 || is_opening_quote(chars[first_letter_pos]))
162 {
163 first_letter_pos += 1;
164 }
165
166 if first_letter_pos >= chars.len() {
168 return false;
169 }
170
171 let first_char = chars[first_letter_pos];
173 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
174 return false;
175 }
176
177 if pos > 0 && c == '.' {
179 if text_ends_with_abbreviation(&text[..=pos], abbreviations) {
182 return false;
183 }
184
185 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
188 return false;
189 }
190 }
191 true
192}
193
194pub fn split_into_sentences(text: &str) -> Vec<String> {
196 split_into_sentences_custom(text, &None)
197}
198
199pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
201 let abbreviations = get_abbreviations(custom_abbreviations);
202 split_into_sentences_with_set(text, &abbreviations)
203}
204
205fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
208 let mut sentences = Vec::new();
209 let mut current_sentence = String::new();
210 let mut chars = text.chars().peekable();
211 let mut pos = 0;
212
213 while let Some(c) = chars.next() {
214 current_sentence.push(c);
215
216 if is_sentence_boundary(text, pos, abbreviations) {
217 while let Some(&next) = chars.peek() {
219 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
220 current_sentence.push(chars.next().unwrap());
221 pos += 1;
222 } else {
223 break;
224 }
225 }
226
227 if chars.peek() == Some(&' ') {
229 chars.next();
230 pos += 1;
231 }
232
233 sentences.push(current_sentence.trim().to_string());
234 current_sentence.clear();
235 }
236
237 pos += 1;
238 }
239
240 if !current_sentence.trim().is_empty() {
242 sentences.push(current_sentence.trim().to_string());
243 }
244 sentences
245}
246
/// Returns `true` when `line` is a Markdown horizontal rule: it starts with
/// `-`, `_` or `*`, contains only that marker and spaces, and has at least
/// three markers.
fn is_horizontal_rule(line: &str) -> bool {
    // Fewer than three bytes cannot hold three markers.
    if line.len() < 3 {
        return false;
    }

    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
275
/// Returns `true` when `line` starts with an ordered-list marker of the form
/// `<digits>. ` (one or more ASCII digits, a period, then a space).
///
/// Only ASCII digits count: Markdown list numbers are `0-9`, so characters
/// such as `²`, `½` or `٣` (which `char::is_numeric` accepts) must not turn
/// an ordinary paragraph into a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are one byte each, so the count is also a valid byte offset.
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    digit_count > 0 && line[digit_count..].starts_with(". ")
}
299
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF line endings) is ignored before checking. A
/// single trailing space is NOT a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
309
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned as-is.
/// * A line ending in two or more spaces is normalized to its content plus
///   exactly two spaces; a whitespace-only line becomes empty.
/// * Anything else (including a single trailing space, which is not a hard
///   break) is simply right-trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
340
341pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
342 if options.sentence_per_line {
344 let elements = parse_markdown_elements(line);
345 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
346 }
347
348 if options.line_length == 0 || line.chars().count() <= options.line_length {
351 return vec![line.to_string()];
352 }
353
354 let elements = parse_markdown_elements(line);
356
357 reflow_elements(&elements, options)
359}
360
/// Where the image inside a linked image (`Element::LinkedImage`) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline destination: the payload is the image URL.
    Inline(String),
    /// Reference-style image: the payload is the reference label.
    Reference(String),
}
369
/// Where the link wrapped around a linked image (`Element::LinkedImage`) points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline destination, rendered as `(url)` after the image.
    Inline(String),
    /// Reference-style target, rendered as `[reference]` after the image.
    Reference(String),
}
378
/// One inline piece of a Markdown line, as produced by
/// `parse_markdown_elements`. The `Display` impl turns an element back into
/// Markdown source and `len` reports its rendered width in characters.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Image with an inline destination (alt text plus URL).
    InlineImage { alt: String, url: String },
    /// Reference-style image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link (badge style). Both the image source and
    /// the link target can independently be inline or reference-style.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough text: `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$math$`.
    InlineMath(String),
    /// Display math: `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// Raw HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Inline code span: `` `code` ``.
    Code(String),
    /// Strong emphasis.
    Bold {
        /// Text between the delimiters.
        content: String,
        /// `true` renders `__content__`, `false` renders `**content**`.
        underscore: bool,
    },
    /// Regular emphasis.
    Italic {
        /// Text between the delimiters.
        content: String,
        /// `true` renders `_content_`, `false` renders `*content*`.
        underscore: bool,
    },
}
441
442impl std::fmt::Display for Element {
443 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444 match self {
445 Element::Text(s) => write!(f, "{s}"),
446 Element::Link { text, url } => write!(f, "[{text}]({url})"),
447 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
448 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
449 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
450 Element::InlineImage { alt, url } => write!(f, ""),
451 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
452 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
453 Element::LinkedImage {
454 alt,
455 img_source,
456 link_target,
457 } => {
458 let img_part = match img_source {
460 LinkedImageSource::Inline(url) => format!(""),
461 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
462 };
463 match link_target {
465 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
466 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
467 }
468 }
469 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
470 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
471 Element::WikiLink(s) => write!(f, "[[{s}]]"),
472 Element::InlineMath(s) => write!(f, "${s}$"),
473 Element::DisplayMath(s) => write!(f, "$${s}$$"),
474 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
475 Element::HtmlTag(s) => write!(f, "{s}"),
476 Element::HtmlEntity(s) => write!(f, "{s}"),
477 Element::HugoShortcode(s) => write!(f, "{s}"),
478 Element::Code(s) => write!(f, "`{s}`"),
479 Element::Bold { content, underscore } => {
480 if *underscore {
481 write!(f, "__{content}__")
482 } else {
483 write!(f, "**{content}**")
484 }
485 }
486 Element::Italic { content, underscore } => {
487 if *underscore {
488 write!(f, "_{content}_")
489 } else {
490 write!(f, "*{content}*")
491 }
492 }
493 }
494 }
495}
496
497impl Element {
498 fn len(&self) -> usize {
499 match self {
500 Element::Text(s) => s.chars().count(),
501 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::LinkedImage {
509 alt,
510 img_source,
511 link_target,
512 } => {
513 let alt_len = alt.chars().count();
516 let img_len = match img_source {
517 LinkedImageSource::Inline(url) => url.chars().count() + 2, LinkedImageSource::Reference(r) => r.chars().count() + 2, };
520 let link_len = match link_target {
521 LinkedImageTarget::Inline(url) => url.chars().count() + 2, LinkedImageTarget::Reference(r) => r.chars().count() + 2, };
524 5 + alt_len + img_len + link_len
527 }
528 Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::HugoShortcode(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold { content, .. } => content.chars().count() + 4, Element::Italic { content, .. } => content.chars().count() + 2, }
541 }
542}
543
/// An emphasis, strong or strikethrough span located by
/// `extract_emphasis_spans`, with offsets into the text that was parsed.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the span starts, including its opening delimiter.
    start: usize,
    /// Byte offset just past the span's closing delimiter.
    end: usize,
    /// Text between the delimiters (delimiters excluded).
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough.
    is_strikethrough: bool,
    /// `true` when the opening delimiter is `_` / `__` rather than `*` / `**`.
    /// Always `false` for strikethrough spans.
    uses_underscore: bool,
}
560
561fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
571 let mut spans = Vec::new();
572 let mut options = Options::empty();
573 options.insert(Options::ENABLE_STRIKETHROUGH);
574
575 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
578 let mut strikethrough_stack: Vec<usize> = Vec::new();
579
580 let parser = Parser::new_ext(text, options).into_offset_iter();
581
582 for (event, range) in parser {
583 match event {
584 Event::Start(Tag::Emphasis) => {
585 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
587 emphasis_stack.push((range.start, uses_underscore));
588 }
589 Event::End(TagEnd::Emphasis) => {
590 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
591 let content_start = start_byte + 1;
593 let content_end = range.end - 1;
594 if content_end > content_start
595 && let Some(content) = text.get(content_start..content_end)
596 {
597 spans.push(EmphasisSpan {
598 start: start_byte,
599 end: range.end,
600 content: content.to_string(),
601 is_strong: false,
602 is_strikethrough: false,
603 uses_underscore,
604 });
605 }
606 }
607 }
608 Event::Start(Tag::Strong) => {
609 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
611 strong_stack.push((range.start, uses_underscore));
612 }
613 Event::End(TagEnd::Strong) => {
614 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
615 let content_start = start_byte + 2;
617 let content_end = range.end - 2;
618 if content_end > content_start
619 && let Some(content) = text.get(content_start..content_end)
620 {
621 spans.push(EmphasisSpan {
622 start: start_byte,
623 end: range.end,
624 content: content.to_string(),
625 is_strong: true,
626 is_strikethrough: false,
627 uses_underscore,
628 });
629 }
630 }
631 }
632 Event::Start(Tag::Strikethrough) => {
633 strikethrough_stack.push(range.start);
634 }
635 Event::End(TagEnd::Strikethrough) => {
636 if let Some(start_byte) = strikethrough_stack.pop() {
637 let content_start = start_byte + 2;
639 let content_end = range.end - 2;
640 if content_end > content_start
641 && let Some(content) = text.get(content_start..content_end)
642 {
643 spans.push(EmphasisSpan {
644 start: start_byte,
645 end: range.end,
646 content: content.to_string(),
647 is_strong: false,
648 is_strikethrough: true,
649 uses_underscore: false,
650 });
651 }
652 }
653 }
654 _ => {}
655 }
656 }
657
658 spans.sort_by_key(|s| s.start);
660 spans
661}
662
/// Splits a line of Markdown into a flat sequence of `Element`s.
///
/// Each loop iteration looks at the not-yet-consumed tail (`remaining`) and
/// finds two candidates:
///
/// 1. the earliest regex-recognised construct (linked images, images, links,
///    footnotes, wiki links, math, emoji, HTML entities, Hugo shortcodes,
///    HTML tags), and
/// 2. the earliest "special" position: a backtick (code span) or the next
///    emphasis span reported by `extract_emphasis_spans`.
///
/// Whichever starts first is emitted (preceded by any plain text before it)
/// and `remaining` is advanced past it. When neither is found the rest of the
/// line becomes one `Element::Text`.
fn parse_markdown_elements(text: &str) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once for the whole line; their offsets are
    // byte offsets into `text`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` inside `text`, used to map span offsets.
        let current_offset = text.len() - remaining.len();
        // (start in `remaining`, pattern tag, match). Candidates only replace
        // the current one when they start strictly earlier, so on a tie the
        // pattern tested first wins.
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images are only searched for when the `[!` prefix is present.
        if remaining.contains("[!") {
            // [![alt](img)](link)
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // [![alt][ref]](link)
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // [![alt](img)][ref]
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // [![alt][ref]][ref]
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        // Images are tested before links so that a tie goes to the image.
        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        // Display math is tested before inline math so `$$` wins a tie.
        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // Autolinks (`<https://…>`, `<user@host>`) also match the tag
            // pattern; they are not recorded as HTML tags and therefore stay
            // part of the surrounding text.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            let is_email_autolink = {
                // Strip the angle brackets before testing for an e-mail address.
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if !is_url_autolink && !is_email_autolink {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Second candidate: the nearest backtick or emphasis span.
        // `remaining.len()` means "none found".
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Spans are sorted by start, so the first one at or after the current
        // offset is the nearest; later spans are never inspected.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex construct is processed only when it starts strictly
        // before the code/emphasis candidate.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Plain text in front of the construct.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // NOTE(review): every `else` fallback below advances `remaining`
            // by a fixed 1–2 bytes from its *start* even though
            // `remaining[..pos]` has already been pushed. These fallbacks look
            // unreachable (the same regex just matched via `find`), but if one
            // ever ran with `pos > 0` the prefix would be scanned again.
            match pattern_type {
                "linked_image_ii" => {
                    // Groups: 1 = alt, 2 = image URL, 3 = link URL.
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    // Groups: 1 = alt, 2 = image reference, 3 = link URL.
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    // Groups: 1 = alt, 2 = image URL, 3 = link reference.
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    // Groups: 1 = alt, 2 = image reference, 3 = link reference.
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    // Groups: 1 = alt, 2 = URL.
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    // Groups: 1 = alt, 2 = reference (empty for `![alt][]`).
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    // Group 1 = footnote label.
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    // Groups: 1 = link text, 2 = URL.
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    // Groups: 1 = link text, 2 = reference (empty for `[text][]`).
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    // Group 1 = reference label.
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    // Group 1 = everything between `[[` and `]]`.
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    // Group 1 = math between `$$` delimiters.
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    // Group 1 = math between `$` delimiters.
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    // Group 1 = shortcode name without the colons.
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "html_entity" => {
                    // Stored verbatim: the whole match is the element.
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    // Stored verbatim: the whole match is the element.
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    // Stored verbatim: the whole match is the element.
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                _ => {
                    // No tag other than the ones above is ever assigned.
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the plain text in front of the code/emphasis candidate.
            // When nothing was found, `next_special == remaining.len()` and
            // the catch-all arm below emits the whole tail instead.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` starts with a backtick; look for the closing one.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unclosed backtick: the rest of the line is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Consume the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Defensive: `special_type` is only set together with the span.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                _ => {
                    // Nothing special left: the tail is plain text.
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1158
1159fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1161 let abbreviations = get_abbreviations(custom_abbreviations);
1162 let mut lines = Vec::new();
1163 let mut current_line = String::new();
1164
1165 for element in elements.iter() {
1166 let element_str = format!("{element}");
1167
1168 if let Element::Text(text) = element {
1170 let combined = format!("{current_line}{text}");
1172 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1174
1175 if sentences.len() > 1 {
1176 for (i, sentence) in sentences.iter().enumerate() {
1178 if i == 0 {
1179 let trimmed = sentence.trim();
1182
1183 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1184 current_line = sentence.to_string();
1186 } else {
1187 lines.push(sentence.to_string());
1189 current_line.clear();
1190 }
1191 } else if i == sentences.len() - 1 {
1192 let trimmed = sentence.trim();
1194 let ends_with_sentence_punct =
1195 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1196
1197 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1198 lines.push(sentence.to_string());
1200 current_line.clear();
1201 } else {
1202 current_line = sentence.to_string();
1204 }
1205 } else {
1206 lines.push(sentence.to_string());
1208 }
1209 }
1210 } else {
1211 let trimmed = combined.trim();
1213 let ends_with_sentence_punct =
1214 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1215
1216 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1217 lines.push(trimmed.to_string());
1219 current_line.clear();
1220 } else {
1221 current_line = combined;
1223 }
1224 }
1225 } else if let Element::Italic { content, underscore } = element {
1226 let marker = if *underscore { "_" } else { "*" };
1228 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1229 } else if let Element::Bold { content, underscore } = element {
1230 let marker = if *underscore { "__" } else { "**" };
1232 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1233 } else if let Element::Strikethrough(content) = element {
1234 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1236 } else {
1237 if !current_line.is_empty()
1240 && !current_line.ends_with(' ')
1241 && !current_line.ends_with('(')
1242 && !current_line.ends_with('[')
1243 {
1244 current_line.push(' ');
1245 }
1246 current_line.push_str(&element_str);
1247 }
1248 }
1249
1250 if !current_line.is_empty() {
1252 lines.push(current_line.trim().to_string());
1253 }
1254 lines
1255}
1256
1257fn handle_emphasis_sentence_split(
1259 content: &str,
1260 marker: &str,
1261 abbreviations: &HashSet<String>,
1262 current_line: &mut String,
1263 lines: &mut Vec<String>,
1264) {
1265 let sentences = split_into_sentences_with_set(content, abbreviations);
1267
1268 if sentences.len() <= 1 {
1269 if !current_line.is_empty()
1271 && !current_line.ends_with(' ')
1272 && !current_line.ends_with('(')
1273 && !current_line.ends_with('[')
1274 {
1275 current_line.push(' ');
1276 }
1277 current_line.push_str(marker);
1278 current_line.push_str(content);
1279 current_line.push_str(marker);
1280
1281 let trimmed = content.trim();
1283 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1284 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1285 lines.push(current_line.clone());
1286 current_line.clear();
1287 }
1288 } else {
1289 for (i, sentence) in sentences.iter().enumerate() {
1291 let trimmed = sentence.trim();
1292 if trimmed.is_empty() {
1293 continue;
1294 }
1295
1296 if i == 0 {
1297 if !current_line.is_empty()
1299 && !current_line.ends_with(' ')
1300 && !current_line.ends_with('(')
1301 && !current_line.ends_with('[')
1302 {
1303 current_line.push(' ');
1304 }
1305 current_line.push_str(marker);
1306 current_line.push_str(trimmed);
1307 current_line.push_str(marker);
1308
1309 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1311 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1312 lines.push(current_line.clone());
1313 current_line.clear();
1314 }
1315 } else if i == sentences.len() - 1 {
1316 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1318
1319 let mut line = String::new();
1320 line.push_str(marker);
1321 line.push_str(trimmed);
1322 line.push_str(marker);
1323
1324 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1325 lines.push(line);
1326 } else {
1327 *current_line = line;
1329 }
1330 } else {
1331 let mut line = String::new();
1333 line.push_str(marker);
1334 line.push_str(trimmed);
1335 line.push_str(marker);
1336 lines.push(line);
1337 }
1338 }
1339 }
1340}
1341
1342fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1344 let mut lines = Vec::new();
1345 let mut current_line = String::new();
1346 let mut current_length = 0;
1347
1348 for element in elements {
1349 let element_str = format!("{element}");
1350 let element_len = element.len();
1351
1352 if let Element::Text(text) = element {
1354 let has_leading_space = text.starts_with(char::is_whitespace);
1356 let words: Vec<&str> = text.split_whitespace().collect();
1358
1359 for (i, word) in words.iter().enumerate() {
1360 let word_len = word.chars().count();
1361 let is_trailing_punct = word
1363 .chars()
1364 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1365
1366 if current_length > 0 && current_length + 1 + word_len > options.line_length && !is_trailing_punct {
1367 lines.push(current_line.trim().to_string());
1369 current_line = word.to_string();
1370 current_length = word_len;
1371 } else {
1372 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1376 current_line.push(' ');
1377 current_length += 1;
1378 }
1379 current_line.push_str(word);
1380 current_length += word_len;
1381 }
1382 }
1383 } else {
1384 if current_length > 0 && current_length + 1 + element_len > options.line_length {
1387 lines.push(current_line.trim().to_string());
1389 current_line = element_str;
1390 current_length = element_len;
1391 } else {
1392 let ends_with_opener =
1395 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1396 if current_length > 0 && !ends_with_opener {
1397 current_line.push(' ');
1398 current_length += 1;
1399 }
1400 current_line.push_str(&element_str);
1401 current_length += element_len;
1402 }
1403 }
1404 }
1405
1406 if !current_line.is_empty() {
1408 lines.push(current_line.trim_end().to_string());
1409 }
1410
1411 lines
1412}
1413
1414pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1416 let lines: Vec<&str> = content.lines().collect();
1417 let mut result = Vec::new();
1418 let mut i = 0;
1419
1420 while i < lines.len() {
1421 let line = lines[i];
1422 let trimmed = line.trim();
1423
1424 if trimmed.is_empty() {
1426 result.push(String::new());
1427 i += 1;
1428 continue;
1429 }
1430
1431 if trimmed.starts_with('#') {
1433 result.push(line.to_string());
1434 i += 1;
1435 continue;
1436 }
1437
1438 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1440 result.push(line.to_string());
1441 i += 1;
1442 while i < lines.len() {
1444 result.push(lines[i].to_string());
1445 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1446 i += 1;
1447 break;
1448 }
1449 i += 1;
1450 }
1451 continue;
1452 }
1453
1454 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1456 result.push(line.to_string());
1458 i += 1;
1459 while i < lines.len() {
1460 let next_line = lines[i];
1461 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1463 result.push(next_line.to_string());
1464 i += 1;
1465 } else {
1466 break;
1467 }
1468 }
1469 continue;
1470 }
1471
1472 if trimmed.starts_with('>') {
1474 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1477 let quote_prefix = line[0..gt_pos + 1].to_string();
1478 let quote_content = &line[quote_prefix.len()..].trim_start();
1479
1480 let reflowed = reflow_line(quote_content, options);
1481 for reflowed_line in reflowed.iter() {
1482 result.push(format!("{quote_prefix} {reflowed_line}"));
1483 }
1484 i += 1;
1485 continue;
1486 }
1487
1488 if is_horizontal_rule(trimmed) {
1490 result.push(line.to_string());
1491 i += 1;
1492 continue;
1493 }
1494
1495 let is_unordered_list = |s: &str, marker: char| -> bool {
1499 s.starts_with(marker) && !is_horizontal_rule(s) && (s.len() == 1 || s.chars().nth(1) == Some(' '))
1500 };
1501 if is_unordered_list(trimmed, '-')
1502 || is_unordered_list(trimmed, '*')
1503 || is_unordered_list(trimmed, '+')
1504 || is_numbered_list_item(trimmed)
1505 {
1506 let indent = line.len() - line.trim_start().len();
1508 let indent_str = " ".repeat(indent);
1509
1510 let mut marker_end = indent;
1513 let mut content_start = indent;
1514
1515 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1516 if let Some(period_pos) = line[indent..].find('.') {
1518 marker_end = indent + period_pos + 1; content_start = marker_end;
1520 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1524 content_start += 1;
1525 }
1526 }
1527 } else {
1528 marker_end = indent + 1; content_start = marker_end;
1531 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1535 content_start += 1;
1536 }
1537 }
1538
1539 let marker = &line[indent..marker_end];
1540
1541 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1544 i += 1;
1545
1546 while i < lines.len() {
1548 let next_line = lines[i];
1549 let next_trimmed = next_line.trim();
1550
1551 if next_trimmed.is_empty()
1553 || next_trimmed.starts_with('#')
1554 || next_trimmed.starts_with("```")
1555 || next_trimmed.starts_with("~~~")
1556 || next_trimmed.starts_with('>')
1557 || next_trimmed.starts_with('|')
1558 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1559 || is_horizontal_rule(next_trimmed)
1560 || (next_trimmed.starts_with('-')
1561 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1562 || (next_trimmed.starts_with('*')
1563 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1564 || (next_trimmed.starts_with('+')
1565 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1566 || is_numbered_list_item(next_trimmed)
1567 || is_definition_list_item(next_trimmed)
1568 {
1569 break;
1570 }
1571
1572 let next_indent = next_line.len() - next_line.trim_start().len();
1574 if next_indent >= content_start {
1575 let trimmed_start = next_line.trim_start();
1578 list_content.push(trim_preserving_hard_break(trimmed_start));
1579 i += 1;
1580 } else {
1581 break;
1583 }
1584 }
1585
1586 let combined_content = if options.preserve_breaks {
1589 list_content[0].clone()
1590 } else {
1591 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1593 if has_hard_breaks {
1594 list_content.join("\n")
1596 } else {
1597 list_content.join(" ")
1599 }
1600 };
1601
1602 let trimmed_marker = marker;
1604 let continuation_spaces = content_start;
1605
1606 let prefix_length = indent + trimmed_marker.len() + 1;
1608
1609 let adjusted_options = ReflowOptions {
1611 line_length: options.line_length.saturating_sub(prefix_length),
1612 ..options.clone()
1613 };
1614
1615 let reflowed = reflow_line(&combined_content, &adjusted_options);
1616 for (j, reflowed_line) in reflowed.iter().enumerate() {
1617 if j == 0 {
1618 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1619 } else {
1620 let continuation_indent = " ".repeat(continuation_spaces);
1622 result.push(format!("{continuation_indent}{reflowed_line}"));
1623 }
1624 }
1625 continue;
1626 }
1627
1628 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
1630 result.push(line.to_string());
1631 i += 1;
1632 continue;
1633 }
1634
1635 if trimmed.starts_with('[') && line.contains("]:") {
1637 result.push(line.to_string());
1638 i += 1;
1639 continue;
1640 }
1641
1642 if is_definition_list_item(trimmed) {
1644 result.push(line.to_string());
1645 i += 1;
1646 continue;
1647 }
1648
1649 let mut is_single_line_paragraph = true;
1651 if i + 1 < lines.len() {
1652 let next_line = lines[i + 1];
1653 let next_trimmed = next_line.trim();
1654 if !next_trimmed.is_empty()
1656 && !next_trimmed.starts_with('#')
1657 && !next_trimmed.starts_with("```")
1658 && !next_trimmed.starts_with("~~~")
1659 && !next_trimmed.starts_with('>')
1660 && !next_trimmed.starts_with('|')
1661 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1662 && !is_horizontal_rule(next_trimmed)
1663 && !(next_trimmed.starts_with('-')
1664 && !is_horizontal_rule(next_trimmed)
1665 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1666 && !(next_trimmed.starts_with('*')
1667 && !is_horizontal_rule(next_trimmed)
1668 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1669 && !(next_trimmed.starts_with('+')
1670 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1671 && !is_numbered_list_item(next_trimmed)
1672 {
1673 is_single_line_paragraph = false;
1674 }
1675 }
1676
1677 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1679 result.push(line.to_string());
1680 i += 1;
1681 continue;
1682 }
1683
1684 let mut paragraph_parts = Vec::new();
1686 let mut current_part = vec![line];
1687 i += 1;
1688
1689 if options.preserve_breaks {
1691 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1693 Some("\\")
1694 } else if line.ends_with(" ") {
1695 Some(" ")
1696 } else {
1697 None
1698 };
1699 let reflowed = reflow_line(line, options);
1700
1701 if let Some(break_marker) = hard_break_type {
1703 if !reflowed.is_empty() {
1704 let mut reflowed_with_break = reflowed;
1705 let last_idx = reflowed_with_break.len() - 1;
1706 if !has_hard_break(&reflowed_with_break[last_idx]) {
1707 reflowed_with_break[last_idx].push_str(break_marker);
1708 }
1709 result.extend(reflowed_with_break);
1710 }
1711 } else {
1712 result.extend(reflowed);
1713 }
1714 } else {
1715 while i < lines.len() {
1717 let prev_line = if !current_part.is_empty() {
1718 current_part.last().unwrap()
1719 } else {
1720 ""
1721 };
1722 let next_line = lines[i];
1723 let next_trimmed = next_line.trim();
1724
1725 if next_trimmed.is_empty()
1727 || next_trimmed.starts_with('#')
1728 || next_trimmed.starts_with("```")
1729 || next_trimmed.starts_with("~~~")
1730 || next_trimmed.starts_with('>')
1731 || next_trimmed.starts_with('|')
1732 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1733 || is_horizontal_rule(next_trimmed)
1734 || (next_trimmed.starts_with('-')
1735 && !is_horizontal_rule(next_trimmed)
1736 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1737 || (next_trimmed.starts_with('*')
1738 && !is_horizontal_rule(next_trimmed)
1739 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1740 || (next_trimmed.starts_with('+')
1741 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1742 || is_numbered_list_item(next_trimmed)
1743 || is_definition_list_item(next_trimmed)
1744 {
1745 break;
1746 }
1747
1748 let prev_trimmed = prev_line.trim();
1751 let abbreviations = get_abbreviations(&options.abbreviations);
1752 let ends_with_sentence = (prev_trimmed.ends_with('.')
1753 || prev_trimmed.ends_with('!')
1754 || prev_trimmed.ends_with('?')
1755 || prev_trimmed.ends_with(".*")
1756 || prev_trimmed.ends_with("!*")
1757 || prev_trimmed.ends_with("?*")
1758 || prev_trimmed.ends_with("._")
1759 || prev_trimmed.ends_with("!_")
1760 || prev_trimmed.ends_with("?_")
1761 || prev_trimmed.ends_with(".\"")
1763 || prev_trimmed.ends_with("!\"")
1764 || prev_trimmed.ends_with("?\"")
1765 || prev_trimmed.ends_with(".'")
1766 || prev_trimmed.ends_with("!'")
1767 || prev_trimmed.ends_with("?'")
1768 || prev_trimmed.ends_with(".\u{201D}")
1769 || prev_trimmed.ends_with("!\u{201D}")
1770 || prev_trimmed.ends_with("?\u{201D}")
1771 || prev_trimmed.ends_with(".\u{2019}")
1772 || prev_trimmed.ends_with("!\u{2019}")
1773 || prev_trimmed.ends_with("?\u{2019}"))
1774 && !text_ends_with_abbreviation(
1775 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
1776 &abbreviations,
1777 );
1778
1779 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
1780 paragraph_parts.push(current_part.join(" "));
1782 current_part = vec![next_line];
1783 } else {
1784 current_part.push(next_line);
1785 }
1786 i += 1;
1787 }
1788
1789 if !current_part.is_empty() {
1791 if current_part.len() == 1 {
1792 paragraph_parts.push(current_part[0].to_string());
1794 } else {
1795 paragraph_parts.push(current_part.join(" "));
1796 }
1797 }
1798
1799 for (j, part) in paragraph_parts.iter().enumerate() {
1801 let reflowed = reflow_line(part, options);
1802 result.extend(reflowed);
1803
1804 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
1808 let last_idx = result.len() - 1;
1809 if !has_hard_break(&result[last_idx]) {
1810 result[last_idx].push_str(" ");
1811 }
1812 }
1813 }
1814 }
1815 }
1816
1817 let result_text = result.join("\n");
1819 if content.ends_with('\n') && !result_text.ends_with('\n') {
1820 format!("{result_text}\n")
1821 } else {
1822 result_text
1823 }
1824}
1825
/// Describes a replacement for one paragraph of a document: the byte range the
/// paragraph occupies and the reflowed text to put there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset in the original content just past the paragraph (exclusive),
    /// including the paragraph's final line terminator when it has one.
    pub end_byte: usize,
    /// Reflowed paragraph text intended to replace `start_byte..end_byte`.
    pub reflowed_text: String,
}
1836
1837pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1855 if line_number == 0 {
1856 return None;
1857 }
1858
1859 let lines: Vec<&str> = content.lines().collect();
1860
1861 if line_number > lines.len() {
1863 return None;
1864 }
1865
1866 let target_idx = line_number - 1; let target_line = lines[target_idx];
1868 let trimmed = target_line.trim();
1869
1870 if trimmed.is_empty()
1872 || trimmed.starts_with('#')
1873 || trimmed.starts_with("```")
1874 || trimmed.starts_with("~~~")
1875 || ElementCache::calculate_indentation_width_default(target_line) >= 4
1876 || trimmed.starts_with('>')
1877 || crate::utils::table_utils::TableUtils::is_potential_table_row(target_line) || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1880 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1881 && !is_horizontal_rule(trimmed)
1882 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1883 || is_numbered_list_item(trimmed)
1884 || is_definition_list_item(trimmed)
1885 {
1886 return None;
1887 }
1888
1889 let mut para_start = target_idx;
1891 while para_start > 0 {
1892 let prev_idx = para_start - 1;
1893 let prev_line = lines[prev_idx];
1894 let prev_trimmed = prev_line.trim();
1895
1896 if prev_trimmed.is_empty()
1898 || prev_trimmed.starts_with('#')
1899 || prev_trimmed.starts_with("```")
1900 || prev_trimmed.starts_with("~~~")
1901 || ElementCache::calculate_indentation_width_default(prev_line) >= 4
1902 || prev_trimmed.starts_with('>')
1903 || crate::utils::table_utils::TableUtils::is_potential_table_row(prev_line)
1904 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1905 || is_horizontal_rule(prev_trimmed)
1906 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1907 && !is_horizontal_rule(prev_trimmed)
1908 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1909 || is_numbered_list_item(prev_trimmed)
1910 || is_definition_list_item(prev_trimmed)
1911 {
1912 break;
1913 }
1914
1915 para_start = prev_idx;
1916 }
1917
1918 let mut para_end = target_idx;
1920 while para_end + 1 < lines.len() {
1921 let next_idx = para_end + 1;
1922 let next_line = lines[next_idx];
1923 let next_trimmed = next_line.trim();
1924
1925 if next_trimmed.is_empty()
1927 || next_trimmed.starts_with('#')
1928 || next_trimmed.starts_with("```")
1929 || next_trimmed.starts_with("~~~")
1930 || ElementCache::calculate_indentation_width_default(next_line) >= 4
1931 || next_trimmed.starts_with('>')
1932 || crate::utils::table_utils::TableUtils::is_potential_table_row(next_line)
1933 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1934 || is_horizontal_rule(next_trimmed)
1935 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1936 && !is_horizontal_rule(next_trimmed)
1937 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1938 || is_numbered_list_item(next_trimmed)
1939 || is_definition_list_item(next_trimmed)
1940 {
1941 break;
1942 }
1943
1944 para_end = next_idx;
1945 }
1946
1947 let paragraph_lines = &lines[para_start..=para_end];
1949
1950 let mut start_byte = 0;
1952 for line in lines.iter().take(para_start) {
1953 start_byte += line.len() + 1; }
1955
1956 let mut end_byte = start_byte;
1957 for line in paragraph_lines.iter() {
1958 end_byte += line.len() + 1; }
1960
1961 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1964
1965 if !includes_trailing_newline {
1967 end_byte -= 1;
1968 }
1969
1970 let paragraph_text = paragraph_lines.join("\n");
1972
1973 let options = ReflowOptions {
1975 line_length,
1976 break_on_sentences: true,
1977 preserve_breaks: false,
1978 sentence_per_line: false,
1979 abbreviations: None,
1980 };
1981
1982 let reflowed = reflow_markdown(¶graph_text, &options);
1984
1985 let reflowed_text = if includes_trailing_newline {
1989 if reflowed.ends_with('\n') {
1991 reflowed
1992 } else {
1993 format!("{reflowed}\n")
1994 }
1995 } else {
1996 if reflowed.ends_with('\n') {
1998 reflowed.trim_end_matches('\n').to_string()
1999 } else {
2000 reflowed
2001 }
2002 };
2003
2004 Some(ParagraphReflow {
2005 start_byte,
2006 end_byte,
2007 reflowed_text,
2008 })
2009}
2010
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation` against the default abbreviation set:
    /// known titles and Latin abbreviations are recognized, while ordinary words,
    /// `etc.`, other punctuation and empty input are not.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        let recognized = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognized {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }
}