1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20
/// Options controlling how Markdown text is reflowed.
///
/// Derives `Debug` in addition to `Clone` so the options can be logged and
/// embedded in other `Debug` types.
#[derive(Clone, Debug)]
pub struct ReflowOptions {
    /// Target line length, counted in characters. `reflow_line` returns its
    /// input unchanged when this is `0` or when the line already fits.
    pub line_length: usize,
    /// Whether breaks should preferably fall on sentence boundaries.
    /// NOTE(review): not read by the code next to this definition; confirm
    /// the exact semantics against the consumer.
    pub break_on_sentences: bool,
    /// Whether existing line breaks should be kept.
    /// NOTE(review): not read by the code next to this definition; confirm
    /// the exact semantics against the consumer.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` takes the sentence-per-line path and does
    /// not consult `line_length`.
    pub sentence_per_line: bool,
    /// Optional custom abbreviations, forwarded to `get_abbreviations` when
    /// sentences are split. `None` leaves the choice to that helper.
    pub abbreviations: Option<Vec<String>>,
}
37
38impl Default for ReflowOptions {
39 fn default() -> Self {
40 Self {
41 line_length: 80,
42 break_on_sentences: true,
43 preserve_breaks: false,
44 sentence_per_line: false,
45 abbreviations: None,
46 }
47 }
48}
49
50fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
54 let chars: Vec<char> = text.chars().collect();
55
56 if pos + 1 >= chars.len() {
57 return false;
58 }
59
60 let c = chars[pos];
61 let next_char = chars[pos + 1];
62
63 if is_cjk_sentence_ending(c) {
66 let mut after_punct_pos = pos + 1;
68 while after_punct_pos < chars.len()
69 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
70 {
71 after_punct_pos += 1;
72 }
73
74 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
76 after_punct_pos += 1;
77 }
78
79 if after_punct_pos >= chars.len() {
81 return false;
82 }
83
84 while after_punct_pos < chars.len()
86 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
87 {
88 after_punct_pos += 1;
89 }
90
91 if after_punct_pos >= chars.len() {
92 return false;
93 }
94
95 return true;
98 }
99
100 if c != '.' && c != '!' && c != '?' {
102 return false;
103 }
104
105 let (_space_pos, after_space_pos) = if next_char == ' ' {
107 (pos + 1, pos + 2)
109 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
110 if chars[pos + 2] == ' ' {
112 (pos + 2, pos + 3)
114 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
115 (pos + 3, pos + 4)
117 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
118 && pos + 4 < chars.len()
119 && chars[pos + 3] == chars[pos + 2]
120 && chars[pos + 4] == ' '
121 {
122 (pos + 4, pos + 5)
124 } else {
125 return false;
126 }
127 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
128 (pos + 2, pos + 3)
130 } else if (next_char == '*' || next_char == '_')
131 && pos + 3 < chars.len()
132 && chars[pos + 2] == next_char
133 && chars[pos + 3] == ' '
134 {
135 (pos + 3, pos + 4)
137 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
138 (pos + 3, pos + 4)
140 } else {
141 return false;
142 };
143
144 let mut next_char_pos = after_space_pos;
146 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
147 next_char_pos += 1;
148 }
149
150 if next_char_pos >= chars.len() {
152 return false;
153 }
154
155 let mut first_letter_pos = next_char_pos;
157 while first_letter_pos < chars.len()
158 && (chars[first_letter_pos] == '*'
159 || chars[first_letter_pos] == '_'
160 || chars[first_letter_pos] == '~'
161 || is_opening_quote(chars[first_letter_pos]))
162 {
163 first_letter_pos += 1;
164 }
165
166 if first_letter_pos >= chars.len() {
168 return false;
169 }
170
171 let first_char = chars[first_letter_pos];
173 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
174 return false;
175 }
176
177 if pos > 0 && c == '.' {
179 if text_ends_with_abbreviation(&text[..=pos], abbreviations) {
182 return false;
183 }
184
185 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
188 return false;
189 }
190 }
191 true
192}
193
194pub fn split_into_sentences(text: &str) -> Vec<String> {
196 split_into_sentences_custom(text, &None)
197}
198
199pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
201 let abbreviations = get_abbreviations(custom_abbreviations);
202 split_into_sentences_with_set(text, &abbreviations)
203}
204
205fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
208 let mut sentences = Vec::new();
209 let mut current_sentence = String::new();
210 let mut chars = text.chars().peekable();
211 let mut pos = 0;
212
213 while let Some(c) = chars.next() {
214 current_sentence.push(c);
215
216 if is_sentence_boundary(text, pos, abbreviations) {
217 while let Some(&next) = chars.peek() {
219 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
220 current_sentence.push(chars.next().unwrap());
221 pos += 1;
222 } else {
223 break;
224 }
225 }
226
227 if chars.peek() == Some(&' ') {
229 chars.next();
230 pos += 1;
231 }
232
233 sentences.push(current_sentence.trim().to_string());
234 current_sentence.clear();
235 }
236
237 pos += 1;
238 }
239
240 if !current_sentence.trim().is_empty() {
242 sentences.push(current_sentence.trim().to_string());
243 }
244 sentences
245}
246
/// Reports whether `line` is a horizontal rule.
///
/// The line must begin with `-`, `_` or `*`, contain only that character and
/// spaces, and hold at least three occurrences of it. A line that starts with
/// a space is rejected, so callers are expected to pass an unindented line.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    // Count markers, bailing out on the first character that is neither the
    // marker nor a space.
    line.chars()
        .try_fold(0usize, |seen, ch| {
            if ch == marker {
                Some(seen + 1)
            } else if ch == ' ' {
                Some(seen)
            } else {
                None
            }
        })
        .map_or(false, |seen| seen >= 3)
}
275
/// Reports whether `line` begins with an ordered-list marker.
///
/// A marker is one or more ASCII digits followed by `.`, and the `.` must be
/// either the end of the line or followed by a space.
///
/// Only ASCII digits count. Other Unicode numerics such as `½` or `٣` are not
/// list markers in Markdown, so lines starting with them are rejected.
fn is_numbered_list_item(line: &str) -> bool {
    let digits_len = line.bytes().take_while(u8::is_ascii_digit).count();
    if digits_len == 0 {
        return false;
    }

    // ASCII digits are one byte each, so `digits_len` is a valid char boundary.
    let mut rest = line[digits_len..].chars();
    rest.next() == Some('.') && matches!(rest.next(), None | Some(' '))
}
298
/// Reports whether `line` ends with a Markdown hard line break.
///
/// A hard break is either a trailing backslash or at least two trailing
/// spaces. One trailing `\r` (from CRLF input) is ignored first. A single
/// trailing space is ordinary whitespace and does not count.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    let trailing_spaces = line.len() - line.trim_end_matches(' ').len();
    trailing_spaces >= 2 || line.ends_with('\\')
}
308
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// - One trailing `\r` is removed first.
/// - A line ending in a backslash is returned as-is.
/// - A line ending in two or more spaces is trimmed and given back exactly two
///   trailing spaces, so the hard break survives in normalized form.
/// - A whitespace-only line becomes the empty string.
/// - Any other line simply loses its trailing whitespace. A single trailing
///   space is not a hard break and is removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    let trailing_spaces = s.len() - s.trim_end_matches(' ').len();

    if trailing_spaces >= 2 && !content.is_empty() {
        let mut kept = String::with_capacity(content.len() + 2);
        kept.push_str(content);
        // Re-append exactly two spaces: the canonical hard-break marker.
        kept.push(' ');
        kept.push(' ');
        kept
    } else {
        content.to_string()
    }
}
339
340pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
341 if options.sentence_per_line {
343 let elements = parse_markdown_elements(line);
344 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
345 }
346
347 if options.line_length == 0 || line.chars().count() <= options.line_length {
350 return vec![line.to_string()];
351 }
352
353 let elements = parse_markdown_elements(line);
355
356 reflow_elements(&elements, options)
358}
359
/// Where the image inside a linked image (an image wrapped in a link) gets
/// its source from.
#[derive(Clone, Debug)]
enum LinkedImageSource {
    /// The image URL is written inline, in parentheses after the alt text.
    Inline(String),
    /// The image is identified by a reference label in square brackets.
    Reference(String),
}
368
/// Where the link wrapped around a linked image points.
#[derive(Clone, Debug)]
enum LinkedImageTarget {
    /// The link URL is written inline, in parentheses after the image.
    Inline(String),
    /// The link is identified by a reference label in square brackets.
    Reference(String),
}
377
378#[derive(Debug, Clone)]
380enum Element {
381 Text(String),
383 Link { text: String, url: String },
385 ReferenceLink { text: String, reference: String },
387 EmptyReferenceLink { text: String },
389 ShortcutReference { reference: String },
391 InlineImage { alt: String, url: String },
393 ReferenceImage { alt: String, reference: String },
395 EmptyReferenceImage { alt: String },
397 LinkedImage {
403 alt: String,
404 img_source: LinkedImageSource,
405 link_target: LinkedImageTarget,
406 },
407 FootnoteReference { note: String },
409 Strikethrough(String),
411 WikiLink(String),
413 InlineMath(String),
415 DisplayMath(String),
417 EmojiShortcode(String),
419 HtmlTag(String),
421 HtmlEntity(String),
423 HugoShortcode(String),
425 Code(String),
427 Bold {
429 content: String,
430 underscore: bool,
432 },
433 Italic {
435 content: String,
436 underscore: bool,
438 },
439}
440
441impl std::fmt::Display for Element {
442 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
443 match self {
444 Element::Text(s) => write!(f, "{s}"),
445 Element::Link { text, url } => write!(f, "[{text}]({url})"),
446 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
447 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
448 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
449 Element::InlineImage { alt, url } => write!(f, ""),
450 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
451 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
452 Element::LinkedImage {
453 alt,
454 img_source,
455 link_target,
456 } => {
457 let img_part = match img_source {
459 LinkedImageSource::Inline(url) => format!(""),
460 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
461 };
462 match link_target {
464 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
465 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
466 }
467 }
468 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
469 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
470 Element::WikiLink(s) => write!(f, "[[{s}]]"),
471 Element::InlineMath(s) => write!(f, "${s}$"),
472 Element::DisplayMath(s) => write!(f, "$${s}$$"),
473 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
474 Element::HtmlTag(s) => write!(f, "{s}"),
475 Element::HtmlEntity(s) => write!(f, "{s}"),
476 Element::HugoShortcode(s) => write!(f, "{s}"),
477 Element::Code(s) => write!(f, "`{s}`"),
478 Element::Bold { content, underscore } => {
479 if *underscore {
480 write!(f, "__{content}__")
481 } else {
482 write!(f, "**{content}**")
483 }
484 }
485 Element::Italic { content, underscore } => {
486 if *underscore {
487 write!(f, "_{content}_")
488 } else {
489 write!(f, "*{content}*")
490 }
491 }
492 }
493 }
494}
495
496impl Element {
497 fn len(&self) -> usize {
498 match self {
499 Element::Text(s) => s.chars().count(),
500 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::LinkedImage {
508 alt,
509 img_source,
510 link_target,
511 } => {
512 let alt_len = alt.chars().count();
515 let img_len = match img_source {
516 LinkedImageSource::Inline(url) => url.chars().count() + 2, LinkedImageSource::Reference(r) => r.chars().count() + 2, };
519 let link_len = match link_target {
520 LinkedImageTarget::Inline(url) => url.chars().count() + 2, LinkedImageTarget::Reference(r) => r.chars().count() + 2, };
523 5 + alt_len + img_len + link_len
526 }
527 Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::HugoShortcode(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold { content, .. } => content.chars().count() + 4, Element::Italic { content, .. } => content.chars().count() + 2, }
540 }
541}
542
/// An emphasis, strong-emphasis or strikethrough run located in a piece of text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset of the opening delimiter in the scanned text.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// The text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the delimiter is made of underscores rather than asterisks.
    uses_underscore: bool,
}
559
560fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
570 let mut spans = Vec::new();
571 let mut options = Options::empty();
572 options.insert(Options::ENABLE_STRIKETHROUGH);
573
574 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
577 let mut strikethrough_stack: Vec<usize> = Vec::new();
578
579 let parser = Parser::new_ext(text, options).into_offset_iter();
580
581 for (event, range) in parser {
582 match event {
583 Event::Start(Tag::Emphasis) => {
584 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
586 emphasis_stack.push((range.start, uses_underscore));
587 }
588 Event::End(TagEnd::Emphasis) => {
589 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
590 let content_start = start_byte + 1;
592 let content_end = range.end - 1;
593 if content_end > content_start
594 && let Some(content) = text.get(content_start..content_end)
595 {
596 spans.push(EmphasisSpan {
597 start: start_byte,
598 end: range.end,
599 content: content.to_string(),
600 is_strong: false,
601 is_strikethrough: false,
602 uses_underscore,
603 });
604 }
605 }
606 }
607 Event::Start(Tag::Strong) => {
608 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
610 strong_stack.push((range.start, uses_underscore));
611 }
612 Event::End(TagEnd::Strong) => {
613 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
614 let content_start = start_byte + 2;
616 let content_end = range.end - 2;
617 if content_end > content_start
618 && let Some(content) = text.get(content_start..content_end)
619 {
620 spans.push(EmphasisSpan {
621 start: start_byte,
622 end: range.end,
623 content: content.to_string(),
624 is_strong: true,
625 is_strikethrough: false,
626 uses_underscore,
627 });
628 }
629 }
630 }
631 Event::Start(Tag::Strikethrough) => {
632 strikethrough_stack.push(range.start);
633 }
634 Event::End(TagEnd::Strikethrough) => {
635 if let Some(start_byte) = strikethrough_stack.pop() {
636 let content_start = start_byte + 2;
638 let content_end = range.end - 2;
639 if content_end > content_start
640 && let Some(content) = text.get(content_start..content_end)
641 {
642 spans.push(EmphasisSpan {
643 start: start_byte,
644 end: range.end,
645 content: content.to_string(),
646 is_strong: false,
647 is_strikethrough: true,
648 uses_underscore: false,
649 });
650 }
651 }
652 }
653 _ => {}
654 }
655 }
656
657 spans.sort_by_key(|s| s.start);
659 spans
660}
661
662fn parse_markdown_elements(text: &str) -> Vec<Element> {
673 let mut elements = Vec::new();
674 let mut remaining = text;
675
676 let emphasis_spans = extract_emphasis_spans(text);
678
679 while !remaining.is_empty() {
680 let current_offset = text.len() - remaining.len();
682 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
684
685 if remaining.contains("[!") {
689 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
691 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
692 {
693 earliest_match = Some((m.start(), "linked_image_ii", m));
694 }
695
696 if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
698 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
699 {
700 earliest_match = Some((m.start(), "linked_image_ri", m));
701 }
702
703 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
705 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
706 {
707 earliest_match = Some((m.start(), "linked_image_ir", m));
708 }
709
710 if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
712 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
713 {
714 earliest_match = Some((m.start(), "linked_image_rr", m));
715 }
716 }
717
718 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
721 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
722 {
723 earliest_match = Some((m.start(), "inline_image", m));
724 }
725
726 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
728 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
729 {
730 earliest_match = Some((m.start(), "ref_image", m));
731 }
732
733 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
735 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
736 {
737 earliest_match = Some((m.start(), "footnote_ref", m));
738 }
739
740 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
742 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
743 {
744 earliest_match = Some((m.start(), "inline_link", m));
745 }
746
747 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
749 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
750 {
751 earliest_match = Some((m.start(), "ref_link", m));
752 }
753
754 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
757 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
758 {
759 earliest_match = Some((m.start(), "shortcut_ref", m));
760 }
761
762 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
764 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
765 {
766 earliest_match = Some((m.start(), "wiki_link", m));
767 }
768
769 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
771 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
772 {
773 earliest_match = Some((m.start(), "display_math", m));
774 }
775
776 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
778 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
779 {
780 earliest_match = Some((m.start(), "inline_math", m));
781 }
782
783 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
787 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
788 {
789 earliest_match = Some((m.start(), "emoji", m));
790 }
791
792 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
794 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
795 {
796 earliest_match = Some((m.start(), "html_entity", m));
797 }
798
799 if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
802 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
803 {
804 earliest_match = Some((m.start(), "hugo_shortcode", m));
805 }
806
807 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
810 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
811 {
812 let matched_text = &remaining[m.start()..m.end()];
814 let is_autolink = matched_text.starts_with("<http://")
815 || matched_text.starts_with("<https://")
816 || matched_text.starts_with("<mailto:")
817 || matched_text.starts_with("<ftp://")
818 || matched_text.starts_with("<ftps://");
819
820 if !is_autolink {
821 earliest_match = Some((m.start(), "html_tag", m));
822 }
823 }
824
825 let mut next_special = remaining.len();
827 let mut special_type = "";
828 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
829
830 if let Some(pos) = remaining.find('`')
832 && pos < next_special
833 {
834 next_special = pos;
835 special_type = "code";
836 }
837
838 for span in &emphasis_spans {
841 if span.start >= current_offset && span.start < current_offset + remaining.len() {
842 let pos_in_remaining = span.start - current_offset;
843 if pos_in_remaining < next_special {
844 next_special = pos_in_remaining;
845 special_type = "pulldown_emphasis";
846 pulldown_emphasis = Some(span);
847 }
848 break; }
850 }
851
852 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
854 pos < next_special
855 } else {
856 false
857 };
858
859 if should_process_markdown_link {
860 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
861
862 if pos > 0 {
864 elements.push(Element::Text(remaining[..pos].to_string()));
865 }
866
867 match pattern_type {
869 "linked_image_ii" => {
871 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
872 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
873 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
874 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
875 elements.push(Element::LinkedImage {
876 alt: alt.to_string(),
877 img_source: LinkedImageSource::Inline(img_url.to_string()),
878 link_target: LinkedImageTarget::Inline(link_url.to_string()),
879 });
880 remaining = &remaining[match_obj.end()..];
881 } else {
882 elements.push(Element::Text("[".to_string()));
883 remaining = &remaining[1..];
884 }
885 }
886 "linked_image_ri" => {
888 if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
889 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
890 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
891 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
892 elements.push(Element::LinkedImage {
893 alt: alt.to_string(),
894 img_source: LinkedImageSource::Reference(img_ref.to_string()),
895 link_target: LinkedImageTarget::Inline(link_url.to_string()),
896 });
897 remaining = &remaining[match_obj.end()..];
898 } else {
899 elements.push(Element::Text("[".to_string()));
900 remaining = &remaining[1..];
901 }
902 }
903 "linked_image_ir" => {
905 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
906 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
907 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
908 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
909 elements.push(Element::LinkedImage {
910 alt: alt.to_string(),
911 img_source: LinkedImageSource::Inline(img_url.to_string()),
912 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
913 });
914 remaining = &remaining[match_obj.end()..];
915 } else {
916 elements.push(Element::Text("[".to_string()));
917 remaining = &remaining[1..];
918 }
919 }
920 "linked_image_rr" => {
922 if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
923 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
924 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
925 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
926 elements.push(Element::LinkedImage {
927 alt: alt.to_string(),
928 img_source: LinkedImageSource::Reference(img_ref.to_string()),
929 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
930 });
931 remaining = &remaining[match_obj.end()..];
932 } else {
933 elements.push(Element::Text("[".to_string()));
934 remaining = &remaining[1..];
935 }
936 }
937 "inline_image" => {
938 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
939 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
940 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
941 elements.push(Element::InlineImage {
942 alt: alt.to_string(),
943 url: url.to_string(),
944 });
945 remaining = &remaining[match_obj.end()..];
946 } else {
947 elements.push(Element::Text("!".to_string()));
948 remaining = &remaining[1..];
949 }
950 }
951 "ref_image" => {
952 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
953 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
954 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
955
956 if reference.is_empty() {
957 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
958 } else {
959 elements.push(Element::ReferenceImage {
960 alt: alt.to_string(),
961 reference: reference.to_string(),
962 });
963 }
964 remaining = &remaining[match_obj.end()..];
965 } else {
966 elements.push(Element::Text("!".to_string()));
967 remaining = &remaining[1..];
968 }
969 }
970 "footnote_ref" => {
971 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
972 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
973 elements.push(Element::FootnoteReference { note: note.to_string() });
974 remaining = &remaining[match_obj.end()..];
975 } else {
976 elements.push(Element::Text("[".to_string()));
977 remaining = &remaining[1..];
978 }
979 }
980 "inline_link" => {
981 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
982 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
983 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
984 elements.push(Element::Link {
985 text: text.to_string(),
986 url: url.to_string(),
987 });
988 remaining = &remaining[match_obj.end()..];
989 } else {
990 elements.push(Element::Text("[".to_string()));
992 remaining = &remaining[1..];
993 }
994 }
995 "ref_link" => {
996 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
997 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
998 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
999
1000 if reference.is_empty() {
1001 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1003 } else {
1004 elements.push(Element::ReferenceLink {
1006 text: text.to_string(),
1007 reference: reference.to_string(),
1008 });
1009 }
1010 remaining = &remaining[match_obj.end()..];
1011 } else {
1012 elements.push(Element::Text("[".to_string()));
1014 remaining = &remaining[1..];
1015 }
1016 }
1017 "shortcut_ref" => {
1018 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1019 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1020 elements.push(Element::ShortcutReference {
1021 reference: reference.to_string(),
1022 });
1023 remaining = &remaining[match_obj.end()..];
1024 } else {
1025 elements.push(Element::Text("[".to_string()));
1027 remaining = &remaining[1..];
1028 }
1029 }
1030 "wiki_link" => {
1031 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1032 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1033 elements.push(Element::WikiLink(content.to_string()));
1034 remaining = &remaining[match_obj.end()..];
1035 } else {
1036 elements.push(Element::Text("[[".to_string()));
1037 remaining = &remaining[2..];
1038 }
1039 }
1040 "display_math" => {
1041 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1042 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1043 elements.push(Element::DisplayMath(math.to_string()));
1044 remaining = &remaining[match_obj.end()..];
1045 } else {
1046 elements.push(Element::Text("$$".to_string()));
1047 remaining = &remaining[2..];
1048 }
1049 }
1050 "inline_math" => {
1051 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1052 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1053 elements.push(Element::InlineMath(math.to_string()));
1054 remaining = &remaining[match_obj.end()..];
1055 } else {
1056 elements.push(Element::Text("$".to_string()));
1057 remaining = &remaining[1..];
1058 }
1059 }
1060 "emoji" => {
1062 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1063 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1064 elements.push(Element::EmojiShortcode(emoji.to_string()));
1065 remaining = &remaining[match_obj.end()..];
1066 } else {
1067 elements.push(Element::Text(":".to_string()));
1068 remaining = &remaining[1..];
1069 }
1070 }
1071 "html_entity" => {
1072 elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
1074 remaining = &remaining[match_obj.end()..];
1075 }
1076 "hugo_shortcode" => {
1077 elements.push(Element::HugoShortcode(remaining[..match_obj.end()].to_string()));
1079 remaining = &remaining[match_obj.end()..];
1080 }
1081 "html_tag" => {
1082 elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
1084 remaining = &remaining[match_obj.end()..];
1085 }
1086 _ => {
1087 elements.push(Element::Text("[".to_string()));
1089 remaining = &remaining[1..];
1090 }
1091 }
1092 } else {
1093 if next_special > 0 && next_special < remaining.len() {
1097 elements.push(Element::Text(remaining[..next_special].to_string()));
1098 remaining = &remaining[next_special..];
1099 }
1100
1101 match special_type {
1103 "code" => {
1104 if let Some(code_end) = remaining[1..].find('`') {
1106 let code = &remaining[1..1 + code_end];
1107 elements.push(Element::Code(code.to_string()));
1108 remaining = &remaining[1 + code_end + 1..];
1109 } else {
1110 elements.push(Element::Text(remaining.to_string()));
1112 break;
1113 }
1114 }
1115 "pulldown_emphasis" => {
1116 if let Some(span) = pulldown_emphasis {
1118 let span_len = span.end - span.start;
1119 if span.is_strikethrough {
1120 elements.push(Element::Strikethrough(span.content.clone()));
1121 } else if span.is_strong {
1122 elements.push(Element::Bold {
1123 content: span.content.clone(),
1124 underscore: span.uses_underscore,
1125 });
1126 } else {
1127 elements.push(Element::Italic {
1128 content: span.content.clone(),
1129 underscore: span.uses_underscore,
1130 });
1131 }
1132 remaining = &remaining[span_len..];
1133 } else {
1134 elements.push(Element::Text(remaining[..1].to_string()));
1136 remaining = &remaining[1..];
1137 }
1138 }
1139 _ => {
1140 elements.push(Element::Text(remaining.to_string()));
1142 break;
1143 }
1144 }
1145 }
1146 }
1147
1148 elements
1149}
1150
1151fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1153 let abbreviations = get_abbreviations(custom_abbreviations);
1154 let mut lines = Vec::new();
1155 let mut current_line = String::new();
1156
1157 for element in elements.iter() {
1158 let element_str = format!("{element}");
1159
1160 if let Element::Text(text) = element {
1162 let combined = format!("{current_line}{text}");
1164 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1166
1167 if sentences.len() > 1 {
1168 for (i, sentence) in sentences.iter().enumerate() {
1170 if i == 0 {
1171 let trimmed = sentence.trim();
1174
1175 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1176 current_line = sentence.to_string();
1178 } else {
1179 lines.push(sentence.to_string());
1181 current_line.clear();
1182 }
1183 } else if i == sentences.len() - 1 {
1184 let trimmed = sentence.trim();
1186 let ends_with_sentence_punct =
1187 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1188
1189 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1190 lines.push(sentence.to_string());
1192 current_line.clear();
1193 } else {
1194 current_line = sentence.to_string();
1196 }
1197 } else {
1198 lines.push(sentence.to_string());
1200 }
1201 }
1202 } else {
1203 let trimmed = combined.trim();
1205 let ends_with_sentence_punct =
1206 trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1207
1208 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1209 lines.push(trimmed.to_string());
1211 current_line.clear();
1212 } else {
1213 current_line = combined;
1215 }
1216 }
1217 } else if let Element::Italic { content, underscore } = element {
1218 let marker = if *underscore { "_" } else { "*" };
1220 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1221 } else if let Element::Bold { content, underscore } = element {
1222 let marker = if *underscore { "__" } else { "**" };
1224 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1225 } else if let Element::Strikethrough(content) = element {
1226 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1228 } else {
1229 if !current_line.is_empty()
1232 && !current_line.ends_with(' ')
1233 && !current_line.ends_with('(')
1234 && !current_line.ends_with('[')
1235 {
1236 current_line.push(' ');
1237 }
1238 current_line.push_str(&element_str);
1239 }
1240 }
1241
1242 if !current_line.is_empty() {
1244 lines.push(current_line.trim().to_string());
1245 }
1246 lines
1247}
1248
1249fn handle_emphasis_sentence_split(
1251 content: &str,
1252 marker: &str,
1253 abbreviations: &HashSet<String>,
1254 current_line: &mut String,
1255 lines: &mut Vec<String>,
1256) {
1257 let sentences = split_into_sentences_with_set(content, abbreviations);
1259
1260 if sentences.len() <= 1 {
1261 if !current_line.is_empty()
1263 && !current_line.ends_with(' ')
1264 && !current_line.ends_with('(')
1265 && !current_line.ends_with('[')
1266 {
1267 current_line.push(' ');
1268 }
1269 current_line.push_str(marker);
1270 current_line.push_str(content);
1271 current_line.push_str(marker);
1272
1273 let trimmed = content.trim();
1275 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1276 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1277 lines.push(current_line.clone());
1278 current_line.clear();
1279 }
1280 } else {
1281 for (i, sentence) in sentences.iter().enumerate() {
1283 let trimmed = sentence.trim();
1284 if trimmed.is_empty() {
1285 continue;
1286 }
1287
1288 if i == 0 {
1289 if !current_line.is_empty()
1291 && !current_line.ends_with(' ')
1292 && !current_line.ends_with('(')
1293 && !current_line.ends_with('[')
1294 {
1295 current_line.push(' ');
1296 }
1297 current_line.push_str(marker);
1298 current_line.push_str(trimmed);
1299 current_line.push_str(marker);
1300
1301 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1303 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1304 lines.push(current_line.clone());
1305 current_line.clear();
1306 }
1307 } else if i == sentences.len() - 1 {
1308 let ends_with_punct = trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
1310
1311 let mut line = String::new();
1312 line.push_str(marker);
1313 line.push_str(trimmed);
1314 line.push_str(marker);
1315
1316 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1317 lines.push(line);
1318 } else {
1319 *current_line = line;
1321 }
1322 } else {
1323 let mut line = String::new();
1325 line.push_str(marker);
1326 line.push_str(trimmed);
1327 line.push_str(marker);
1328 lines.push(line);
1329 }
1330 }
1331 }
1332}
1333
1334fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1336 let mut lines = Vec::new();
1337 let mut current_line = String::new();
1338 let mut current_length = 0;
1339
1340 for element in elements {
1341 let element_str = format!("{element}");
1342 let element_len = element.len();
1343
1344 if let Element::Text(text) = element {
1346 let has_leading_space = text.starts_with(char::is_whitespace);
1348 let words: Vec<&str> = text.split_whitespace().collect();
1350
1351 for (i, word) in words.iter().enumerate() {
1352 let word_len = word.chars().count();
1353 let is_trailing_punct = word
1355 .chars()
1356 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1357
1358 if current_length > 0 && current_length + 1 + word_len > options.line_length && !is_trailing_punct {
1359 lines.push(current_line.trim().to_string());
1361 current_line = word.to_string();
1362 current_length = word_len;
1363 } else {
1364 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1368 current_line.push(' ');
1369 current_length += 1;
1370 }
1371 current_line.push_str(word);
1372 current_length += word_len;
1373 }
1374 }
1375 } else {
1376 if current_length > 0 && current_length + 1 + element_len > options.line_length {
1379 lines.push(current_line.trim().to_string());
1381 current_line = element_str;
1382 current_length = element_len;
1383 } else {
1384 let ends_with_opener =
1387 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1388 if current_length > 0 && !ends_with_opener {
1389 current_line.push(' ');
1390 current_length += 1;
1391 }
1392 current_line.push_str(&element_str);
1393 current_length += element_len;
1394 }
1395 }
1396 }
1397
1398 if !current_line.is_empty() {
1400 lines.push(current_line.trim_end().to_string());
1401 }
1402
1403 lines
1404}
1405
1406pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1408 let lines: Vec<&str> = content.lines().collect();
1409 let mut result = Vec::new();
1410 let mut i = 0;
1411
1412 while i < lines.len() {
1413 let line = lines[i];
1414 let trimmed = line.trim();
1415
1416 if trimmed.is_empty() {
1418 result.push(String::new());
1419 i += 1;
1420 continue;
1421 }
1422
1423 if trimmed.starts_with('#') {
1425 result.push(line.to_string());
1426 i += 1;
1427 continue;
1428 }
1429
1430 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1432 result.push(line.to_string());
1433 i += 1;
1434 while i < lines.len() {
1436 result.push(lines[i].to_string());
1437 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1438 i += 1;
1439 break;
1440 }
1441 i += 1;
1442 }
1443 continue;
1444 }
1445
1446 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1448 result.push(line.to_string());
1450 i += 1;
1451 while i < lines.len() {
1452 let next_line = lines[i];
1453 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1455 result.push(next_line.to_string());
1456 i += 1;
1457 } else {
1458 break;
1459 }
1460 }
1461 continue;
1462 }
1463
1464 if trimmed.starts_with('>') {
1466 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1469 let quote_prefix = line[0..gt_pos + 1].to_string();
1470 let quote_content = &line[quote_prefix.len()..].trim_start();
1471
1472 let reflowed = reflow_line(quote_content, options);
1473 for reflowed_line in reflowed.iter() {
1474 result.push(format!("{quote_prefix} {reflowed_line}"));
1475 }
1476 i += 1;
1477 continue;
1478 }
1479
1480 if is_horizontal_rule(trimmed) {
1482 result.push(line.to_string());
1483 i += 1;
1484 continue;
1485 }
1486
1487 let is_unordered_list = |s: &str, marker: char| -> bool {
1491 s.starts_with(marker) && !is_horizontal_rule(s) && (s.len() == 1 || s.chars().nth(1) == Some(' '))
1492 };
1493 if is_unordered_list(trimmed, '-')
1494 || is_unordered_list(trimmed, '*')
1495 || is_unordered_list(trimmed, '+')
1496 || is_numbered_list_item(trimmed)
1497 {
1498 let indent = line.len() - line.trim_start().len();
1500 let indent_str = " ".repeat(indent);
1501
1502 let mut marker_end = indent;
1505 let mut content_start = indent;
1506
1507 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1508 if let Some(period_pos) = line[indent..].find('.') {
1510 marker_end = indent + period_pos + 1; content_start = marker_end;
1512 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1516 content_start += 1;
1517 }
1518 }
1519 } else {
1520 marker_end = indent + 1; content_start = marker_end;
1523 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1527 content_start += 1;
1528 }
1529 }
1530
1531 let marker = &line[indent..marker_end];
1532
1533 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1536 i += 1;
1537
1538 while i < lines.len() {
1540 let next_line = lines[i];
1541 let next_trimmed = next_line.trim();
1542
1543 if next_trimmed.is_empty()
1545 || next_trimmed.starts_with('#')
1546 || next_trimmed.starts_with("```")
1547 || next_trimmed.starts_with("~~~")
1548 || next_trimmed.starts_with('>')
1549 || next_trimmed.starts_with('|')
1550 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1551 || is_horizontal_rule(next_trimmed)
1552 || (next_trimmed.starts_with('-')
1553 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1554 || (next_trimmed.starts_with('*')
1555 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1556 || (next_trimmed.starts_with('+')
1557 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1558 || is_numbered_list_item(next_trimmed)
1559 || is_definition_list_item(next_trimmed)
1560 {
1561 break;
1562 }
1563
1564 let next_indent = next_line.len() - next_line.trim_start().len();
1566 if next_indent >= content_start {
1567 let trimmed_start = next_line.trim_start();
1570 list_content.push(trim_preserving_hard_break(trimmed_start));
1571 i += 1;
1572 } else {
1573 break;
1575 }
1576 }
1577
1578 let combined_content = if options.preserve_breaks {
1581 list_content[0].clone()
1582 } else {
1583 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1585 if has_hard_breaks {
1586 list_content.join("\n")
1588 } else {
1589 list_content.join(" ")
1591 }
1592 };
1593
1594 let trimmed_marker = marker;
1596 let continuation_spaces = content_start;
1597
1598 let prefix_length = indent + trimmed_marker.len() + 1;
1600
1601 let adjusted_options = ReflowOptions {
1603 line_length: options.line_length.saturating_sub(prefix_length),
1604 ..options.clone()
1605 };
1606
1607 let reflowed = reflow_line(&combined_content, &adjusted_options);
1608 for (j, reflowed_line) in reflowed.iter().enumerate() {
1609 if j == 0 {
1610 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1611 } else {
1612 let continuation_indent = " ".repeat(continuation_spaces);
1614 result.push(format!("{continuation_indent}{reflowed_line}"));
1615 }
1616 }
1617 continue;
1618 }
1619
1620 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
1622 result.push(line.to_string());
1623 i += 1;
1624 continue;
1625 }
1626
1627 if trimmed.starts_with('[') && line.contains("]:") {
1629 result.push(line.to_string());
1630 i += 1;
1631 continue;
1632 }
1633
1634 if is_definition_list_item(trimmed) {
1636 result.push(line.to_string());
1637 i += 1;
1638 continue;
1639 }
1640
1641 let mut is_single_line_paragraph = true;
1643 if i + 1 < lines.len() {
1644 let next_line = lines[i + 1];
1645 let next_trimmed = next_line.trim();
1646 if !next_trimmed.is_empty()
1648 && !next_trimmed.starts_with('#')
1649 && !next_trimmed.starts_with("```")
1650 && !next_trimmed.starts_with("~~~")
1651 && !next_trimmed.starts_with('>')
1652 && !next_trimmed.starts_with('|')
1653 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1654 && !is_horizontal_rule(next_trimmed)
1655 && !(next_trimmed.starts_with('-')
1656 && !is_horizontal_rule(next_trimmed)
1657 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1658 && !(next_trimmed.starts_with('*')
1659 && !is_horizontal_rule(next_trimmed)
1660 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1661 && !(next_trimmed.starts_with('+')
1662 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1663 && !is_numbered_list_item(next_trimmed)
1664 {
1665 is_single_line_paragraph = false;
1666 }
1667 }
1668
1669 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1671 result.push(line.to_string());
1672 i += 1;
1673 continue;
1674 }
1675
1676 let mut paragraph_parts = Vec::new();
1678 let mut current_part = vec![line];
1679 i += 1;
1680
1681 if options.preserve_breaks {
1683 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1685 Some("\\")
1686 } else if line.ends_with(" ") {
1687 Some(" ")
1688 } else {
1689 None
1690 };
1691 let reflowed = reflow_line(line, options);
1692
1693 if let Some(break_marker) = hard_break_type {
1695 if !reflowed.is_empty() {
1696 let mut reflowed_with_break = reflowed;
1697 let last_idx = reflowed_with_break.len() - 1;
1698 if !has_hard_break(&reflowed_with_break[last_idx]) {
1699 reflowed_with_break[last_idx].push_str(break_marker);
1700 }
1701 result.extend(reflowed_with_break);
1702 }
1703 } else {
1704 result.extend(reflowed);
1705 }
1706 } else {
1707 while i < lines.len() {
1709 let prev_line = if !current_part.is_empty() {
1710 current_part.last().unwrap()
1711 } else {
1712 ""
1713 };
1714 let next_line = lines[i];
1715 let next_trimmed = next_line.trim();
1716
1717 if next_trimmed.is_empty()
1719 || next_trimmed.starts_with('#')
1720 || next_trimmed.starts_with("```")
1721 || next_trimmed.starts_with("~~~")
1722 || next_trimmed.starts_with('>')
1723 || next_trimmed.starts_with('|')
1724 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1725 || is_horizontal_rule(next_trimmed)
1726 || (next_trimmed.starts_with('-')
1727 && !is_horizontal_rule(next_trimmed)
1728 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1729 || (next_trimmed.starts_with('*')
1730 && !is_horizontal_rule(next_trimmed)
1731 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1732 || (next_trimmed.starts_with('+')
1733 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1734 || is_numbered_list_item(next_trimmed)
1735 || is_definition_list_item(next_trimmed)
1736 {
1737 break;
1738 }
1739
1740 let prev_trimmed = prev_line.trim();
1743 let abbreviations = get_abbreviations(&options.abbreviations);
1744 let ends_with_sentence = (prev_trimmed.ends_with('.')
1745 || prev_trimmed.ends_with('!')
1746 || prev_trimmed.ends_with('?')
1747 || prev_trimmed.ends_with(".*")
1748 || prev_trimmed.ends_with("!*")
1749 || prev_trimmed.ends_with("?*")
1750 || prev_trimmed.ends_with("._")
1751 || prev_trimmed.ends_with("!_")
1752 || prev_trimmed.ends_with("?_")
1753 || prev_trimmed.ends_with(".\"")
1755 || prev_trimmed.ends_with("!\"")
1756 || prev_trimmed.ends_with("?\"")
1757 || prev_trimmed.ends_with(".'")
1758 || prev_trimmed.ends_with("!'")
1759 || prev_trimmed.ends_with("?'")
1760 || prev_trimmed.ends_with(".\u{201D}")
1761 || prev_trimmed.ends_with("!\u{201D}")
1762 || prev_trimmed.ends_with("?\u{201D}")
1763 || prev_trimmed.ends_with(".\u{2019}")
1764 || prev_trimmed.ends_with("!\u{2019}")
1765 || prev_trimmed.ends_with("?\u{2019}"))
1766 && !text_ends_with_abbreviation(
1767 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
1768 &abbreviations,
1769 );
1770
1771 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
1772 paragraph_parts.push(current_part.join(" "));
1774 current_part = vec![next_line];
1775 } else {
1776 current_part.push(next_line);
1777 }
1778 i += 1;
1779 }
1780
1781 if !current_part.is_empty() {
1783 if current_part.len() == 1 {
1784 paragraph_parts.push(current_part[0].to_string());
1786 } else {
1787 paragraph_parts.push(current_part.join(" "));
1788 }
1789 }
1790
1791 for (j, part) in paragraph_parts.iter().enumerate() {
1793 let reflowed = reflow_line(part, options);
1794 result.extend(reflowed);
1795
1796 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
1800 let last_idx = result.len() - 1;
1801 if !has_hard_break(&result[last_idx]) {
1802 result[last_idx].push_str(" ");
1803 }
1804 }
1805 }
1806 }
1807 }
1808
1809 let result_text = result.join("\n");
1811 if content.ends_with('\n') && !result_text.ends_with('\n') {
1812 format!("{result_text}\n")
1813 } else {
1814 result_text
1815 }
1816}
1817
/// Describes how to replace one paragraph of a document with its reflowed form.
///
/// The byte range `start_byte..end_byte` refers to the original document and is
/// meant to be replaced by `reflowed_text`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph's first line begins.
    pub start_byte: usize,
    /// Byte offset just past the paragraph's last line; the line's trailing
    /// newline is part of the range when the line has one.
    pub end_byte: usize,
    /// The reflowed paragraph text that replaces the byte range.
    pub reflowed_text: String,
}
1828
1829pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1847 if line_number == 0 {
1848 return None;
1849 }
1850
1851 let lines: Vec<&str> = content.lines().collect();
1852
1853 if line_number > lines.len() {
1855 return None;
1856 }
1857
1858 let target_idx = line_number - 1; let target_line = lines[target_idx];
1860 let trimmed = target_line.trim();
1861
1862 if trimmed.is_empty()
1864 || trimmed.starts_with('#')
1865 || trimmed.starts_with("```")
1866 || trimmed.starts_with("~~~")
1867 || ElementCache::calculate_indentation_width_default(target_line) >= 4
1868 || trimmed.starts_with('>')
1869 || crate::utils::table_utils::TableUtils::is_potential_table_row(target_line) || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1872 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1873 && !is_horizontal_rule(trimmed)
1874 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1875 || is_numbered_list_item(trimmed)
1876 || is_definition_list_item(trimmed)
1877 {
1878 return None;
1879 }
1880
1881 let mut para_start = target_idx;
1883 while para_start > 0 {
1884 let prev_idx = para_start - 1;
1885 let prev_line = lines[prev_idx];
1886 let prev_trimmed = prev_line.trim();
1887
1888 if prev_trimmed.is_empty()
1890 || prev_trimmed.starts_with('#')
1891 || prev_trimmed.starts_with("```")
1892 || prev_trimmed.starts_with("~~~")
1893 || ElementCache::calculate_indentation_width_default(prev_line) >= 4
1894 || prev_trimmed.starts_with('>')
1895 || crate::utils::table_utils::TableUtils::is_potential_table_row(prev_line)
1896 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1897 || is_horizontal_rule(prev_trimmed)
1898 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1899 && !is_horizontal_rule(prev_trimmed)
1900 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1901 || is_numbered_list_item(prev_trimmed)
1902 || is_definition_list_item(prev_trimmed)
1903 {
1904 break;
1905 }
1906
1907 para_start = prev_idx;
1908 }
1909
1910 let mut para_end = target_idx;
1912 while para_end + 1 < lines.len() {
1913 let next_idx = para_end + 1;
1914 let next_line = lines[next_idx];
1915 let next_trimmed = next_line.trim();
1916
1917 if next_trimmed.is_empty()
1919 || next_trimmed.starts_with('#')
1920 || next_trimmed.starts_with("```")
1921 || next_trimmed.starts_with("~~~")
1922 || ElementCache::calculate_indentation_width_default(next_line) >= 4
1923 || next_trimmed.starts_with('>')
1924 || crate::utils::table_utils::TableUtils::is_potential_table_row(next_line)
1925 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1926 || is_horizontal_rule(next_trimmed)
1927 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1928 && !is_horizontal_rule(next_trimmed)
1929 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1930 || is_numbered_list_item(next_trimmed)
1931 || is_definition_list_item(next_trimmed)
1932 {
1933 break;
1934 }
1935
1936 para_end = next_idx;
1937 }
1938
1939 let paragraph_lines = &lines[para_start..=para_end];
1941
1942 let mut start_byte = 0;
1944 for line in lines.iter().take(para_start) {
1945 start_byte += line.len() + 1; }
1947
1948 let mut end_byte = start_byte;
1949 for line in paragraph_lines.iter() {
1950 end_byte += line.len() + 1; }
1952
1953 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1956
1957 if !includes_trailing_newline {
1959 end_byte -= 1;
1960 }
1961
1962 let paragraph_text = paragraph_lines.join("\n");
1964
1965 let options = ReflowOptions {
1967 line_length,
1968 break_on_sentences: true,
1969 preserve_breaks: false,
1970 sentence_per_line: false,
1971 abbreviations: None,
1972 };
1973
1974 let reflowed = reflow_markdown(¶graph_text, &options);
1976
1977 let reflowed_text = if includes_trailing_newline {
1981 if reflowed.ends_with('\n') {
1983 reflowed
1984 } else {
1985 format!("{reflowed}\n")
1986 }
1987 } else {
1988 if reflowed.ends_with('\n') {
1990 reflowed.trim_end_matches('\n').to_string()
1991 } else {
1992 reflowed
1993 }
1994 };
1995
1996 Some(ParagraphReflow {
1997 start_byte,
1998 end_byte,
1999 reflowed_text,
2000 })
2001}
2002
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation` against the default abbreviation
    /// set: texts that must be recognized as ending in an abbreviation, and
    /// texts (ordinary words, other punctuation, empty input) that must not.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        let recognized = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognized {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }
}