1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20
/// Settings that control how a line of Markdown text is reflowed.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length, measured in characters. `reflow_line` returns the
    /// input unchanged when this is 0 or when the line already fits.
    pub line_length: usize,
    /// NOTE(review): not read by the code visible in this part of the file;
    /// presumably lets wrapping prefer sentence boundaries — confirm against
    /// the consumers.
    pub break_on_sentences: bool,
    /// NOTE(review): not read by the code visible in this part of the file;
    /// presumably keeps existing line breaks — confirm against the consumers.
    pub preserve_breaks: bool,
    /// When true, `reflow_line` ignores `line_length` and emits one sentence
    /// per output line instead.
    pub sentence_per_line: bool,
    /// Optional caller-supplied abbreviations, forwarded to `get_abbreviations`
    /// when sentences are split.
    pub abbreviations: Option<Vec<String>>,
}
37
38impl Default for ReflowOptions {
39 fn default() -> Self {
40 Self {
41 line_length: 80,
42 break_on_sentences: true,
43 preserve_breaks: false,
44 sentence_per_line: false,
45 abbreviations: None,
46 }
47 }
48}
49
50fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
54 let chars: Vec<char> = text.chars().collect();
55
56 if pos + 1 >= chars.len() {
57 return false;
58 }
59
60 let c = chars[pos];
61 let next_char = chars[pos + 1];
62
63 if is_cjk_sentence_ending(c) {
66 let mut after_punct_pos = pos + 1;
68 while after_punct_pos < chars.len()
69 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
70 {
71 after_punct_pos += 1;
72 }
73
74 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
76 after_punct_pos += 1;
77 }
78
79 if after_punct_pos >= chars.len() {
81 return false;
82 }
83
84 while after_punct_pos < chars.len()
86 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
87 {
88 after_punct_pos += 1;
89 }
90
91 if after_punct_pos >= chars.len() {
92 return false;
93 }
94
95 return true;
98 }
99
100 if c != '.' && c != '!' && c != '?' {
102 return false;
103 }
104
105 let (_space_pos, after_space_pos) = if next_char == ' ' {
107 (pos + 1, pos + 2)
109 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
110 if chars[pos + 2] == ' ' {
112 (pos + 2, pos + 3)
114 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
115 (pos + 3, pos + 4)
117 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
118 && pos + 4 < chars.len()
119 && chars[pos + 3] == chars[pos + 2]
120 && chars[pos + 4] == ' '
121 {
122 (pos + 4, pos + 5)
124 } else {
125 return false;
126 }
127 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
128 (pos + 2, pos + 3)
130 } else if (next_char == '*' || next_char == '_')
131 && pos + 3 < chars.len()
132 && chars[pos + 2] == next_char
133 && chars[pos + 3] == ' '
134 {
135 (pos + 3, pos + 4)
137 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
138 (pos + 3, pos + 4)
140 } else {
141 return false;
142 };
143
144 let mut next_char_pos = after_space_pos;
146 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
147 next_char_pos += 1;
148 }
149
150 if next_char_pos >= chars.len() {
152 return false;
153 }
154
155 let mut first_letter_pos = next_char_pos;
157 while first_letter_pos < chars.len()
158 && (chars[first_letter_pos] == '*'
159 || chars[first_letter_pos] == '_'
160 || chars[first_letter_pos] == '~'
161 || is_opening_quote(chars[first_letter_pos]))
162 {
163 first_letter_pos += 1;
164 }
165
166 if first_letter_pos >= chars.len() {
168 return false;
169 }
170
171 let first_char = chars[first_letter_pos];
173 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
174 return false;
175 }
176
177 if pos > 0 && c == '.' {
179 if text_ends_with_abbreviation(&text[..=pos], abbreviations) {
182 return false;
183 }
184
185 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
188 return false;
189 }
190 }
191 true
192}
193
/// Splits `text` into sentences without any caller-supplied abbreviations.
///
/// Delegates to `split_into_sentences_custom` with `None`.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    split_into_sentences_custom(text, &None)
}
198
/// Splits `text` into sentences, using the abbreviation set that
/// `get_abbreviations` builds from `custom_abbreviations`.
///
/// Returns each sentence trimmed of surrounding whitespace.
pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
    // Resolve the abbreviation set once, then hand off to the shared splitter.
    let abbreviations = get_abbreviations(custom_abbreviations);
    split_into_sentences_with_set(text, &abbreviations)
}
204
205fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
208 let mut sentences = Vec::new();
209 let mut current_sentence = String::new();
210 let mut chars = text.chars().peekable();
211 let mut pos = 0;
212
213 while let Some(c) = chars.next() {
214 current_sentence.push(c);
215
216 if is_sentence_boundary(text, pos, abbreviations) {
217 while let Some(&next) = chars.peek() {
219 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
220 current_sentence.push(chars.next().unwrap());
221 pos += 1;
222 } else {
223 break;
224 }
225 }
226
227 if chars.peek() == Some(&' ') {
229 chars.next();
230 pos += 1;
231 }
232
233 sentences.push(current_sentence.trim().to_string());
234 current_sentence.clear();
235 }
236
237 pos += 1;
238 }
239
240 if !current_sentence.trim().is_empty() {
242 sentences.push(current_sentence.trim().to_string());
243 }
244 sentences
245}
246
/// Returns true when `line` consists solely of one repeated rule marker
/// (`-`, `_` or `*`), optionally separated by spaces, with at least three
/// markers in total. The line must start with the marker itself.
fn is_horizontal_rule(line: &str) -> bool {
    let mut glyphs = line.chars();

    // The first character picks the marker; anything else rules the line out.
    let marker = match glyphs.next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count: usize = 1;
    for ch in glyphs {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
275
/// Returns true when `line` begins with an ordered-list marker: one or more
/// ASCII digits, a period, and a space (for example `1. ` or `42. `).
///
/// Only ASCII digits count. Ordered-list markers in Markdown are arabic
/// numerals, so characters that merely have a numeric Unicode property
/// (such as `½` or `²`) must not make a line look like a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first non-digit; always a char boundary.
    let digits_end = line.find(|c: char| !c.is_ascii_digit()).unwrap_or(line.len());

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
299
/// Returns true when `line` ends with a Markdown hard line break: either two
/// or more trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF line endings) is ignored before checking. A
/// single trailing space is ordinary trailing whitespace, not a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
309
/// Returns true when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
314
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned as-is.
/// * A line ending in two or more spaces is normalised to its content
///   followed by exactly two spaces, so the hard break survives.
/// * Anything else (including a single trailing space, which is not a hard
///   break) is simply right-trimmed. A whitespace-only line becomes empty.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: nothing to trim.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Re-attach the canonical two-space hard break marker.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
345
346pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
347 if options.sentence_per_line {
349 let elements = parse_markdown_elements(line);
350 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
351 }
352
353 if options.line_length == 0 || line.chars().count() <= options.line_length {
356 return vec![line.to_string()];
357 }
358
359 let elements = parse_markdown_elements(line);
361
362 reflow_elements(&elements, options)
364}
365
/// Where the image inside a linked image (`Element::LinkedImage`) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// The image is given inline by its URL.
    Inline(String),
    /// The image is given by a reference label.
    Reference(String),
}
374
/// Where the link wrapped around a linked image (`Element::LinkedImage`)
/// points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// The link target is given inline by its URL.
    Inline(String),
    /// The link target is given by a reference label.
    Reference(String),
}
383
/// One inline piece of a Markdown line, as produced by
/// `parse_markdown_elements`. Each variant stores the element's content
/// without its surrounding delimiters, except where noted.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Reference link with an empty label: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image with alt text and URL: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Reference image with an empty label: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link; both the image source and the link target
    /// may be inline or a reference.
    LinkedImage {
        /// Alt text of the inner image.
        alt: String,
        /// Source of the inner image.
        img_source: LinkedImageSource,
        /// Target of the outer link.
        link_target: LinkedImageTarget,
    },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough text: `~~text~~`.
    Strikethrough(String),
    /// Wiki-style link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$math$`.
    InlineMath(String),
    /// Display math: `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// HTML tag, stored verbatim (delimiters included).
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored verbatim.
    HugoShortcode(String),
    /// Inline code: `` `code` ``.
    Code(String),
    /// Strong emphasis.
    Bold {
        /// Text between the delimiters.
        content: String,
        /// True for `__text__`, false for `**text**`.
        underscore: bool,
    },
    /// Emphasis.
    Italic {
        /// Text between the delimiters.
        content: String,
        /// True for `_text_`, false for `*text*`.
        underscore: bool,
    },
}
446
447impl std::fmt::Display for Element {
448 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
449 match self {
450 Element::Text(s) => write!(f, "{s}"),
451 Element::Link { text, url } => write!(f, "[{text}]({url})"),
452 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
453 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
454 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
455 Element::InlineImage { alt, url } => write!(f, ""),
456 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
457 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
458 Element::LinkedImage {
459 alt,
460 img_source,
461 link_target,
462 } => {
463 let img_part = match img_source {
465 LinkedImageSource::Inline(url) => format!(""),
466 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
467 };
468 match link_target {
470 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
471 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
472 }
473 }
474 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
475 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
476 Element::WikiLink(s) => write!(f, "[[{s}]]"),
477 Element::InlineMath(s) => write!(f, "${s}$"),
478 Element::DisplayMath(s) => write!(f, "$${s}$$"),
479 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
480 Element::HtmlTag(s) => write!(f, "{s}"),
481 Element::HtmlEntity(s) => write!(f, "{s}"),
482 Element::HugoShortcode(s) => write!(f, "{s}"),
483 Element::Code(s) => write!(f, "`{s}`"),
484 Element::Bold { content, underscore } => {
485 if *underscore {
486 write!(f, "__{content}__")
487 } else {
488 write!(f, "**{content}**")
489 }
490 }
491 Element::Italic { content, underscore } => {
492 if *underscore {
493 write!(f, "_{content}_")
494 } else {
495 write!(f, "*{content}*")
496 }
497 }
498 }
499 }
500}
501
502impl Element {
503 fn len(&self) -> usize {
504 match self {
505 Element::Text(s) => s.chars().count(),
506 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::LinkedImage {
514 alt,
515 img_source,
516 link_target,
517 } => {
518 let alt_len = alt.chars().count();
521 let img_len = match img_source {
522 LinkedImageSource::Inline(url) => url.chars().count() + 2, LinkedImageSource::Reference(r) => r.chars().count() + 2, };
525 let link_len = match link_target {
526 LinkedImageTarget::Inline(url) => url.chars().count() + 2, LinkedImageTarget::Reference(r) => r.chars().count() + 2, };
529 5 + alt_len + img_len + link_len
532 }
533 Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::HugoShortcode(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold { content, .. } => content.chars().count() + 4, Element::Italic { content, .. } => content.chars().count() + 2, }
546 }
547}
548
/// An emphasis, strong or strikethrough span located in a piece of text by
/// `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the text where the span (including its opening
    /// delimiter) starts.
    start: usize,
    /// Byte offset just past the span's closing delimiter.
    end: usize,
    /// Text between the opening and closing delimiters.
    content: String,
    /// True for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// True for strikethrough (`~~`).
    is_strikethrough: bool,
    /// True when the delimiter is made of underscores rather than asterisks.
    /// Always false for strikethrough spans.
    uses_underscore: bool,
}
565
566fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
576 let mut spans = Vec::new();
577 let mut options = Options::empty();
578 options.insert(Options::ENABLE_STRIKETHROUGH);
579
580 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
583 let mut strikethrough_stack: Vec<usize> = Vec::new();
584
585 let parser = Parser::new_ext(text, options).into_offset_iter();
586
587 for (event, range) in parser {
588 match event {
589 Event::Start(Tag::Emphasis) => {
590 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
592 emphasis_stack.push((range.start, uses_underscore));
593 }
594 Event::End(TagEnd::Emphasis) => {
595 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
596 let content_start = start_byte + 1;
598 let content_end = range.end - 1;
599 if content_end > content_start
600 && let Some(content) = text.get(content_start..content_end)
601 {
602 spans.push(EmphasisSpan {
603 start: start_byte,
604 end: range.end,
605 content: content.to_string(),
606 is_strong: false,
607 is_strikethrough: false,
608 uses_underscore,
609 });
610 }
611 }
612 }
613 Event::Start(Tag::Strong) => {
614 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
616 strong_stack.push((range.start, uses_underscore));
617 }
618 Event::End(TagEnd::Strong) => {
619 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
620 let content_start = start_byte + 2;
622 let content_end = range.end - 2;
623 if content_end > content_start
624 && let Some(content) = text.get(content_start..content_end)
625 {
626 spans.push(EmphasisSpan {
627 start: start_byte,
628 end: range.end,
629 content: content.to_string(),
630 is_strong: true,
631 is_strikethrough: false,
632 uses_underscore,
633 });
634 }
635 }
636 }
637 Event::Start(Tag::Strikethrough) => {
638 strikethrough_stack.push(range.start);
639 }
640 Event::End(TagEnd::Strikethrough) => {
641 if let Some(start_byte) = strikethrough_stack.pop() {
642 let content_start = start_byte + 2;
644 let content_end = range.end - 2;
645 if content_end > content_start
646 && let Some(content) = text.get(content_start..content_end)
647 {
648 spans.push(EmphasisSpan {
649 start: start_byte,
650 end: range.end,
651 content: content.to_string(),
652 is_strong: false,
653 is_strikethrough: true,
654 uses_underscore: false,
655 });
656 }
657 }
658 }
659 _ => {}
660 }
661 }
662
663 spans.sort_by_key(|s| s.start);
665 spans
666}
667
/// Breaks a line of Markdown into a flat sequence of inline `Element`s.
///
/// The text is consumed left to right. On every iteration the function looks
/// for the earliest regex-recognised construct (links, images, footnotes,
/// math, emoji, HTML, shortcodes, ...) and for the earliest "special"
/// construct (a backtick code span or an emphasis span reported by
/// `extract_emphasis_spans`). Whichever starts first is emitted, preceded by
/// any plain text before it, and parsing continues after it.
///
/// Returns the elements in source order.
fn parse_markdown_elements(text: &str) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once for the whole text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start offset within `remaining`, pattern tag, match). Patterns are
        // tested in a fixed order and a later pattern only wins when it starts
        // strictly earlier, so on ties the pattern tested first is kept.
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images all contain "[!"; skip the four regexes when absent.
        if remaining.contains("[!") {
            // Inline image, inline link target.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // Reference image, inline link target.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // Inline image, reference link target.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // Reference image, reference link target.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        // Images are tested before links, so on a tie the image pattern wins.
        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        // Display math is tested before inline math, so it wins on a tie.
        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        // Hugo shortcodes are tested before HTML tags, so they win on a tie.
        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // Angle-bracket autolinks (`<https://...>`, `<mailto:...>`, ...)
            // match the tag pattern but are not recorded as HTML tags.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            // Likewise for e-mail autolinks such as `<user@example.com>`.
            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if !is_url_autolink && !is_email_autolink {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Earliest code span or emphasis span; `remaining.len()` means "none".
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Spans are sorted by start, so the first one that begins inside
        // `remaining` is the earliest candidate; later ones need no check.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex match is used only if it starts strictly before the
        // code/emphasis candidate.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Plain text in front of the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs its regex to obtain capture groups. If that
            // unexpectedly fails, the leading delimiter is emitted as text so
            // the loop still makes progress.
            match pattern_type {
                "linked_image_ii" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label is kept as its own variant so it
                        // renders back as `![alt][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label is kept as its own variant so it
                        // renders back as `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The next three kinds are stored verbatim, so the matched
                // text is used directly without capture groups.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                // Unknown tag: emit one bracket as text and move on.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // No regex construct comes first: emit the text in front of the
            // code/emphasis candidate, if any.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` now starts at the opening backtick; look for
                    // the closing one after it.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unclosed backtick: the rest of the line is text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Byte length of the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Defensive fallback: emit one byte as text.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Nothing special left: the rest of the line is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1163
1164fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1166 let abbreviations = get_abbreviations(custom_abbreviations);
1167 let mut lines = Vec::new();
1168 let mut current_line = String::new();
1169
1170 for element in elements.iter() {
1171 let element_str = format!("{element}");
1172
1173 if let Element::Text(text) = element {
1175 let combined = format!("{current_line}{text}");
1177 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1179
1180 if sentences.len() > 1 {
1181 for (i, sentence) in sentences.iter().enumerate() {
1183 if i == 0 {
1184 let trimmed = sentence.trim();
1187
1188 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1189 current_line = sentence.to_string();
1191 } else {
1192 lines.push(sentence.to_string());
1194 current_line.clear();
1195 }
1196 } else if i == sentences.len() - 1 {
1197 let trimmed = sentence.trim();
1199 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1200
1201 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1202 lines.push(sentence.to_string());
1204 current_line.clear();
1205 } else {
1206 current_line = sentence.to_string();
1208 }
1209 } else {
1210 lines.push(sentence.to_string());
1212 }
1213 }
1214 } else {
1215 let trimmed = combined.trim();
1217
1218 if trimmed.is_empty() {
1222 continue;
1223 }
1224
1225 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1226
1227 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1228 lines.push(trimmed.to_string());
1230 current_line.clear();
1231 } else {
1232 current_line = combined;
1234 }
1235 }
1236 } else if let Element::Italic { content, underscore } = element {
1237 let marker = if *underscore { "_" } else { "*" };
1239 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1240 } else if let Element::Bold { content, underscore } = element {
1241 let marker = if *underscore { "__" } else { "**" };
1243 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1244 } else if let Element::Strikethrough(content) = element {
1245 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1247 } else {
1248 if !current_line.is_empty()
1251 && !current_line.ends_with(' ')
1252 && !current_line.ends_with('(')
1253 && !current_line.ends_with('[')
1254 {
1255 current_line.push(' ');
1256 }
1257 current_line.push_str(&element_str);
1258 }
1259 }
1260
1261 if !current_line.is_empty() {
1263 lines.push(current_line.trim().to_string());
1264 }
1265 lines
1266}
1267
1268fn handle_emphasis_sentence_split(
1270 content: &str,
1271 marker: &str,
1272 abbreviations: &HashSet<String>,
1273 current_line: &mut String,
1274 lines: &mut Vec<String>,
1275) {
1276 let sentences = split_into_sentences_with_set(content, abbreviations);
1278
1279 if sentences.len() <= 1 {
1280 if !current_line.is_empty()
1282 && !current_line.ends_with(' ')
1283 && !current_line.ends_with('(')
1284 && !current_line.ends_with('[')
1285 {
1286 current_line.push(' ');
1287 }
1288 current_line.push_str(marker);
1289 current_line.push_str(content);
1290 current_line.push_str(marker);
1291
1292 let trimmed = content.trim();
1294 let ends_with_punct = ends_with_sentence_punct(trimmed);
1295 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1296 lines.push(current_line.clone());
1297 current_line.clear();
1298 }
1299 } else {
1300 for (i, sentence) in sentences.iter().enumerate() {
1302 let trimmed = sentence.trim();
1303 if trimmed.is_empty() {
1304 continue;
1305 }
1306
1307 if i == 0 {
1308 if !current_line.is_empty()
1310 && !current_line.ends_with(' ')
1311 && !current_line.ends_with('(')
1312 && !current_line.ends_with('[')
1313 {
1314 current_line.push(' ');
1315 }
1316 current_line.push_str(marker);
1317 current_line.push_str(trimmed);
1318 current_line.push_str(marker);
1319
1320 let ends_with_punct = ends_with_sentence_punct(trimmed);
1322 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1323 lines.push(current_line.clone());
1324 current_line.clear();
1325 }
1326 } else if i == sentences.len() - 1 {
1327 let ends_with_punct = ends_with_sentence_punct(trimmed);
1329
1330 let mut line = String::new();
1331 line.push_str(marker);
1332 line.push_str(trimmed);
1333 line.push_str(marker);
1334
1335 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1336 lines.push(line);
1337 } else {
1338 *current_line = line;
1340 }
1341 } else {
1342 let mut line = String::new();
1344 line.push_str(marker);
1345 line.push_str(trimmed);
1346 line.push_str(marker);
1347 lines.push(line);
1348 }
1349 }
1350 }
1351}
1352
1353fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1355 let mut lines = Vec::new();
1356 let mut current_line = String::new();
1357 let mut current_length = 0;
1358
1359 for element in elements {
1360 let element_str = format!("{element}");
1361 let element_len = element.len();
1362
1363 if let Element::Text(text) = element {
1365 let has_leading_space = text.starts_with(char::is_whitespace);
1367 let words: Vec<&str> = text.split_whitespace().collect();
1369
1370 for (i, word) in words.iter().enumerate() {
1371 let word_len = word.chars().count();
1372 let is_trailing_punct = word
1374 .chars()
1375 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1376
1377 if current_length > 0 && current_length + 1 + word_len > options.line_length && !is_trailing_punct {
1378 lines.push(current_line.trim().to_string());
1380 current_line = word.to_string();
1381 current_length = word_len;
1382 } else {
1383 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1387 current_line.push(' ');
1388 current_length += 1;
1389 }
1390 current_line.push_str(word);
1391 current_length += word_len;
1392 }
1393 }
1394 } else {
1395 if current_length > 0 && current_length + 1 + element_len > options.line_length {
1398 lines.push(current_line.trim().to_string());
1400 current_line = element_str;
1401 current_length = element_len;
1402 } else {
1403 let ends_with_opener =
1406 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1407 if current_length > 0 && !ends_with_opener {
1408 current_line.push(' ');
1409 current_length += 1;
1410 }
1411 current_line.push_str(&element_str);
1412 current_length += element_len;
1413 }
1414 }
1415 }
1416
1417 if !current_line.is_empty() {
1419 lines.push(current_line.trim_end().to_string());
1420 }
1421
1422 lines
1423}
1424
1425pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1427 let lines: Vec<&str> = content.lines().collect();
1428 let mut result = Vec::new();
1429 let mut i = 0;
1430
1431 while i < lines.len() {
1432 let line = lines[i];
1433 let trimmed = line.trim();
1434
1435 if trimmed.is_empty() {
1437 result.push(String::new());
1438 i += 1;
1439 continue;
1440 }
1441
1442 if trimmed.starts_with('#') {
1444 result.push(line.to_string());
1445 i += 1;
1446 continue;
1447 }
1448
1449 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1451 result.push(line.to_string());
1452 i += 1;
1453 while i < lines.len() {
1455 result.push(lines[i].to_string());
1456 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1457 i += 1;
1458 break;
1459 }
1460 i += 1;
1461 }
1462 continue;
1463 }
1464
1465 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1467 result.push(line.to_string());
1469 i += 1;
1470 while i < lines.len() {
1471 let next_line = lines[i];
1472 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1474 result.push(next_line.to_string());
1475 i += 1;
1476 } else {
1477 break;
1478 }
1479 }
1480 continue;
1481 }
1482
1483 if trimmed.starts_with('>') {
1485 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1488 let quote_prefix = line[0..gt_pos + 1].to_string();
1489 let quote_content = &line[quote_prefix.len()..].trim_start();
1490
1491 let reflowed = reflow_line(quote_content, options);
1492 for reflowed_line in reflowed.iter() {
1493 result.push(format!("{quote_prefix} {reflowed_line}"));
1494 }
1495 i += 1;
1496 continue;
1497 }
1498
1499 if is_horizontal_rule(trimmed) {
1501 result.push(line.to_string());
1502 i += 1;
1503 continue;
1504 }
1505
1506 let is_unordered_list = |s: &str, marker: char| -> bool {
1510 s.starts_with(marker) && !is_horizontal_rule(s) && (s.len() == 1 || s.chars().nth(1) == Some(' '))
1511 };
1512 if is_unordered_list(trimmed, '-')
1513 || is_unordered_list(trimmed, '*')
1514 || is_unordered_list(trimmed, '+')
1515 || is_numbered_list_item(trimmed)
1516 {
1517 let indent = line.len() - line.trim_start().len();
1519 let indent_str = " ".repeat(indent);
1520
1521 let mut marker_end = indent;
1524 let mut content_start = indent;
1525
1526 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1527 if let Some(period_pos) = line[indent..].find('.') {
1529 marker_end = indent + period_pos + 1; content_start = marker_end;
1531 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1535 content_start += 1;
1536 }
1537 }
1538 } else {
1539 marker_end = indent + 1; content_start = marker_end;
1542 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1546 content_start += 1;
1547 }
1548 }
1549
1550 let marker = &line[indent..marker_end];
1551
1552 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1555 i += 1;
1556
1557 while i < lines.len() {
1559 let next_line = lines[i];
1560 let next_trimmed = next_line.trim();
1561
1562 if next_trimmed.is_empty()
1564 || next_trimmed.starts_with('#')
1565 || next_trimmed.starts_with("```")
1566 || next_trimmed.starts_with("~~~")
1567 || next_trimmed.starts_with('>')
1568 || next_trimmed.starts_with('|')
1569 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1570 || is_horizontal_rule(next_trimmed)
1571 || (next_trimmed.starts_with('-')
1572 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1573 || (next_trimmed.starts_with('*')
1574 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1575 || (next_trimmed.starts_with('+')
1576 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1577 || is_numbered_list_item(next_trimmed)
1578 || is_definition_list_item(next_trimmed)
1579 {
1580 break;
1581 }
1582
1583 let next_indent = next_line.len() - next_line.trim_start().len();
1585 if next_indent >= content_start {
1586 let trimmed_start = next_line.trim_start();
1589 list_content.push(trim_preserving_hard_break(trimmed_start));
1590 i += 1;
1591 } else {
1592 break;
1594 }
1595 }
1596
1597 let combined_content = if options.preserve_breaks {
1600 list_content[0].clone()
1601 } else {
1602 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1604 if has_hard_breaks {
1605 list_content.join("\n")
1607 } else {
1608 list_content.join(" ")
1610 }
1611 };
1612
1613 let trimmed_marker = marker;
1615 let continuation_spaces = content_start;
1616
1617 let prefix_length = indent + trimmed_marker.len() + 1;
1619
1620 let adjusted_options = ReflowOptions {
1622 line_length: options.line_length.saturating_sub(prefix_length),
1623 ..options.clone()
1624 };
1625
1626 let reflowed = reflow_line(&combined_content, &adjusted_options);
1627 for (j, reflowed_line) in reflowed.iter().enumerate() {
1628 if j == 0 {
1629 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1630 } else {
1631 let continuation_indent = " ".repeat(continuation_spaces);
1633 result.push(format!("{continuation_indent}{reflowed_line}"));
1634 }
1635 }
1636 continue;
1637 }
1638
1639 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
1641 result.push(line.to_string());
1642 i += 1;
1643 continue;
1644 }
1645
1646 if trimmed.starts_with('[') && line.contains("]:") {
1648 result.push(line.to_string());
1649 i += 1;
1650 continue;
1651 }
1652
1653 if is_definition_list_item(trimmed) {
1655 result.push(line.to_string());
1656 i += 1;
1657 continue;
1658 }
1659
1660 let mut is_single_line_paragraph = true;
1662 if i + 1 < lines.len() {
1663 let next_line = lines[i + 1];
1664 let next_trimmed = next_line.trim();
1665 if !next_trimmed.is_empty()
1667 && !next_trimmed.starts_with('#')
1668 && !next_trimmed.starts_with("```")
1669 && !next_trimmed.starts_with("~~~")
1670 && !next_trimmed.starts_with('>')
1671 && !next_trimmed.starts_with('|')
1672 && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1673 && !is_horizontal_rule(next_trimmed)
1674 && !(next_trimmed.starts_with('-')
1675 && !is_horizontal_rule(next_trimmed)
1676 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1677 && !(next_trimmed.starts_with('*')
1678 && !is_horizontal_rule(next_trimmed)
1679 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1680 && !(next_trimmed.starts_with('+')
1681 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1682 && !is_numbered_list_item(next_trimmed)
1683 {
1684 is_single_line_paragraph = false;
1685 }
1686 }
1687
1688 if is_single_line_paragraph && line.chars().count() <= options.line_length {
1690 result.push(line.to_string());
1691 i += 1;
1692 continue;
1693 }
1694
1695 let mut paragraph_parts = Vec::new();
1697 let mut current_part = vec![line];
1698 i += 1;
1699
1700 if options.preserve_breaks {
1702 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1704 Some("\\")
1705 } else if line.ends_with(" ") {
1706 Some(" ")
1707 } else {
1708 None
1709 };
1710 let reflowed = reflow_line(line, options);
1711
1712 if let Some(break_marker) = hard_break_type {
1714 if !reflowed.is_empty() {
1715 let mut reflowed_with_break = reflowed;
1716 let last_idx = reflowed_with_break.len() - 1;
1717 if !has_hard_break(&reflowed_with_break[last_idx]) {
1718 reflowed_with_break[last_idx].push_str(break_marker);
1719 }
1720 result.extend(reflowed_with_break);
1721 }
1722 } else {
1723 result.extend(reflowed);
1724 }
1725 } else {
1726 while i < lines.len() {
1728 let prev_line = if !current_part.is_empty() {
1729 current_part.last().unwrap()
1730 } else {
1731 ""
1732 };
1733 let next_line = lines[i];
1734 let next_trimmed = next_line.trim();
1735
1736 if next_trimmed.is_empty()
1738 || next_trimmed.starts_with('#')
1739 || next_trimmed.starts_with("```")
1740 || next_trimmed.starts_with("~~~")
1741 || next_trimmed.starts_with('>')
1742 || next_trimmed.starts_with('|')
1743 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1744 || is_horizontal_rule(next_trimmed)
1745 || (next_trimmed.starts_with('-')
1746 && !is_horizontal_rule(next_trimmed)
1747 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1748 || (next_trimmed.starts_with('*')
1749 && !is_horizontal_rule(next_trimmed)
1750 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1751 || (next_trimmed.starts_with('+')
1752 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1753 || is_numbered_list_item(next_trimmed)
1754 || is_definition_list_item(next_trimmed)
1755 {
1756 break;
1757 }
1758
1759 let prev_trimmed = prev_line.trim();
1762 let abbreviations = get_abbreviations(&options.abbreviations);
1763 let ends_with_sentence = (prev_trimmed.ends_with('.')
1764 || prev_trimmed.ends_with('!')
1765 || prev_trimmed.ends_with('?')
1766 || prev_trimmed.ends_with(".*")
1767 || prev_trimmed.ends_with("!*")
1768 || prev_trimmed.ends_with("?*")
1769 || prev_trimmed.ends_with("._")
1770 || prev_trimmed.ends_with("!_")
1771 || prev_trimmed.ends_with("?_")
1772 || prev_trimmed.ends_with(".\"")
1774 || prev_trimmed.ends_with("!\"")
1775 || prev_trimmed.ends_with("?\"")
1776 || prev_trimmed.ends_with(".'")
1777 || prev_trimmed.ends_with("!'")
1778 || prev_trimmed.ends_with("?'")
1779 || prev_trimmed.ends_with(".\u{201D}")
1780 || prev_trimmed.ends_with("!\u{201D}")
1781 || prev_trimmed.ends_with("?\u{201D}")
1782 || prev_trimmed.ends_with(".\u{2019}")
1783 || prev_trimmed.ends_with("!\u{2019}")
1784 || prev_trimmed.ends_with("?\u{2019}"))
1785 && !text_ends_with_abbreviation(
1786 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
1787 &abbreviations,
1788 );
1789
1790 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
1791 paragraph_parts.push(current_part.join(" "));
1793 current_part = vec![next_line];
1794 } else {
1795 current_part.push(next_line);
1796 }
1797 i += 1;
1798 }
1799
1800 if !current_part.is_empty() {
1802 if current_part.len() == 1 {
1803 paragraph_parts.push(current_part[0].to_string());
1805 } else {
1806 paragraph_parts.push(current_part.join(" "));
1807 }
1808 }
1809
1810 for (j, part) in paragraph_parts.iter().enumerate() {
1812 let reflowed = reflow_line(part, options);
1813 result.extend(reflowed);
1814
1815 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
1819 let last_idx = result.len() - 1;
1820 if !has_hard_break(&result[last_idx]) {
1821 result[last_idx].push_str(" ");
1822 }
1823 }
1824 }
1825 }
1826 }
1827
1828 let result_text = result.join("\n");
1830 if content.ends_with('\n') && !result_text.ends_with('\n') {
1831 format!("{result_text}\n")
1832 } else {
1833 result_text
1834 }
1835}
1836
/// A single-paragraph reflow expressed as a text replacement.
///
/// Replacing the bytes `start_byte..end_byte` of the original content with
/// `reflowed_text` applies the reflow.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset in the original content where the paragraph starts.
    pub start_byte: usize,
    /// Byte offset in the original content just past the end of the paragraph
    /// (exclusive).
    pub end_byte: usize,
    /// Replacement text for the byte range.
    pub reflowed_text: String,
}
1847
1848pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1866 if line_number == 0 {
1867 return None;
1868 }
1869
1870 let lines: Vec<&str> = content.lines().collect();
1871
1872 if line_number > lines.len() {
1874 return None;
1875 }
1876
1877 let target_idx = line_number - 1; let target_line = lines[target_idx];
1879 let trimmed = target_line.trim();
1880
1881 if trimmed.is_empty()
1883 || trimmed.starts_with('#')
1884 || trimmed.starts_with("```")
1885 || trimmed.starts_with("~~~")
1886 || ElementCache::calculate_indentation_width_default(target_line) >= 4
1887 || trimmed.starts_with('>')
1888 || crate::utils::table_utils::TableUtils::is_potential_table_row(target_line) || (trimmed.starts_with('[') && target_line.contains("]:")) || is_horizontal_rule(trimmed)
1891 || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1892 && !is_horizontal_rule(trimmed)
1893 && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1894 || is_numbered_list_item(trimmed)
1895 || is_definition_list_item(trimmed)
1896 {
1897 return None;
1898 }
1899
1900 let mut para_start = target_idx;
1902 while para_start > 0 {
1903 let prev_idx = para_start - 1;
1904 let prev_line = lines[prev_idx];
1905 let prev_trimmed = prev_line.trim();
1906
1907 if prev_trimmed.is_empty()
1909 || prev_trimmed.starts_with('#')
1910 || prev_trimmed.starts_with("```")
1911 || prev_trimmed.starts_with("~~~")
1912 || ElementCache::calculate_indentation_width_default(prev_line) >= 4
1913 || prev_trimmed.starts_with('>')
1914 || crate::utils::table_utils::TableUtils::is_potential_table_row(prev_line)
1915 || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1916 || is_horizontal_rule(prev_trimmed)
1917 || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1918 && !is_horizontal_rule(prev_trimmed)
1919 && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1920 || is_numbered_list_item(prev_trimmed)
1921 || is_definition_list_item(prev_trimmed)
1922 {
1923 break;
1924 }
1925
1926 para_start = prev_idx;
1927 }
1928
1929 let mut para_end = target_idx;
1931 while para_end + 1 < lines.len() {
1932 let next_idx = para_end + 1;
1933 let next_line = lines[next_idx];
1934 let next_trimmed = next_line.trim();
1935
1936 if next_trimmed.is_empty()
1938 || next_trimmed.starts_with('#')
1939 || next_trimmed.starts_with("```")
1940 || next_trimmed.starts_with("~~~")
1941 || ElementCache::calculate_indentation_width_default(next_line) >= 4
1942 || next_trimmed.starts_with('>')
1943 || crate::utils::table_utils::TableUtils::is_potential_table_row(next_line)
1944 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1945 || is_horizontal_rule(next_trimmed)
1946 || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1947 && !is_horizontal_rule(next_trimmed)
1948 && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1949 || is_numbered_list_item(next_trimmed)
1950 || is_definition_list_item(next_trimmed)
1951 {
1952 break;
1953 }
1954
1955 para_end = next_idx;
1956 }
1957
1958 let paragraph_lines = &lines[para_start..=para_end];
1960
1961 let mut start_byte = 0;
1963 for line in lines.iter().take(para_start) {
1964 start_byte += line.len() + 1; }
1966
1967 let mut end_byte = start_byte;
1968 for line in paragraph_lines.iter() {
1969 end_byte += line.len() + 1; }
1971
1972 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1975
1976 if !includes_trailing_newline {
1978 end_byte -= 1;
1979 }
1980
1981 let paragraph_text = paragraph_lines.join("\n");
1983
1984 let options = ReflowOptions {
1986 line_length,
1987 break_on_sentences: true,
1988 preserve_breaks: false,
1989 sentence_per_line: false,
1990 abbreviations: None,
1991 };
1992
1993 let reflowed = reflow_markdown(¶graph_text, &options);
1995
1996 let reflowed_text = if includes_trailing_newline {
2000 if reflowed.ends_with('\n') {
2002 reflowed
2003 } else {
2004 format!("{reflowed}\n")
2005 }
2006 } else {
2007 if reflowed.ends_with('\n') {
2009 reflowed.trim_end_matches('\n').to_string()
2010 } else {
2011 reflowed
2012 }
2013 };
2014
2015 Some(ParagraphReflow {
2016 start_byte,
2017 end_byte,
2018 reflowed_text,
2019 })
2020}
2021
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation` against the default abbreviation set:
    /// which trailing words it must recognize and which it must reject.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Inputs that must be recognized as ending with an abbreviation.
        let recognized = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognized {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Inputs that must be rejected: ordinary words ending in a period (including
        // "etc."), abbreviation stems followed by `?` or `!`, text without trailing
        // punctuation, and the empty string.
        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }
}