1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20use unicode_width::UnicodeWidthStr;
21
/// Unit in which line length is measured when reflowing text.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum ReflowLengthMode {
    /// Measure by number of Unicode scalar values (`char`s).
    Chars,
    /// Measure by display width as reported by `unicode-width`. This is the default.
    #[default]
    Visual,
    /// Measure by number of UTF-8 bytes.
    Bytes,
}
33
34fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
36 match mode {
37 ReflowLengthMode::Chars => s.chars().count(),
38 ReflowLengthMode::Visual => s.width(),
39 ReflowLengthMode::Bytes => s.len(),
40 }
41}
42
43#[derive(Clone)]
45pub struct ReflowOptions {
46 pub line_length: usize,
48 pub break_on_sentences: bool,
50 pub preserve_breaks: bool,
52 pub sentence_per_line: bool,
54 pub semantic_line_breaks: bool,
56 pub abbreviations: Option<Vec<String>>,
60 pub length_mode: ReflowLengthMode,
62}
63
64impl Default for ReflowOptions {
65 fn default() -> Self {
66 Self {
67 line_length: 80,
68 break_on_sentences: true,
69 preserve_breaks: false,
70 sentence_per_line: false,
71 semantic_line_breaks: false,
72 abbreviations: None,
73 length_mode: ReflowLengthMode::default(),
74 }
75 }
76}
77
/// Report whether the character at index `pos` of `text` ends a sentence.
///
/// `pos` is a character index (the function indexes a `Vec<char>` built from `text`),
/// not a byte offset. `abbreviations` is passed to `text_ends_with_abbreviation` so that
/// a period closing a known abbreviation is not treated as a sentence end.
///
/// Returns `false` whenever nothing meaningful follows the punctuation, so the last
/// sentence of a text is never reported as a boundary here.
fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
    // Work on chars so that `pos` and every lookahead offset below are character indices.
    let chars: Vec<char> = text.chars().collect();

    // The last character (or an out-of-range `pos`) has nothing after it.
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // CJK sentence-ending punctuation: no following space or capital letter is required.
    if is_cjk_sentence_ending(c) {
        let mut after_punct_pos = pos + 1;
        // Skip emphasis/strikethrough marker characters directly after the punctuation.
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip whitespace, if any.
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Only markers/whitespace remained: end of text, not a boundary.
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip marker characters that open the next sentence.
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        // Some real content follows the CJK punctuation.
        return true;
    }

    // Outside the CJK case only `.`, `!` and `?` can end a sentence.
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Locate the space that must follow the punctuation, allowing a closing quote and/or
    // one or two emphasis markers (or `~~`) in between. `after_space_pos` is the index just
    // past that space; `_space_pos` is computed but unused.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // "X. Y"
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        if chars[pos + 2] == ' ' {
            // punctuation, closing quote, space
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // punctuation, closing quote, single marker, space
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // punctuation, closing quote, doubled marker, space
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // punctuation, single marker, space
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // punctuation, doubled marker, space
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // punctuation, `~~`, space
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip any further whitespace after the separating space.
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Trailing whitespace only: nothing starts a new sentence.
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip marker characters and opening quotes that precede the first letter of the next sentence.
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    if first_letter_pos >= chars.len() {
        return false;
    }

    // The next sentence must start with an uppercase letter or a CJK character.
    let first_char = chars[first_letter_pos];
    if !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    // Extra checks that apply only to a period that is not the first character.
    if pos > 0 && c == '.' {
        // Convert the char index back to a byte offset (inclusive of the period) to slice `text`.
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        // A period that closes a known abbreviation does not end the sentence.
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // A period between two numeric characters is not a boundary.
        if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
            return false;
        }
    }
    true
}
221
222pub fn split_into_sentences(text: &str) -> Vec<String> {
224 split_into_sentences_custom(text, &None)
225}
226
227pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
229 let abbreviations = get_abbreviations(custom_abbreviations);
230 split_into_sentences_with_set(text, &abbreviations)
231}
232
233fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
236 let mut sentences = Vec::new();
237 let mut current_sentence = String::new();
238 let mut chars = text.chars().peekable();
239 let mut pos = 0;
240
241 while let Some(c) = chars.next() {
242 current_sentence.push(c);
243
244 if is_sentence_boundary(text, pos, abbreviations) {
245 while let Some(&next) = chars.peek() {
247 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
248 current_sentence.push(chars.next().unwrap());
249 pos += 1;
250 } else {
251 break;
252 }
253 }
254
255 if chars.peek() == Some(&' ') {
257 chars.next();
258 pos += 1;
259 }
260
261 sentences.push(current_sentence.trim().to_string());
262 current_sentence.clear();
263 }
264
265 pos += 1;
266 }
267
268 if !current_sentence.trim().is_empty() {
270 sentences.push(current_sentence.trim().to_string());
271 }
272 sentences
273}
274
/// Check whether `line` is a horizontal rule: it starts with `-`, `_` or `*`, consists
/// solely of that marker and spaces, and contains the marker at least three times.
///
/// Leading whitespace is not tolerated; the first character must be the marker.
fn is_horizontal_rule(line: &str) -> bool {
    // Fewer than three bytes cannot hold three markers.
    if line.len() < 3 {
        return false;
    }

    let mut symbols = line.chars();
    let marker = match symbols.next() {
        Some(m @ ('-' | '_' | '*')) => m,
        _ => return false,
    };

    // The first character already counts as one marker.
    let mut marker_count = 1usize;
    for symbol in symbols {
        if symbol == marker {
            marker_count += 1;
        } else if symbol != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
303
/// Check whether `line` starts with an ordered-list marker: one or more ASCII digits,
/// a `.`, and a single space (for example `1. ` or `10. `).
///
/// Only ASCII digits count. Unicode numerics such as `½` or `٣` are not list digits in
/// Markdown, so a line starting with them is ordinary text. Only the `.` delimiter is
/// recognised.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();

    // `digit_count` never exceeds the byte length, so the slice below cannot panic.
    digit_count > 0 && line.as_bytes()[digit_count..].starts_with(b". ")
}
327
328fn is_unordered_list_marker(s: &str) -> bool {
330 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
331 && !is_horizontal_rule(s)
332 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
333}
334
335fn is_block_boundary_core(trimmed: &str) -> bool {
338 trimmed.is_empty()
339 || trimmed.starts_with('#')
340 || trimmed.starts_with("```")
341 || trimmed.starts_with("~~~")
342 || trimmed.starts_with('>')
343 || (trimmed.starts_with('[') && trimmed.contains("]:"))
344 || is_horizontal_rule(trimmed)
345 || is_unordered_list_marker(trimmed)
346 || is_numbered_list_item(trimmed)
347 || is_definition_list_item(trimmed)
348 || trimmed.starts_with(":::")
349}
350
351fn is_block_boundary(trimmed: &str) -> bool {
354 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
355}
356
357fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
361 is_block_boundary_core(trimmed)
362 || ElementCache::calculate_indentation_width_default(line) >= 4
363 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
364}
365
/// Check whether `line` ends with a Markdown hard line break.
///
/// A hard break is two or more trailing spaces, or a trailing backslash. A single
/// trailing space is not a hard break. One trailing `\r` (from CRLF input) is ignored
/// before checking.
fn has_hard_break(line: &str) -> bool {
    // Minimum number of trailing spaces that make a hard break.
    const HARD_BREAK_SPACES: usize = 2;

    let line = line.strip_suffix('\r').unwrap_or(line);
    let trailing_spaces = line.len() - line.trim_end_matches(' ').len();

    trailing_spaces >= HARD_BREAK_SPACES || line.ends_with('\\')
}
375
/// Check whether the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
380
/// Trim trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// - One trailing `\r` is removed first.
/// - A line ending in a backslash is returned unchanged (backslash hard break).
/// - A line ending in two or more spaces is returned with its content followed by exactly
///   two spaces, unless the line is entirely whitespace, which yields an empty string.
/// - Any other line has all trailing whitespace removed. A single trailing space is not a
///   hard break and is dropped.
fn trim_preserving_hard_break(s: &str) -> String {
    // Number of trailing spaces that form, and are re-emitted for, a hard break.
    const HARD_BREAK_SPACES: usize = 2;

    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    let trailing_spaces = s.len() - s.trim_end_matches(' ').len();

    if trailing_spaces >= HARD_BREAK_SPACES && !content.is_empty() {
        // Normalise any longer run of trailing whitespace to exactly two spaces.
        let mut out = String::with_capacity(content.len() + HARD_BREAK_SPACES);
        out.push_str(content);
        out.extend(std::iter::repeat(' ').take(HARD_BREAK_SPACES));
        out
    } else {
        content.to_string()
    }
}
411
412pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
413 if options.sentence_per_line {
415 let elements = parse_markdown_elements(line);
416 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
417 }
418
419 if options.semantic_line_breaks {
421 let elements = parse_markdown_elements(line);
422 return reflow_elements_semantic(&elements, options);
423 }
424
425 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
428 return vec![line.to_string()];
429 }
430
431 let elements = parse_markdown_elements(line);
433
434 reflow_elements(&elements, options)
436}
437
/// Where the image part of a linked image takes its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline form: the image URL, rendered in parentheses.
    Inline(String),
    /// Reference form: the image's reference label, rendered in brackets.
    Reference(String),
}

/// Where the outer link of a linked image points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline form: the link URL, rendered in parentheses.
    Inline(String),
    /// Reference form: the link's reference label, rendered in brackets.
    Reference(String),
}

/// One inline Markdown element of a line being reflowed.
///
/// The `Display` implementation renders each variant back to Markdown source text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `!` followed by `[alt](url)`.
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link. Image source and link target can each be inline or
    /// reference style, independently of one another.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`.
    Strikethrough(String),
    /// Wiki link: `[[target]]`.
    WikiLink(String),
    /// Inline math: `$math$`.
    InlineMath(String),
    /// Display math: `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode: `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim (delimiters included).
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Inline code: single backticks around the content.
    Code(String),
    /// Strong emphasis; `underscore` selects `__` over `**` as the delimiter.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis; `underscore` selects `_` over `*` as the delimiter.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Render the element back to its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variants that store their source text verbatim.
            Element::Text(s)
            | Element::Autolink(s)
            | Element::HtmlTag(s)
            | Element::HtmlEntity(s)
            | Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::InlineImage { alt, url } => {
                // An inline image is inline-link syntax prefixed with `!`.
                f.write_str("!")?;
                write!(f, "[{alt}]({url})")
            }
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Outer link opens with `[`, then the image's `!` and bracketed alt text.
                write!(f, "[![{alt}]")?;
                // Image source: inline URL or reference label.
                match img_source {
                    LinkedImageSource::Inline(url) => write!(f, "({url})")?,
                    LinkedImageSource::Reference(r) => write!(f, "[{r}]")?,
                }
                // Close the outer link text and emit its target.
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
576
577impl Element {
578 fn display_width(&self, mode: ReflowLengthMode) -> usize {
582 let formatted = format!("{self}");
583 display_len(&formatted, mode)
584 }
585}
586
/// An emphasis, strong, or strikethrough span located in a piece of text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset at which the span (including its opening delimiter) starts.
    start: usize,
    /// Byte offset at which the span (including its closing delimiter) ends.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough.
    is_strikethrough: bool,
    /// `true` when the delimiter is made of underscores rather than asterisks.
    uses_underscore: bool,
}
603
604fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
614 let mut spans = Vec::new();
615 let mut options = Options::empty();
616 options.insert(Options::ENABLE_STRIKETHROUGH);
617
618 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
621 let mut strikethrough_stack: Vec<usize> = Vec::new();
622
623 let parser = Parser::new_ext(text, options).into_offset_iter();
624
625 for (event, range) in parser {
626 match event {
627 Event::Start(Tag::Emphasis) => {
628 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
630 emphasis_stack.push((range.start, uses_underscore));
631 }
632 Event::End(TagEnd::Emphasis) => {
633 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
634 let content_start = start_byte + 1;
636 let content_end = range.end - 1;
637 if content_end > content_start
638 && let Some(content) = text.get(content_start..content_end)
639 {
640 spans.push(EmphasisSpan {
641 start: start_byte,
642 end: range.end,
643 content: content.to_string(),
644 is_strong: false,
645 is_strikethrough: false,
646 uses_underscore,
647 });
648 }
649 }
650 }
651 Event::Start(Tag::Strong) => {
652 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
654 strong_stack.push((range.start, uses_underscore));
655 }
656 Event::End(TagEnd::Strong) => {
657 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
658 let content_start = start_byte + 2;
660 let content_end = range.end - 2;
661 if content_end > content_start
662 && let Some(content) = text.get(content_start..content_end)
663 {
664 spans.push(EmphasisSpan {
665 start: start_byte,
666 end: range.end,
667 content: content.to_string(),
668 is_strong: true,
669 is_strikethrough: false,
670 uses_underscore,
671 });
672 }
673 }
674 }
675 Event::Start(Tag::Strikethrough) => {
676 strikethrough_stack.push(range.start);
677 }
678 Event::End(TagEnd::Strikethrough) => {
679 if let Some(start_byte) = strikethrough_stack.pop() {
680 let content_start = start_byte + 2;
682 let content_end = range.end - 2;
683 if content_end > content_start
684 && let Some(content) = text.get(content_start..content_end)
685 {
686 spans.push(EmphasisSpan {
687 start: start_byte,
688 end: range.end,
689 content: content.to_string(),
690 is_strong: false,
691 is_strikethrough: true,
692 uses_underscore: false,
693 });
694 }
695 }
696 }
697 _ => {}
698 }
699 }
700
701 spans.sort_by_key(|s| s.start);
703 spans
704}
705
/// Split `text` into a sequence of inline Markdown [`Element`]s.
///
/// The scanner repeatedly looks at the unconsumed tail (`remaining`) and finds:
/// - the earliest match among the link/image/math/emoji/HTML regexes, and
/// - the earliest "special" position: the next backtick, or the start of the next
///   emphasis/strong/strikethrough span reported by `extract_emphasis_spans`.
///
/// Whichever comes first is emitted as an element (preceded by any plain text before it)
/// and the scan continues after it. When nothing is found, the rest is emitted as text.
fn parse_markdown_elements(text: &str) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once over the whole text; offsets are bytes into `text`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`, used to map span offsets.
        let current_offset = text.len() - remaining.len();
        // (start in `remaining`, pattern tag, match). The strict `<` comparisons below mean
        // that on equal start positions the pattern checked first wins.
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images are only attempted when the tail contains "[!".
        if remaining.contains("[!") {
            // Inline image, inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // Reference image, inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // Inline image, reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // Reference image, reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        // The HTML tag pattern also matches autolinks; tell the two apart by content.
        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // URL autolink: recognised by its scheme prefix.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            // Email autolink: the text between the angle brackets matches the email pattern.
            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), "autolink", m));
            } else {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Next code span or emphasis span. `remaining.len()` means "none found".
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Take the first span that starts inside `remaining`; spans are sorted by start, so
        // the loop stops at the first candidate whether or not it beats the backtick.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // A regex match is handled only when it starts strictly before the next code or
        // emphasis position; on a tie the code/emphasis handling wins.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Plain text preceding the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs the pattern to obtain capture groups. The `else` fallbacks
            // handle an unexpected capture failure by emitting the construct's first
            // character(s) as text.
            // NOTE(review): the fallbacks advance `remaining` from its start rather than
            // from `pos`; they look unreachable after a successful `find` - confirm.
            match pattern_type {
                "linked_image_ii" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label is the collapsed form.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label is the collapsed form.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The next four kinds keep the matched text verbatim, delimiters included.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                // Unknown tag: emit one bracket as text and move on.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // No regex match comes first: handle the code/emphasis position, if any.
            // Emit the plain text that precedes it.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` now starts at the opening backtick; look for the next one.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unclosed backtick: everything left is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Byte length of the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Defensive: the tag is set without a span; emit one byte as text.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Nothing special left: the rest is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1208
/// Render `elements` as output lines holding one sentence each.
///
/// Text elements are appended to the line under construction and split into sentences;
/// completed sentences are pushed to the output and an unfinished tail is carried over in
/// `current_line`. Emphasis-like elements are delegated to `handle_emphasis_sentence_split`.
/// Every other element is appended to the current line in its rendered form.
///
/// `custom_abbreviations` is resolved through `get_abbreviations`.
fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
    let abbreviations = get_abbreviations(custom_abbreviations);
    let mut lines = Vec::new();
    // Sentence under construction; may span several elements.
    let mut current_line = String::new();

    for (idx, element) in elements.iter().enumerate() {
        // Rendered Markdown for the element; used only in the final `else` branch below.
        let element_str = format!("{element}");

        if let Element::Text(text) = element {
            // Join with the pending partial sentence before splitting.
            let combined = format!("{current_line}{text}");
            let sentences = split_into_sentences_with_set(&combined, &abbreviations);

            if sentences.len() > 1 {
                for (i, sentence) in sentences.iter().enumerate() {
                    if i == 0 {
                        let trimmed = sentence.trim();

                        // A first piece ending in an abbreviation is kept pending rather
                        // than emitted.
                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
                            current_line = sentence.to_string();
                        } else {
                            lines.push(sentence.to_string());
                            current_line.clear();
                        }
                    } else if i == sentences.len() - 1 {
                        // The last piece is emitted only if it is a finished sentence;
                        // otherwise it becomes the new pending line.
                        let trimmed = sentence.trim();
                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                            lines.push(sentence.to_string());
                            current_line.clear();
                        } else {
                            current_line = sentence.to_string();
                        }
                    } else {
                        // Middle pieces are complete sentences.
                        lines.push(sentence.to_string());
                    }
                }
            } else {
                // Zero or one sentence: decide whether the combined text is complete.
                let trimmed = combined.trim();

                // Whitespace-only text is skipped; `current_line` is left as it was.
                if trimmed.is_empty() {
                    continue;
                }

                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                    lines.push(trimmed.to_string());
                    current_line.clear();
                } else {
                    // Keep the untrimmed text so the spacing before the next element survives.
                    current_line = combined;
                }
            }
        } else if let Element::Italic { content, underscore } = element {
            let marker = if *underscore { "_" } else { "*" };
            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
        } else if let Element::Bold { content, underscore } = element {
            let marker = if *underscore { "__" } else { "**" };
            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
        } else if let Element::Strikethrough(content) = element {
            handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
        } else {
            // Any other element. It is "adjacent" when it directly follows a non-text
            // element or text that does not end in whitespace; adjacent elements are
            // appended without an inserted space.
            let is_adjacent = if idx > 0 {
                match &elements[idx - 1] {
                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
                    _ => true,
                }
            } else {
                false
            };

            // Insert a separating space unless the line is empty or already ends with a
            // space or an opening `(` / `[`.
            if !is_adjacent
                && !current_line.is_empty()
                && !current_line.ends_with(' ')
                && !current_line.ends_with('(')
                && !current_line.ends_with('[')
            {
                current_line.push(' ');
            }
            current_line.push_str(&element_str);
        }
    }

    // Flush the unfinished tail as the final line.
    if !current_line.is_empty() {
        lines.push(current_line.trim().to_string());
    }
    lines
}
1323
1324fn handle_emphasis_sentence_split(
1326 content: &str,
1327 marker: &str,
1328 abbreviations: &HashSet<String>,
1329 current_line: &mut String,
1330 lines: &mut Vec<String>,
1331) {
1332 let sentences = split_into_sentences_with_set(content, abbreviations);
1334
1335 if sentences.len() <= 1 {
1336 if !current_line.is_empty()
1338 && !current_line.ends_with(' ')
1339 && !current_line.ends_with('(')
1340 && !current_line.ends_with('[')
1341 {
1342 current_line.push(' ');
1343 }
1344 current_line.push_str(marker);
1345 current_line.push_str(content);
1346 current_line.push_str(marker);
1347
1348 let trimmed = content.trim();
1350 let ends_with_punct = ends_with_sentence_punct(trimmed);
1351 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1352 lines.push(current_line.clone());
1353 current_line.clear();
1354 }
1355 } else {
1356 for (i, sentence) in sentences.iter().enumerate() {
1358 let trimmed = sentence.trim();
1359 if trimmed.is_empty() {
1360 continue;
1361 }
1362
1363 if i == 0 {
1364 if !current_line.is_empty()
1366 && !current_line.ends_with(' ')
1367 && !current_line.ends_with('(')
1368 && !current_line.ends_with('[')
1369 {
1370 current_line.push(' ');
1371 }
1372 current_line.push_str(marker);
1373 current_line.push_str(trimmed);
1374 current_line.push_str(marker);
1375
1376 let ends_with_punct = ends_with_sentence_punct(trimmed);
1378 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1379 lines.push(current_line.clone());
1380 current_line.clear();
1381 }
1382 } else if i == sentences.len() - 1 {
1383 let ends_with_punct = ends_with_sentence_punct(trimmed);
1385
1386 let mut line = String::new();
1387 line.push_str(marker);
1388 line.push_str(trimmed);
1389 line.push_str(marker);
1390
1391 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1392 lines.push(line);
1393 } else {
1394 *current_line = line;
1396 }
1397 } else {
1398 let mut line = String::new();
1400 line.push_str(marker);
1401 line.push_str(trimmed);
1402 line.push_str(marker);
1403 lines.push(line);
1404 }
1405 }
1406 }
1407}
1408
/// Lowercase conjunctions and connective words at which an over-long line may be broken.
///
/// `split_at_break_word` looks for these as space-delimited words in the
/// lowercased text and splits immediately before the match, so the break word
/// starts the following line.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1444
/// Returns `true` when `c` is punctuation that closes a clause: comma, semicolon,
/// colon, or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1449
1450fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1454 let mut spans = Vec::new();
1455 let mut offset = 0;
1456 for element in elements {
1457 let rendered = format!("{element}");
1458 let len = rendered.len();
1459 if !matches!(element, Element::Text(_)) {
1460 spans.push((offset, offset + len));
1461 }
1462 offset += len;
1463 }
1464 spans
1465}
1466
/// Returns `true` when `pos` lies strictly between the start and end of any span.
///
/// Positions equal to a span's start or end count as outside, so a split exactly
/// at an element boundary is allowed.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1471
/// Fraction of `line_length` used as the minimum acceptable length of a split-off
/// first part; the splitters reject candidates shorter than
/// `line_length * MIN_SPLIT_RATIO`, and semantic reflow merges lines shorter than
/// that into the preceding line when possible.
const MIN_SPLIT_RATIO: f64 = 0.3;
1475
1476fn split_at_clause_punctuation(
1480 text: &str,
1481 line_length: usize,
1482 element_spans: &[(usize, usize)],
1483 length_mode: ReflowLengthMode,
1484) -> Option<(String, String)> {
1485 let chars: Vec<char> = text.chars().collect();
1486 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1487
1488 let mut width_acc = 0;
1490 let mut search_end_char = 0;
1491 for (idx, &c) in chars.iter().enumerate() {
1492 let c_width = display_len(&c.to_string(), length_mode);
1493 if width_acc + c_width > line_length {
1494 break;
1495 }
1496 width_acc += c_width;
1497 search_end_char = idx + 1;
1498 }
1499
1500 let mut best_pos = None;
1501 for i in (0..search_end_char).rev() {
1502 if is_clause_punctuation(chars[i]) {
1503 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1505 if !is_inside_element(byte_pos, element_spans) {
1506 best_pos = Some(i);
1507 break;
1508 }
1509 }
1510 }
1511
1512 let pos = best_pos?;
1513
1514 let first: String = chars[..=pos].iter().collect();
1516 let first_display_len = display_len(&first, length_mode);
1517 if first_display_len < min_first_len {
1518 return None;
1519 }
1520
1521 let rest: String = chars[pos + 1..].iter().collect();
1523 let rest = rest.trim_start().to_string();
1524
1525 if rest.is_empty() {
1526 return None;
1527 }
1528
1529 Some((first, rest))
1530}
1531
1532fn split_at_break_word(
1536 text: &str,
1537 line_length: usize,
1538 element_spans: &[(usize, usize)],
1539 length_mode: ReflowLengthMode,
1540) -> Option<(String, String)> {
1541 let lower = text.to_lowercase();
1542 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1543 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1546 let mut search_start = 0;
1547 while let Some(pos) = lower[search_start..].find(word) {
1548 let abs_pos = search_start + pos;
1549
1550 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1552 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1553
1554 if preceded_by_space && followed_by_space {
1555 let first_part = text[..abs_pos].trim_end();
1557 let first_part_len = display_len(first_part, length_mode);
1558
1559 if first_part_len >= min_first_len
1560 && first_part_len <= line_length
1561 && !is_inside_element(abs_pos, element_spans)
1562 {
1563 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1565 best_split = Some((abs_pos, word.len()));
1566 }
1567 }
1568 }
1569
1570 search_start = abs_pos + word.len();
1571 }
1572 }
1573
1574 let (byte_start, _word_len) = best_split?;
1575
1576 let first = text[..byte_start].trim_end().to_string();
1577 let rest = text[byte_start..].to_string();
1578
1579 if first.is_empty() || rest.trim().is_empty() {
1580 return None;
1581 }
1582
1583 Some((first, rest))
1584}
1585
1586fn cascade_split_line(
1589 text: &str,
1590 line_length: usize,
1591 abbreviations: &Option<Vec<String>>,
1592 length_mode: ReflowLengthMode,
1593) -> Vec<String> {
1594 if line_length == 0 || display_len(text, length_mode) <= line_length {
1595 return vec![text.to_string()];
1596 }
1597
1598 let elements = parse_markdown_elements(text);
1599 let element_spans = compute_element_spans(&elements);
1600
1601 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1603 let mut result = vec![first];
1604 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1605 return result;
1606 }
1607
1608 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1610 let mut result = vec![first];
1611 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1612 return result;
1613 }
1614
1615 let options = ReflowOptions {
1617 line_length,
1618 break_on_sentences: false,
1619 preserve_breaks: false,
1620 sentence_per_line: false,
1621 semantic_line_breaks: false,
1622 abbreviations: abbreviations.clone(),
1623 length_mode,
1624 };
1625 reflow_elements(&elements, &options)
1626}
1627
1628fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1632 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1634
1635 if options.line_length == 0 {
1638 return sentence_lines;
1639 }
1640
1641 let length_mode = options.length_mode;
1642 let mut result = Vec::new();
1643 for line in sentence_lines {
1644 if display_len(&line, length_mode) <= options.line_length {
1645 result.push(line);
1646 } else {
1647 result.extend(cascade_split_line(
1648 &line,
1649 options.line_length,
1650 &options.abbreviations,
1651 length_mode,
1652 ));
1653 }
1654 }
1655
1656 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1659 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1660 for line in result {
1661 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1662 let prev_ends_at_sentence = {
1664 let trimmed = merged.last().unwrap().trim_end();
1665 trimmed
1666 .chars()
1667 .rev()
1668 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1669 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1670 };
1671
1672 if !prev_ends_at_sentence {
1673 let prev = merged.last_mut().unwrap();
1674 let combined = format!("{prev} {line}");
1675 if display_len(&combined, length_mode) <= options.line_length {
1677 *prev = combined;
1678 continue;
1679 }
1680 }
1681 }
1682 merged.push(line);
1683 }
1684 merged
1685}
1686
/// Finds the byte index of the last space in `line` that is not strictly inside
/// any of `element_spans`.
///
/// A space located exactly at a span's start or end offset counts as safe.
/// Returns `None` when the line has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| element_spans.iter().all(|&(start, end)| pos <= start || pos >= end))
}
1700
1701fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1703 let mut lines = Vec::new();
1704 let mut current_line = String::new();
1705 let mut current_length = 0;
1706 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1708 let length_mode = options.length_mode;
1709
1710 for (idx, element) in elements.iter().enumerate() {
1711 let element_str = format!("{element}");
1712 let element_len = element.display_width(length_mode);
1713
1714 let is_adjacent_to_prev = if idx > 0 {
1720 match (&elements[idx - 1], element) {
1721 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1722 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1723 _ => true,
1724 }
1725 } else {
1726 false
1727 };
1728
1729 if let Element::Text(text) = element {
1731 let has_leading_space = text.starts_with(char::is_whitespace);
1733 let words: Vec<&str> = text.split_whitespace().collect();
1735
1736 for (i, word) in words.iter().enumerate() {
1737 let word_len = display_len(word, length_mode);
1738 let is_trailing_punct = word
1740 .chars()
1741 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1742
1743 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1746
1747 if is_first_adjacent {
1748 if current_length + word_len > options.line_length && current_length > 0 {
1750 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1753 let before = current_line[..last_space].trim_end().to_string();
1754 let after = current_line[last_space + 1..].to_string();
1755 lines.push(before);
1756 current_line = format!("{after}{word}");
1757 current_length = display_len(¤t_line, length_mode);
1758 current_line_element_spans.clear();
1759 } else {
1760 current_line.push_str(word);
1761 current_length += word_len;
1762 }
1763 } else {
1764 current_line.push_str(word);
1765 current_length += word_len;
1766 }
1767 } else if current_length > 0
1768 && current_length + 1 + word_len > options.line_length
1769 && !is_trailing_punct
1770 {
1771 lines.push(current_line.trim().to_string());
1773 current_line = word.to_string();
1774 current_length = word_len;
1775 current_line_element_spans.clear();
1776 } else {
1777 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1781 current_line.push(' ');
1782 current_length += 1;
1783 }
1784 current_line.push_str(word);
1785 current_length += word_len;
1786 }
1787 }
1788 } else {
1789 if is_adjacent_to_prev {
1793 if current_length + element_len > options.line_length {
1795 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1798 let before = current_line[..last_space].trim_end().to_string();
1799 let after = current_line[last_space + 1..].to_string();
1800 lines.push(before);
1801 current_line = format!("{after}{element_str}");
1802 current_length = display_len(¤t_line, length_mode);
1803 current_line_element_spans.clear();
1804 let start = after.len();
1806 current_line_element_spans.push((start, start + element_str.len()));
1807 } else {
1808 let start = current_line.len();
1810 current_line.push_str(&element_str);
1811 current_length += element_len;
1812 current_line_element_spans.push((start, current_line.len()));
1813 }
1814 } else {
1815 let start = current_line.len();
1816 current_line.push_str(&element_str);
1817 current_length += element_len;
1818 current_line_element_spans.push((start, current_line.len()));
1819 }
1820 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1821 lines.push(current_line.trim().to_string());
1823 current_line = element_str.clone();
1824 current_length = element_len;
1825 current_line_element_spans.clear();
1826 current_line_element_spans.push((0, element_str.len()));
1827 } else {
1828 let ends_with_opener =
1830 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1831 if current_length > 0 && !ends_with_opener {
1832 current_line.push(' ');
1833 current_length += 1;
1834 }
1835 let start = current_line.len();
1836 current_line.push_str(&element_str);
1837 current_length += element_len;
1838 current_line_element_spans.push((start, current_line.len()));
1839 }
1840 }
1841 }
1842
1843 if !current_line.is_empty() {
1845 lines.push(current_line.trim_end().to_string());
1846 }
1847
1848 lines
1849}
1850
1851pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1853 let lines: Vec<&str> = content.lines().collect();
1854 let mut result = Vec::new();
1855 let mut i = 0;
1856
1857 while i < lines.len() {
1858 let line = lines[i];
1859 let trimmed = line.trim();
1860
1861 if trimmed.is_empty() {
1863 result.push(String::new());
1864 i += 1;
1865 continue;
1866 }
1867
1868 if trimmed.starts_with('#') {
1870 result.push(line.to_string());
1871 i += 1;
1872 continue;
1873 }
1874
1875 if trimmed.starts_with(":::") {
1877 result.push(line.to_string());
1878 i += 1;
1879 continue;
1880 }
1881
1882 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1884 result.push(line.to_string());
1885 i += 1;
1886 while i < lines.len() {
1888 result.push(lines[i].to_string());
1889 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1890 i += 1;
1891 break;
1892 }
1893 i += 1;
1894 }
1895 continue;
1896 }
1897
1898 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1900 result.push(line.to_string());
1902 i += 1;
1903 while i < lines.len() {
1904 let next_line = lines[i];
1905 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1907 result.push(next_line.to_string());
1908 i += 1;
1909 } else {
1910 break;
1911 }
1912 }
1913 continue;
1914 }
1915
1916 if trimmed.starts_with('>') {
1918 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1921 let quote_prefix = line[0..gt_pos + 1].to_string();
1922 let quote_content = &line[quote_prefix.len()..].trim_start();
1923
1924 let reflowed = reflow_line(quote_content, options);
1925 for reflowed_line in reflowed.iter() {
1926 result.push(format!("{quote_prefix} {reflowed_line}"));
1927 }
1928 i += 1;
1929 continue;
1930 }
1931
1932 if is_horizontal_rule(trimmed) {
1934 result.push(line.to_string());
1935 i += 1;
1936 continue;
1937 }
1938
1939 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
1941 let indent = line.len() - line.trim_start().len();
1943 let indent_str = " ".repeat(indent);
1944
1945 let mut marker_end = indent;
1948 let mut content_start = indent;
1949
1950 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1951 if let Some(period_pos) = line[indent..].find('.') {
1953 marker_end = indent + period_pos + 1; content_start = marker_end;
1955 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1959 content_start += 1;
1960 }
1961 }
1962 } else {
1963 marker_end = indent + 1; content_start = marker_end;
1966 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1970 content_start += 1;
1971 }
1972 }
1973
1974 let marker = &line[indent..marker_end];
1975
1976 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1979 i += 1;
1980
1981 while i < lines.len() {
1983 let next_line = lines[i];
1984 let next_trimmed = next_line.trim();
1985
1986 if is_block_boundary(next_trimmed) {
1988 break;
1989 }
1990
1991 let next_indent = next_line.len() - next_line.trim_start().len();
1993 if next_indent >= content_start {
1994 let trimmed_start = next_line.trim_start();
1997 list_content.push(trim_preserving_hard_break(trimmed_start));
1998 i += 1;
1999 } else {
2000 break;
2002 }
2003 }
2004
2005 let combined_content = if options.preserve_breaks {
2008 list_content[0].clone()
2009 } else {
2010 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2012 if has_hard_breaks {
2013 list_content.join("\n")
2015 } else {
2016 list_content.join(" ")
2018 }
2019 };
2020
2021 let trimmed_marker = marker;
2023 let continuation_spaces = content_start;
2024
2025 let prefix_length = indent + trimmed_marker.len() + 1;
2027
2028 let adjusted_options = ReflowOptions {
2030 line_length: options.line_length.saturating_sub(prefix_length),
2031 ..options.clone()
2032 };
2033
2034 let reflowed = reflow_line(&combined_content, &adjusted_options);
2035 for (j, reflowed_line) in reflowed.iter().enumerate() {
2036 if j == 0 {
2037 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2038 } else {
2039 let continuation_indent = " ".repeat(continuation_spaces);
2041 result.push(format!("{continuation_indent}{reflowed_line}"));
2042 }
2043 }
2044 continue;
2045 }
2046
2047 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2049 result.push(line.to_string());
2050 i += 1;
2051 continue;
2052 }
2053
2054 if trimmed.starts_with('[') && line.contains("]:") {
2056 result.push(line.to_string());
2057 i += 1;
2058 continue;
2059 }
2060
2061 if is_definition_list_item(trimmed) {
2063 result.push(line.to_string());
2064 i += 1;
2065 continue;
2066 }
2067
2068 let mut is_single_line_paragraph = true;
2070 if i + 1 < lines.len() {
2071 let next_trimmed = lines[i + 1].trim();
2072 if !is_block_boundary(next_trimmed) {
2074 is_single_line_paragraph = false;
2075 }
2076 }
2077
2078 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2080 result.push(line.to_string());
2081 i += 1;
2082 continue;
2083 }
2084
2085 let mut paragraph_parts = Vec::new();
2087 let mut current_part = vec![line];
2088 i += 1;
2089
2090 if options.preserve_breaks {
2092 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2094 Some("\\")
2095 } else if line.ends_with(" ") {
2096 Some(" ")
2097 } else {
2098 None
2099 };
2100 let reflowed = reflow_line(line, options);
2101
2102 if let Some(break_marker) = hard_break_type {
2104 if !reflowed.is_empty() {
2105 let mut reflowed_with_break = reflowed;
2106 let last_idx = reflowed_with_break.len() - 1;
2107 if !has_hard_break(&reflowed_with_break[last_idx]) {
2108 reflowed_with_break[last_idx].push_str(break_marker);
2109 }
2110 result.extend(reflowed_with_break);
2111 }
2112 } else {
2113 result.extend(reflowed);
2114 }
2115 } else {
2116 while i < lines.len() {
2118 let prev_line = if !current_part.is_empty() {
2119 current_part.last().unwrap()
2120 } else {
2121 ""
2122 };
2123 let next_line = lines[i];
2124 let next_trimmed = next_line.trim();
2125
2126 if is_block_boundary(next_trimmed) {
2128 break;
2129 }
2130
2131 let prev_trimmed = prev_line.trim();
2134 let abbreviations = get_abbreviations(&options.abbreviations);
2135 let ends_with_sentence = (prev_trimmed.ends_with('.')
2136 || prev_trimmed.ends_with('!')
2137 || prev_trimmed.ends_with('?')
2138 || prev_trimmed.ends_with(".*")
2139 || prev_trimmed.ends_with("!*")
2140 || prev_trimmed.ends_with("?*")
2141 || prev_trimmed.ends_with("._")
2142 || prev_trimmed.ends_with("!_")
2143 || prev_trimmed.ends_with("?_")
2144 || prev_trimmed.ends_with(".\"")
2146 || prev_trimmed.ends_with("!\"")
2147 || prev_trimmed.ends_with("?\"")
2148 || prev_trimmed.ends_with(".'")
2149 || prev_trimmed.ends_with("!'")
2150 || prev_trimmed.ends_with("?'")
2151 || prev_trimmed.ends_with(".\u{201D}")
2152 || prev_trimmed.ends_with("!\u{201D}")
2153 || prev_trimmed.ends_with("?\u{201D}")
2154 || prev_trimmed.ends_with(".\u{2019}")
2155 || prev_trimmed.ends_with("!\u{2019}")
2156 || prev_trimmed.ends_with("?\u{2019}"))
2157 && !text_ends_with_abbreviation(
2158 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2159 &abbreviations,
2160 );
2161
2162 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2163 paragraph_parts.push(current_part.join(" "));
2165 current_part = vec![next_line];
2166 } else {
2167 current_part.push(next_line);
2168 }
2169 i += 1;
2170 }
2171
2172 if !current_part.is_empty() {
2174 if current_part.len() == 1 {
2175 paragraph_parts.push(current_part[0].to_string());
2177 } else {
2178 paragraph_parts.push(current_part.join(" "));
2179 }
2180 }
2181
2182 for (j, part) in paragraph_parts.iter().enumerate() {
2184 let reflowed = reflow_line(part, options);
2185 result.extend(reflowed);
2186
2187 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2191 let last_idx = result.len() - 1;
2192 if !has_hard_break(&result[last_idx]) {
2193 result[last_idx].push_str(" ");
2194 }
2195 }
2196 }
2197 }
2198 }
2199
2200 let result_text = result.join("\n");
2202 if content.ends_with('\n') && !result_text.ends_with('\n') {
2203 format!("{result_text}\n")
2204 } else {
2205 result_text
2206 }
2207}
2208
/// Outcome of reflowing a single paragraph: the byte range it occupies in the
/// original content together with the text that should replace that range.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset one past the end of the paragraph; covers the paragraph's
    /// trailing newline when it has one.
    pub end_byte: usize,
    /// Reflowed text for the range `start_byte..end_byte`.
    pub reflowed_text: String,
}
2219
/// Reflows the paragraph that contains `line_number` (1-based) to `line_length`,
/// measuring width with the default [`ReflowLengthMode`].
///
/// Thin wrapper around [`reflow_paragraph_at_line_with_mode`]; see that function
/// for the conditions under which `None` is returned.
pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
}
2240
2241pub fn reflow_paragraph_at_line_with_mode(
2243 content: &str,
2244 line_number: usize,
2245 line_length: usize,
2246 length_mode: ReflowLengthMode,
2247) -> Option<ParagraphReflow> {
2248 if line_number == 0 {
2249 return None;
2250 }
2251
2252 let lines: Vec<&str> = content.lines().collect();
2253
2254 if line_number > lines.len() {
2256 return None;
2257 }
2258
2259 let target_idx = line_number - 1; let target_line = lines[target_idx];
2261 let trimmed = target_line.trim();
2262
2263 if is_paragraph_boundary(trimmed, target_line) {
2265 return None;
2266 }
2267
2268 let mut para_start = target_idx;
2270 while para_start > 0 {
2271 let prev_idx = para_start - 1;
2272 let prev_line = lines[prev_idx];
2273 let prev_trimmed = prev_line.trim();
2274
2275 if is_paragraph_boundary(prev_trimmed, prev_line) {
2277 break;
2278 }
2279
2280 para_start = prev_idx;
2281 }
2282
2283 let mut para_end = target_idx;
2285 while para_end + 1 < lines.len() {
2286 let next_idx = para_end + 1;
2287 let next_line = lines[next_idx];
2288 let next_trimmed = next_line.trim();
2289
2290 if is_paragraph_boundary(next_trimmed, next_line) {
2292 break;
2293 }
2294
2295 para_end = next_idx;
2296 }
2297
2298 let paragraph_lines = &lines[para_start..=para_end];
2300
2301 let mut start_byte = 0;
2303 for line in lines.iter().take(para_start) {
2304 start_byte += line.len() + 1; }
2306
2307 let mut end_byte = start_byte;
2308 for line in paragraph_lines.iter() {
2309 end_byte += line.len() + 1; }
2311
2312 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2315
2316 if !includes_trailing_newline {
2318 end_byte -= 1;
2319 }
2320
2321 let paragraph_text = paragraph_lines.join("\n");
2323
2324 let options = ReflowOptions {
2326 line_length,
2327 break_on_sentences: true,
2328 preserve_breaks: false,
2329 sentence_per_line: false,
2330 semantic_line_breaks: false,
2331 abbreviations: None,
2332 length_mode,
2333 };
2334
2335 let reflowed = reflow_markdown(¶graph_text, &options);
2337
2338 let reflowed_text = if includes_trailing_newline {
2342 if reflowed.ends_with('\n') {
2344 reflowed
2345 } else {
2346 format!("{reflowed}\n")
2347 }
2348 } else {
2349 if reflowed.ends_with('\n') {
2351 reflowed.trim_end_matches('\n').to_string()
2352 } else {
2353 reflowed
2354 }
2355 };
2356
2357 Some(ParagraphReflow {
2358 start_byte,
2359 end_byte,
2360 reflowed_text,
2361 })
2362}
2363
#[cfg(test)]
mod tests {
    use super::*;

    /// Only a trailing period after a known abbreviation counts as an abbreviation ending.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Recognized abbreviations, alone or at the end of longer text.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Ordinary words ending in a period are not abbreviations.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // Other terminators, missing punctuation and empty input never match.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Markers followed by content.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));

        // Bare markers.
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Horizontal rules, emphasis and plain text are not list markers.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    #[test]
    fn test_is_block_boundary() {
        // Lines that end the preceding paragraph.
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Ordinary paragraph content.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list item following a short term must stay on its own line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Block boundaries are paragraph boundaries too.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four spaces or a tab in the raw line.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text with a shallow indent, is not a boundary.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", "  text"));
    }

    /// Targeting a div marker line must not produce a reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}