1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20use unicode_width::UnicodeWidthStr;
21
/// Unit in which a line is measured when it is compared against
/// `ReflowOptions::line_length`.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Measure in Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Measure in display columns as reported by `UnicodeWidthStr::width`.
    /// This is the default mode.
    #[default]
    Visual,
    /// Measure in UTF-8 bytes (`str::len()`).
    Bytes,
}
33
34fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
36 match mode {
37 ReflowLengthMode::Chars => s.chars().count(),
38 ReflowLengthMode::Visual => s.width(),
39 ReflowLengthMode::Bytes => s.len(),
40 }
41}
42
/// Settings that control how text is reflowed.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Maximum line length, measured according to `length_mode`.
    /// `reflow_line` treats `0` as "no limit" and returns the line unchanged.
    pub line_length: usize,
    /// NOTE(review): not read by the code visible in this part of the file;
    /// presumably prefers breaking at sentence ends when wrapping — confirm.
    pub break_on_sentences: bool,
    /// NOTE(review): not read by the code visible in this part of the file;
    /// presumably keeps existing line breaks — confirm.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits one sentence per output line and ignores
    /// `line_length`. Checked before `semantic_line_breaks`.
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` delegates to
    /// the semantic line-break reflow.
    pub semantic_line_breaks: bool,
    /// Optional caller-supplied abbreviations, forwarded to `get_abbreviations`
    /// when sentences are detected.
    pub abbreviations: Option<Vec<String>>,
    /// Unit used to measure line length.
    pub length_mode: ReflowLengthMode,
}
63
64impl Default for ReflowOptions {
65 fn default() -> Self {
66 Self {
67 line_length: 80,
68 break_on_sentences: true,
69 preserve_breaks: false,
70 sentence_per_line: false,
71 semantic_line_breaks: false,
72 abbreviations: None,
73 length_mode: ReflowLengthMode::default(),
74 }
75 }
76}
77
78fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
82 let chars: Vec<char> = text.chars().collect();
83
84 if pos + 1 >= chars.len() {
85 return false;
86 }
87
88 let c = chars[pos];
89 let next_char = chars[pos + 1];
90
91 if is_cjk_sentence_ending(c) {
94 let mut after_punct_pos = pos + 1;
96 while after_punct_pos < chars.len()
97 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
98 {
99 after_punct_pos += 1;
100 }
101
102 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
104 after_punct_pos += 1;
105 }
106
107 if after_punct_pos >= chars.len() {
109 return false;
110 }
111
112 while after_punct_pos < chars.len()
114 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
115 {
116 after_punct_pos += 1;
117 }
118
119 if after_punct_pos >= chars.len() {
120 return false;
121 }
122
123 return true;
126 }
127
128 if c != '.' && c != '!' && c != '?' {
130 return false;
131 }
132
133 let (_space_pos, after_space_pos) = if next_char == ' ' {
135 (pos + 1, pos + 2)
137 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
138 if chars[pos + 2] == ' ' {
140 (pos + 2, pos + 3)
142 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
143 (pos + 3, pos + 4)
145 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
146 && pos + 4 < chars.len()
147 && chars[pos + 3] == chars[pos + 2]
148 && chars[pos + 4] == ' '
149 {
150 (pos + 4, pos + 5)
152 } else {
153 return false;
154 }
155 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
156 (pos + 2, pos + 3)
158 } else if (next_char == '*' || next_char == '_')
159 && pos + 3 < chars.len()
160 && chars[pos + 2] == next_char
161 && chars[pos + 3] == ' '
162 {
163 (pos + 3, pos + 4)
165 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
166 (pos + 3, pos + 4)
168 } else {
169 return false;
170 };
171
172 let mut next_char_pos = after_space_pos;
174 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
175 next_char_pos += 1;
176 }
177
178 if next_char_pos >= chars.len() {
180 return false;
181 }
182
183 let mut first_letter_pos = next_char_pos;
185 while first_letter_pos < chars.len()
186 && (chars[first_letter_pos] == '*'
187 || chars[first_letter_pos] == '_'
188 || chars[first_letter_pos] == '~'
189 || is_opening_quote(chars[first_letter_pos]))
190 {
191 first_letter_pos += 1;
192 }
193
194 if first_letter_pos >= chars.len() {
196 return false;
197 }
198
199 let first_char = chars[first_letter_pos];
201 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
202 return false;
203 }
204
205 if pos > 0 && c == '.' {
207 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
209 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
210 return false;
211 }
212
213 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
216 return false;
217 }
218 }
219 true
220}
221
222pub fn split_into_sentences(text: &str) -> Vec<String> {
224 split_into_sentences_custom(text, &None)
225}
226
227pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
229 let abbreviations = get_abbreviations(custom_abbreviations);
230 split_into_sentences_with_set(text, &abbreviations)
231}
232
233fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
236 let mut sentences = Vec::new();
237 let mut current_sentence = String::new();
238 let mut chars = text.chars().peekable();
239 let mut pos = 0;
240
241 while let Some(c) = chars.next() {
242 current_sentence.push(c);
243
244 if is_sentence_boundary(text, pos, abbreviations) {
245 while let Some(&next) = chars.peek() {
247 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
248 current_sentence.push(chars.next().unwrap());
249 pos += 1;
250 } else {
251 break;
252 }
253 }
254
255 if chars.peek() == Some(&' ') {
257 chars.next();
258 pos += 1;
259 }
260
261 sentences.push(current_sentence.trim().to_string());
262 current_sentence.clear();
263 }
264
265 pos += 1;
266 }
267
268 if !current_sentence.trim().is_empty() {
270 sentences.push(current_sentence.trim().to_string());
271 }
272 sentences
273}
274
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_` or
/// `*`, contains nothing but that same character and spaces, and has at least
/// three occurrences of the character.
///
/// The line is expected to be already trimmed; a leading space makes the
/// check fail.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the whole line must use.
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a plain space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
303
/// Returns `true` when `line` starts with an ordered-list marker: one or more
/// ASCII digits, a period, and a space (for example `1. ` or `42. `).
///
/// Only ASCII digits count. Markdown list numbers are written with `0`-`9`,
/// so other Unicode numeric characters (superscripts, fractions, non-Latin
/// digits) at the start of a line are ordinary paragraph text, not a list
/// marker.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    // ASCII digits are single bytes, so `digit_count` is always a char boundary.
    digit_count > 0 && line[digit_count..].starts_with(". ")
}
327
328fn is_unordered_list_marker(s: &str) -> bool {
330 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
331 && !is_horizontal_rule(s)
332 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
333}
334
335fn is_block_boundary_core(trimmed: &str) -> bool {
338 trimmed.is_empty()
339 || trimmed.starts_with('#')
340 || trimmed.starts_with("```")
341 || trimmed.starts_with("~~~")
342 || trimmed.starts_with('>')
343 || (trimmed.starts_with('[') && trimmed.contains("]:"))
344 || is_horizontal_rule(trimmed)
345 || is_unordered_list_marker(trimmed)
346 || is_numbered_list_item(trimmed)
347 || is_definition_list_item(trimmed)
348 || trimmed.starts_with(":::")
349}
350
351fn is_block_boundary(trimmed: &str) -> bool {
354 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
355}
356
357fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
361 is_block_boundary_core(trimmed)
362 || ElementCache::calculate_indentation_width_default(line) >= 4
363 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
364}
365
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF line endings) is ignored before the check. A
/// single trailing space is *not* a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    // `\x20\x20` is two literal spaces; the escapes keep the count explicit
    // and safe from whitespace-normalising tools.
    line.ends_with("\x20\x20") || line.ends_with('\\')
}
375
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
380
/// Trims trailing whitespace from `s` while keeping a Markdown hard line
/// break intact.
///
/// * A trailing `\r` is removed first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalised to end in exactly two
///   spaces, unless it contains nothing but whitespace, in which case the
///   result is empty.
/// * Any other trailing whitespace, including a single trailing space, is
///   removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    // `\x20\x20` is two literal spaces; the escapes keep the count explicit
    // and safe from whitespace-normalising tools.
    if s.ends_with("\x20\x20") && !content.is_empty() {
        format!("{content}\x20\x20")
    } else {
        content.to_string()
    }
}
411
412pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
413 if options.sentence_per_line {
415 let elements = parse_markdown_elements(line);
416 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
417 }
418
419 if options.semantic_line_breaks {
421 let elements = parse_markdown_elements(line);
422 return reflow_elements_semantic(&elements, options);
423 }
424
425 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
428 return vec![line.to_string()];
429 }
430
431 let elements = parse_markdown_elements(line);
433
434 reflow_elements(&elements, options)
436}
437
/// Where the image inside a linked image (an image wrapped in a link) gets its
/// source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// The image URL is written inline.
    Inline(String),
    /// The image refers to a reference label.
    Reference(String),
}
446
/// Where the link that wraps a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// The link URL is written inline: `[...](url)`.
    Inline(String),
    /// The link refers to a reference label: `[...][ref]`.
    Reference(String),
}
455
/// One inline Markdown construct produced by `parse_markdown_elements`.
///
/// The `Display` implementation turns each element back into Markdown source.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: alt text plus image URL.
    InlineImage { alt: String, url: String },
    /// Reference image: alt text plus reference label.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: alt text with an empty reference.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link (typical for badges). The image and the
    /// link can each be inline or reference style.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`; holds the inner text.
    Strikethrough(String),
    /// Wiki link `[[content]]`; holds the inner content.
    WikiLink(String),
    /// Inline math `$math$`; holds the inner expression.
    InlineMath(String),
    /// Display math `$$math$$`; holds the inner expression.
    DisplayMath(String),
    /// Emoji shortcode `:name:`; holds the name without colons.
    EmojiShortcode(String),
    /// Autolink; holds the full matched text including the angle brackets.
    Autolink(String),
    /// HTML tag; holds the full matched text.
    HtmlTag(String),
    /// HTML entity; holds the full matched text.
    HtmlEntity(String),
    /// Hugo shortcode; holds the full matched text.
    HugoShortcode(String),
    /// Inline code; holds the text between the backticks.
    Code(String),
    /// Strong emphasis.
    Bold {
        /// Text between the delimiters.
        content: String,
        /// `true` for `__text__`, `false` for `**text**`.
        underscore: bool,
    },
    /// Emphasis.
    Italic {
        /// Text between the delimiters.
        content: String,
        /// `true` for `_text_`, `false` for `*text*`.
        underscore: bool,
    },
}
520
521impl std::fmt::Display for Element {
522 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
523 match self {
524 Element::Text(s) => write!(f, "{s}"),
525 Element::Link { text, url } => write!(f, "[{text}]({url})"),
526 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
527 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
528 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
529 Element::InlineImage { alt, url } => write!(f, ""),
530 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
531 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
532 Element::LinkedImage {
533 alt,
534 img_source,
535 link_target,
536 } => {
537 let img_part = match img_source {
539 LinkedImageSource::Inline(url) => format!(""),
540 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
541 };
542 match link_target {
544 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
545 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
546 }
547 }
548 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
549 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
550 Element::WikiLink(s) => write!(f, "[[{s}]]"),
551 Element::InlineMath(s) => write!(f, "${s}$"),
552 Element::DisplayMath(s) => write!(f, "$${s}$$"),
553 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
554 Element::Autolink(s) => write!(f, "{s}"),
555 Element::HtmlTag(s) => write!(f, "{s}"),
556 Element::HtmlEntity(s) => write!(f, "{s}"),
557 Element::HugoShortcode(s) => write!(f, "{s}"),
558 Element::Code(s) => write!(f, "`{s}`"),
559 Element::Bold { content, underscore } => {
560 if *underscore {
561 write!(f, "__{content}__")
562 } else {
563 write!(f, "**{content}**")
564 }
565 }
566 Element::Italic { content, underscore } => {
567 if *underscore {
568 write!(f, "_{content}_")
569 } else {
570 write!(f, "*{content}*")
571 }
572 }
573 }
574 }
575}
576
577impl Element {
578 fn display_width(&self, mode: ReflowLengthMode) -> usize {
582 let formatted = format!("{self}");
583 display_len(&formatted, mode)
584 }
585}
586
/// An emphasis, strong, or strikethrough span located by
/// `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the scanned text where the span starts (opening delimiter).
    start: usize,
    /// Byte offset just past the end of the span (after the closing delimiter).
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiters).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the opening delimiter uses underscores rather than asterisks.
    uses_underscore: bool,
}
603
604fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
614 let mut spans = Vec::new();
615 let mut options = Options::empty();
616 options.insert(Options::ENABLE_STRIKETHROUGH);
617
618 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
621 let mut strikethrough_stack: Vec<usize> = Vec::new();
622
623 let parser = Parser::new_ext(text, options).into_offset_iter();
624
625 for (event, range) in parser {
626 match event {
627 Event::Start(Tag::Emphasis) => {
628 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
630 emphasis_stack.push((range.start, uses_underscore));
631 }
632 Event::End(TagEnd::Emphasis) => {
633 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
634 let content_start = start_byte + 1;
636 let content_end = range.end - 1;
637 if content_end > content_start
638 && let Some(content) = text.get(content_start..content_end)
639 {
640 spans.push(EmphasisSpan {
641 start: start_byte,
642 end: range.end,
643 content: content.to_string(),
644 is_strong: false,
645 is_strikethrough: false,
646 uses_underscore,
647 });
648 }
649 }
650 }
651 Event::Start(Tag::Strong) => {
652 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
654 strong_stack.push((range.start, uses_underscore));
655 }
656 Event::End(TagEnd::Strong) => {
657 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
658 let content_start = start_byte + 2;
660 let content_end = range.end - 2;
661 if content_end > content_start
662 && let Some(content) = text.get(content_start..content_end)
663 {
664 spans.push(EmphasisSpan {
665 start: start_byte,
666 end: range.end,
667 content: content.to_string(),
668 is_strong: true,
669 is_strikethrough: false,
670 uses_underscore,
671 });
672 }
673 }
674 }
675 Event::Start(Tag::Strikethrough) => {
676 strikethrough_stack.push(range.start);
677 }
678 Event::End(TagEnd::Strikethrough) => {
679 if let Some(start_byte) = strikethrough_stack.pop() {
680 let content_start = start_byte + 2;
682 let content_end = range.end - 2;
683 if content_end > content_start
684 && let Some(content) = text.get(content_start..content_end)
685 {
686 spans.push(EmphasisSpan {
687 start: start_byte,
688 end: range.end,
689 content: content.to_string(),
690 is_strong: false,
691 is_strikethrough: true,
692 uses_underscore: false,
693 });
694 }
695 }
696 }
697 _ => {}
698 }
699 }
700
701 spans.sort_by_key(|s| s.start);
703 spans
704}
705
/// Splits a line of inline Markdown into a flat sequence of [`Element`]s.
///
/// The scanner works on the unconsumed tail of `text` (`remaining`). On every
/// iteration it
/// 1. finds the earliest regex-recognised construct (links, images, footnotes,
///    wiki links, math, emoji, HTML, Hugo shortcodes, autolinks),
/// 2. finds the earliest "special" construct (inline code, or an emphasis span
///    reported by `extract_emphasis_spans`),
/// 3. emits whichever starts first — preceded by any plain text before it —
///    and advances `remaining` past it.
///
/// The regex construct is chosen only when it starts strictly before the
/// special construct. When nothing is left to recognise, the rest of the line
/// is emitted as a single `Text` element.
fn parse_markdown_elements(text: &str) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once on the full text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start within `remaining`, pattern tag, regex match) of the best candidate so far.
        // A later candidate replaces it only when it starts strictly earlier,
        // so on ties the pattern tested first wins.
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images: only run these four regexes when the text contains `[!`.
        if remaining.contains("[!") {
            // Inline image inside an inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // Reference image inside an inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // Inline image inside a reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // Reference image inside a reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        // Inline images.
        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        // Reference images.
        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        // Footnote references.
        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        // Inline links.
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        // Reference links.
        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        // Shortcut references.
        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        // Wiki links.
        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        // Display math is tested before inline math, so it wins when both start at the same place.
        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        // Inline math.
        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        // Emoji shortcodes.
        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        // HTML entities.
        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        // Hugo shortcodes are tested before HTML tags, so they win when both start at the same place.
        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        // Angle-bracket constructs: classified as autolinks or as HTML tags.
        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // URL autolink: the bracketed text starts with a known scheme.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            // Email autolink: the text between the brackets matches the email pattern.
            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), "autolink", m));
            } else {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Earliest "special" construct (inline code or emphasis); defaults to
        // end-of-text when none is present.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;

        // Inline code: position of the next backtick.
        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Emphasis: consider only the first span that starts inside `remaining`.
        // Spans are sorted by start, and spans starting before `current_offset`
        // have already been passed.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                // Stop after the first in-range span, whether or not it was selected.
                break;
            }
        }

        // The regex construct is processed only when it starts strictly before
        // the special construct.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Plain text that precedes the construct.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs the matching regex to obtain capture groups and
            // then advances past the match. The `else` fallbacks (capture
            // failure) emit the construct's leading character(s) as text and
            // advance past them so the loop always makes progress.
            match pattern_type {
                "linked_image_ii" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty reference means the collapsed form.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty reference means the collapsed form `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The next four kinds keep the full matched text verbatim.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                // Unknown tag: emit `[` as text and skip one byte.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Flush plain text that precedes the special construct, so that
            // `remaining` starts exactly at it.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // Look for the next backtick after the opening one.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unterminated code span: the rest of the line is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Length of the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Fallback when no span was recorded: emit one byte as text.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Nothing special left: the rest of the line is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1208
1209fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1211 let abbreviations = get_abbreviations(custom_abbreviations);
1212 let mut lines = Vec::new();
1213 let mut current_line = String::new();
1214
1215 for (idx, element) in elements.iter().enumerate() {
1216 let element_str = format!("{element}");
1217
1218 if let Element::Text(text) = element {
1220 let combined = format!("{current_line}{text}");
1222 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1224
1225 if sentences.len() > 1 {
1226 for (i, sentence) in sentences.iter().enumerate() {
1228 if i == 0 {
1229 let trimmed = sentence.trim();
1232
1233 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1234 current_line = sentence.to_string();
1236 } else {
1237 lines.push(sentence.to_string());
1239 current_line.clear();
1240 }
1241 } else if i == sentences.len() - 1 {
1242 let trimmed = sentence.trim();
1244 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1245
1246 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1247 lines.push(sentence.to_string());
1249 current_line.clear();
1250 } else {
1251 current_line = sentence.to_string();
1253 }
1254 } else {
1255 lines.push(sentence.to_string());
1257 }
1258 }
1259 } else {
1260 let trimmed = combined.trim();
1262
1263 if trimmed.is_empty() {
1267 continue;
1268 }
1269
1270 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1271
1272 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1273 lines.push(trimmed.to_string());
1275 current_line.clear();
1276 } else {
1277 current_line = combined;
1279 }
1280 }
1281 } else if let Element::Italic { content, underscore } = element {
1282 let marker = if *underscore { "_" } else { "*" };
1284 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1285 } else if let Element::Bold { content, underscore } = element {
1286 let marker = if *underscore { "__" } else { "**" };
1288 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1289 } else if let Element::Strikethrough(content) = element {
1290 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1292 } else {
1293 let is_adjacent = if idx > 0 {
1296 match &elements[idx - 1] {
1297 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1298 _ => true,
1299 }
1300 } else {
1301 false
1302 };
1303
1304 if !is_adjacent
1306 && !current_line.is_empty()
1307 && !current_line.ends_with(' ')
1308 && !current_line.ends_with('(')
1309 && !current_line.ends_with('[')
1310 {
1311 current_line.push(' ');
1312 }
1313 current_line.push_str(&element_str);
1314 }
1315 }
1316
1317 if !current_line.is_empty() {
1319 lines.push(current_line.trim().to_string());
1320 }
1321 lines
1322}
1323
1324fn handle_emphasis_sentence_split(
1326 content: &str,
1327 marker: &str,
1328 abbreviations: &HashSet<String>,
1329 current_line: &mut String,
1330 lines: &mut Vec<String>,
1331) {
1332 let sentences = split_into_sentences_with_set(content, abbreviations);
1334
1335 if sentences.len() <= 1 {
1336 if !current_line.is_empty()
1338 && !current_line.ends_with(' ')
1339 && !current_line.ends_with('(')
1340 && !current_line.ends_with('[')
1341 {
1342 current_line.push(' ');
1343 }
1344 current_line.push_str(marker);
1345 current_line.push_str(content);
1346 current_line.push_str(marker);
1347
1348 let trimmed = content.trim();
1350 let ends_with_punct = ends_with_sentence_punct(trimmed);
1351 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1352 lines.push(current_line.clone());
1353 current_line.clear();
1354 }
1355 } else {
1356 for (i, sentence) in sentences.iter().enumerate() {
1358 let trimmed = sentence.trim();
1359 if trimmed.is_empty() {
1360 continue;
1361 }
1362
1363 if i == 0 {
1364 if !current_line.is_empty()
1366 && !current_line.ends_with(' ')
1367 && !current_line.ends_with('(')
1368 && !current_line.ends_with('[')
1369 {
1370 current_line.push(' ');
1371 }
1372 current_line.push_str(marker);
1373 current_line.push_str(trimmed);
1374 current_line.push_str(marker);
1375
1376 let ends_with_punct = ends_with_sentence_punct(trimmed);
1378 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1379 lines.push(current_line.clone());
1380 current_line.clear();
1381 }
1382 } else if i == sentences.len() - 1 {
1383 let ends_with_punct = ends_with_sentence_punct(trimmed);
1385
1386 let mut line = String::new();
1387 line.push_str(marker);
1388 line.push_str(trimmed);
1389 line.push_str(marker);
1390
1391 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1392 lines.push(line);
1393 } else {
1394 *current_line = line;
1396 }
1397 } else {
1398 let mut line = String::new();
1400 line.push_str(marker);
1401 line.push_str(trimmed);
1402 line.push_str(marker);
1403 lines.push(line);
1404 }
1405 }
1406 }
1407}
1408
/// Lowercase words that `split_at_break_word` accepts as the start of a new line when a
/// long line has to be split. The list holds conjunctions and similar connective words.
///
/// Every entry is plain ASCII and is matched as a whole word (surrounded by spaces).
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1444
/// Returns `true` when `c` is punctuation that can end a clause:
/// a comma, semicolon, colon or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1449
1450fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1454 let mut spans = Vec::new();
1455 let mut offset = 0;
1456 for element in elements {
1457 let rendered = format!("{element}");
1458 let len = rendered.len();
1459 if !matches!(element, Element::Text(_)) {
1460 spans.push((offset, offset + len));
1461 }
1462 offset += len;
1463 }
1464 spans
1465}
1466
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both ends are exclusive: a position equal to a span's start or end is not inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1471
/// Fraction of the configured line length below which a line fragment counts as too short.
///
/// The splitters reject a split whose first part would be shorter than
/// `line_length * MIN_SPLIT_RATIO`, and `reflow_elements_semantic` uses the same threshold
/// to decide which short lines to merge back into the preceding line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1475
1476fn split_at_clause_punctuation(
1480 text: &str,
1481 line_length: usize,
1482 element_spans: &[(usize, usize)],
1483 length_mode: ReflowLengthMode,
1484) -> Option<(String, String)> {
1485 let chars: Vec<char> = text.chars().collect();
1486 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1487
1488 let mut width_acc = 0;
1490 let mut search_end_char = 0;
1491 for (idx, &c) in chars.iter().enumerate() {
1492 let c_width = display_len(&c.to_string(), length_mode);
1493 if width_acc + c_width > line_length {
1494 break;
1495 }
1496 width_acc += c_width;
1497 search_end_char = idx + 1;
1498 }
1499
1500 let mut best_pos = None;
1501 for i in (0..search_end_char).rev() {
1502 if is_clause_punctuation(chars[i]) {
1503 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1505 if !is_inside_element(byte_pos, element_spans) {
1506 best_pos = Some(i);
1507 break;
1508 }
1509 }
1510 }
1511
1512 let pos = best_pos?;
1513
1514 let first: String = chars[..=pos].iter().collect();
1516 let first_display_len = display_len(&first, length_mode);
1517 if first_display_len < min_first_len {
1518 return None;
1519 }
1520
1521 let rest: String = chars[pos + 1..].iter().collect();
1523 let rest = rest.trim_start().to_string();
1524
1525 if rest.is_empty() {
1526 return None;
1527 }
1528
1529 Some((first, rest))
1530}
1531
1532fn split_at_break_word(
1536 text: &str,
1537 line_length: usize,
1538 element_spans: &[(usize, usize)],
1539 length_mode: ReflowLengthMode,
1540) -> Option<(String, String)> {
1541 let lower = text.to_lowercase();
1542 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1543 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1546 let mut search_start = 0;
1547 while let Some(pos) = lower[search_start..].find(word) {
1548 let abs_pos = search_start + pos;
1549
1550 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1552 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1553
1554 if preceded_by_space && followed_by_space {
1555 let first_part = text[..abs_pos].trim_end();
1557 let first_part_len = display_len(first_part, length_mode);
1558
1559 if first_part_len >= min_first_len
1560 && first_part_len <= line_length
1561 && !is_inside_element(abs_pos, element_spans)
1562 {
1563 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1565 best_split = Some((abs_pos, word.len()));
1566 }
1567 }
1568 }
1569
1570 search_start = abs_pos + word.len();
1571 }
1572 }
1573
1574 let (byte_start, _word_len) = best_split?;
1575
1576 let first = text[..byte_start].trim_end().to_string();
1577 let rest = text[byte_start..].to_string();
1578
1579 if first.is_empty() || rest.trim().is_empty() {
1580 return None;
1581 }
1582
1583 Some((first, rest))
1584}
1585
1586fn cascade_split_line(
1589 text: &str,
1590 line_length: usize,
1591 abbreviations: &Option<Vec<String>>,
1592 length_mode: ReflowLengthMode,
1593) -> Vec<String> {
1594 if line_length == 0 || display_len(text, length_mode) <= line_length {
1595 return vec![text.to_string()];
1596 }
1597
1598 let elements = parse_markdown_elements(text);
1599 let element_spans = compute_element_spans(&elements);
1600
1601 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1603 let mut result = vec![first];
1604 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1605 return result;
1606 }
1607
1608 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1610 let mut result = vec![first];
1611 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1612 return result;
1613 }
1614
1615 let options = ReflowOptions {
1617 line_length,
1618 break_on_sentences: false,
1619 preserve_breaks: false,
1620 sentence_per_line: false,
1621 semantic_line_breaks: false,
1622 abbreviations: abbreviations.clone(),
1623 length_mode,
1624 };
1625 reflow_elements(&elements, &options)
1626}
1627
1628fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1632 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1634
1635 if options.line_length == 0 {
1638 return sentence_lines;
1639 }
1640
1641 let length_mode = options.length_mode;
1642 let mut result = Vec::new();
1643 for line in sentence_lines {
1644 if display_len(&line, length_mode) <= options.line_length {
1645 result.push(line);
1646 } else {
1647 result.extend(cascade_split_line(
1648 &line,
1649 options.line_length,
1650 &options.abbreviations,
1651 length_mode,
1652 ));
1653 }
1654 }
1655
1656 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1659 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1660 for line in result {
1661 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1662 let prev_ends_at_sentence = {
1664 let trimmed = merged.last().unwrap().trim_end();
1665 trimmed
1666 .chars()
1667 .rev()
1668 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1669 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1670 };
1671
1672 if !prev_ends_at_sentence {
1673 let prev = merged.last_mut().unwrap();
1674 let combined = format!("{prev} {line}");
1675 if display_len(&combined, length_mode) <= options.line_length + options.line_length / 10 {
1678 *prev = combined;
1679 continue;
1680 }
1681 }
1682 }
1683 merged.push(line);
1684 }
1685 merged
1686}
1687
/// Find the byte offset of the last space in `line` that is not strictly inside any of
/// `element_spans`.
///
/// Scanning bytes is sufficient: the ASCII space byte never occurs inside a multi-byte
/// UTF-8 sequence, so every hit is a character boundary.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.bytes().enumerate().rev().find_map(|(pos, byte)| {
        let inside_element = element_spans.iter().any(|&(start, end)| start < pos && pos < end);
        (byte == b' ' && !inside_element).then_some(pos)
    })
}
1701
1702fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1704 let mut lines = Vec::new();
1705 let mut current_line = String::new();
1706 let mut current_length = 0;
1707 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1709 let length_mode = options.length_mode;
1710
1711 for (idx, element) in elements.iter().enumerate() {
1712 let element_str = format!("{element}");
1713 let element_len = element.display_width(length_mode);
1714
1715 let is_adjacent_to_prev = if idx > 0 {
1721 match (&elements[idx - 1], element) {
1722 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1723 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1724 _ => true,
1725 }
1726 } else {
1727 false
1728 };
1729
1730 if let Element::Text(text) = element {
1732 let has_leading_space = text.starts_with(char::is_whitespace);
1734 let words: Vec<&str> = text.split_whitespace().collect();
1736
1737 for (i, word) in words.iter().enumerate() {
1738 let word_len = display_len(word, length_mode);
1739 let is_trailing_punct = word
1741 .chars()
1742 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1743
1744 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1747
1748 if is_first_adjacent {
1749 if current_length + word_len > options.line_length && current_length > 0 {
1751 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1754 let before = current_line[..last_space].trim_end().to_string();
1755 let after = current_line[last_space + 1..].to_string();
1756 lines.push(before);
1757 current_line = format!("{after}{word}");
1758 current_length = display_len(¤t_line, length_mode);
1759 current_line_element_spans.clear();
1760 } else {
1761 current_line.push_str(word);
1762 current_length += word_len;
1763 }
1764 } else {
1765 current_line.push_str(word);
1766 current_length += word_len;
1767 }
1768 } else if current_length > 0
1769 && current_length + 1 + word_len > options.line_length
1770 && !is_trailing_punct
1771 {
1772 lines.push(current_line.trim().to_string());
1774 current_line = word.to_string();
1775 current_length = word_len;
1776 current_line_element_spans.clear();
1777 } else {
1778 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1782 current_line.push(' ');
1783 current_length += 1;
1784 }
1785 current_line.push_str(word);
1786 current_length += word_len;
1787 }
1788 }
1789 } else {
1790 if is_adjacent_to_prev {
1794 if current_length + element_len > options.line_length {
1796 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1799 let before = current_line[..last_space].trim_end().to_string();
1800 let after = current_line[last_space + 1..].to_string();
1801 lines.push(before);
1802 current_line = format!("{after}{element_str}");
1803 current_length = display_len(¤t_line, length_mode);
1804 current_line_element_spans.clear();
1805 let start = after.len();
1807 current_line_element_spans.push((start, start + element_str.len()));
1808 } else {
1809 let start = current_line.len();
1811 current_line.push_str(&element_str);
1812 current_length += element_len;
1813 current_line_element_spans.push((start, current_line.len()));
1814 }
1815 } else {
1816 let start = current_line.len();
1817 current_line.push_str(&element_str);
1818 current_length += element_len;
1819 current_line_element_spans.push((start, current_line.len()));
1820 }
1821 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1822 lines.push(current_line.trim().to_string());
1824 current_line = element_str.clone();
1825 current_length = element_len;
1826 current_line_element_spans.clear();
1827 current_line_element_spans.push((0, element_str.len()));
1828 } else {
1829 let ends_with_opener =
1831 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1832 if current_length > 0 && !ends_with_opener {
1833 current_line.push(' ');
1834 current_length += 1;
1835 }
1836 let start = current_line.len();
1837 current_line.push_str(&element_str);
1838 current_length += element_len;
1839 current_line_element_spans.push((start, current_line.len()));
1840 }
1841 }
1842 }
1843
1844 if !current_line.is_empty() {
1846 lines.push(current_line.trim_end().to_string());
1847 }
1848
1849 lines
1850}
1851
1852pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1854 let lines: Vec<&str> = content.lines().collect();
1855 let mut result = Vec::new();
1856 let mut i = 0;
1857
1858 while i < lines.len() {
1859 let line = lines[i];
1860 let trimmed = line.trim();
1861
1862 if trimmed.is_empty() {
1864 result.push(String::new());
1865 i += 1;
1866 continue;
1867 }
1868
1869 if trimmed.starts_with('#') {
1871 result.push(line.to_string());
1872 i += 1;
1873 continue;
1874 }
1875
1876 if trimmed.starts_with(":::") {
1878 result.push(line.to_string());
1879 i += 1;
1880 continue;
1881 }
1882
1883 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1885 result.push(line.to_string());
1886 i += 1;
1887 while i < lines.len() {
1889 result.push(lines[i].to_string());
1890 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1891 i += 1;
1892 break;
1893 }
1894 i += 1;
1895 }
1896 continue;
1897 }
1898
1899 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1901 result.push(line.to_string());
1903 i += 1;
1904 while i < lines.len() {
1905 let next_line = lines[i];
1906 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1908 result.push(next_line.to_string());
1909 i += 1;
1910 } else {
1911 break;
1912 }
1913 }
1914 continue;
1915 }
1916
1917 if trimmed.starts_with('>') {
1919 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1922 let quote_prefix = line[0..gt_pos + 1].to_string();
1923 let quote_content = &line[quote_prefix.len()..].trim_start();
1924
1925 let reflowed = reflow_line(quote_content, options);
1926 for reflowed_line in reflowed.iter() {
1927 result.push(format!("{quote_prefix} {reflowed_line}"));
1928 }
1929 i += 1;
1930 continue;
1931 }
1932
1933 if is_horizontal_rule(trimmed) {
1935 result.push(line.to_string());
1936 i += 1;
1937 continue;
1938 }
1939
1940 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
1942 let indent = line.len() - line.trim_start().len();
1944 let indent_str = " ".repeat(indent);
1945
1946 let mut marker_end = indent;
1949 let mut content_start = indent;
1950
1951 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1952 if let Some(period_pos) = line[indent..].find('.') {
1954 marker_end = indent + period_pos + 1; content_start = marker_end;
1956 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1960 content_start += 1;
1961 }
1962 }
1963 } else {
1964 marker_end = indent + 1; content_start = marker_end;
1967 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1971 content_start += 1;
1972 }
1973 }
1974
1975 let marker = &line[indent..marker_end];
1976
1977 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1980 i += 1;
1981
1982 while i < lines.len() {
1984 let next_line = lines[i];
1985 let next_trimmed = next_line.trim();
1986
1987 if is_block_boundary(next_trimmed) {
1989 break;
1990 }
1991
1992 let next_indent = next_line.len() - next_line.trim_start().len();
1994 if next_indent >= content_start {
1995 let trimmed_start = next_line.trim_start();
1998 list_content.push(trim_preserving_hard_break(trimmed_start));
1999 i += 1;
2000 } else {
2001 break;
2003 }
2004 }
2005
2006 let combined_content = if options.preserve_breaks {
2009 list_content[0].clone()
2010 } else {
2011 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2013 if has_hard_breaks {
2014 list_content.join("\n")
2016 } else {
2017 list_content.join(" ")
2019 }
2020 };
2021
2022 let trimmed_marker = marker;
2024 let continuation_spaces = content_start;
2025
2026 let prefix_length = indent + trimmed_marker.len() + 1;
2028
2029 let adjusted_options = ReflowOptions {
2031 line_length: options.line_length.saturating_sub(prefix_length),
2032 ..options.clone()
2033 };
2034
2035 let reflowed = reflow_line(&combined_content, &adjusted_options);
2036 for (j, reflowed_line) in reflowed.iter().enumerate() {
2037 if j == 0 {
2038 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2039 } else {
2040 let continuation_indent = " ".repeat(continuation_spaces);
2042 result.push(format!("{continuation_indent}{reflowed_line}"));
2043 }
2044 }
2045 continue;
2046 }
2047
2048 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2050 result.push(line.to_string());
2051 i += 1;
2052 continue;
2053 }
2054
2055 if trimmed.starts_with('[') && line.contains("]:") {
2057 result.push(line.to_string());
2058 i += 1;
2059 continue;
2060 }
2061
2062 if is_definition_list_item(trimmed) {
2064 result.push(line.to_string());
2065 i += 1;
2066 continue;
2067 }
2068
2069 let mut is_single_line_paragraph = true;
2071 if i + 1 < lines.len() {
2072 let next_trimmed = lines[i + 1].trim();
2073 if !is_block_boundary(next_trimmed) {
2075 is_single_line_paragraph = false;
2076 }
2077 }
2078
2079 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2081 result.push(line.to_string());
2082 i += 1;
2083 continue;
2084 }
2085
2086 let mut paragraph_parts = Vec::new();
2088 let mut current_part = vec![line];
2089 i += 1;
2090
2091 if options.preserve_breaks {
2093 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2095 Some("\\")
2096 } else if line.ends_with(" ") {
2097 Some(" ")
2098 } else {
2099 None
2100 };
2101 let reflowed = reflow_line(line, options);
2102
2103 if let Some(break_marker) = hard_break_type {
2105 if !reflowed.is_empty() {
2106 let mut reflowed_with_break = reflowed;
2107 let last_idx = reflowed_with_break.len() - 1;
2108 if !has_hard_break(&reflowed_with_break[last_idx]) {
2109 reflowed_with_break[last_idx].push_str(break_marker);
2110 }
2111 result.extend(reflowed_with_break);
2112 }
2113 } else {
2114 result.extend(reflowed);
2115 }
2116 } else {
2117 while i < lines.len() {
2119 let prev_line = if !current_part.is_empty() {
2120 current_part.last().unwrap()
2121 } else {
2122 ""
2123 };
2124 let next_line = lines[i];
2125 let next_trimmed = next_line.trim();
2126
2127 if is_block_boundary(next_trimmed) {
2129 break;
2130 }
2131
2132 let prev_trimmed = prev_line.trim();
2135 let abbreviations = get_abbreviations(&options.abbreviations);
2136 let ends_with_sentence = (prev_trimmed.ends_with('.')
2137 || prev_trimmed.ends_with('!')
2138 || prev_trimmed.ends_with('?')
2139 || prev_trimmed.ends_with(".*")
2140 || prev_trimmed.ends_with("!*")
2141 || prev_trimmed.ends_with("?*")
2142 || prev_trimmed.ends_with("._")
2143 || prev_trimmed.ends_with("!_")
2144 || prev_trimmed.ends_with("?_")
2145 || prev_trimmed.ends_with(".\"")
2147 || prev_trimmed.ends_with("!\"")
2148 || prev_trimmed.ends_with("?\"")
2149 || prev_trimmed.ends_with(".'")
2150 || prev_trimmed.ends_with("!'")
2151 || prev_trimmed.ends_with("?'")
2152 || prev_trimmed.ends_with(".\u{201D}")
2153 || prev_trimmed.ends_with("!\u{201D}")
2154 || prev_trimmed.ends_with("?\u{201D}")
2155 || prev_trimmed.ends_with(".\u{2019}")
2156 || prev_trimmed.ends_with("!\u{2019}")
2157 || prev_trimmed.ends_with("?\u{2019}"))
2158 && !text_ends_with_abbreviation(
2159 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2160 &abbreviations,
2161 );
2162
2163 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2164 paragraph_parts.push(current_part.join(" "));
2166 current_part = vec![next_line];
2167 } else {
2168 current_part.push(next_line);
2169 }
2170 i += 1;
2171 }
2172
2173 if !current_part.is_empty() {
2175 if current_part.len() == 1 {
2176 paragraph_parts.push(current_part[0].to_string());
2178 } else {
2179 paragraph_parts.push(current_part.join(" "));
2180 }
2181 }
2182
2183 for (j, part) in paragraph_parts.iter().enumerate() {
2185 let reflowed = reflow_line(part, options);
2186 result.extend(reflowed);
2187
2188 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2192 let last_idx = result.len() - 1;
2193 if !has_hard_break(&result[last_idx]) {
2194 result[last_idx].push_str(" ");
2195 }
2196 }
2197 }
2198 }
2199 }
2200
2201 let result_text = result.join("\n");
2203 if content.ends_with('\n') && !result_text.ends_with('\n') {
2204 format!("{result_text}\n")
2205 } else {
2206 result_text
2207 }
2208}
2209
/// Result of reflowing a single paragraph: the byte range to replace in the original
/// content and the text to put there.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset in the original content where the paragraph starts.
    pub start_byte: usize,
    /// Byte offset in the original content where the paragraph ends (exclusive).
    pub end_byte: usize,
    /// The reflowed paragraph that replaces the `start_byte..end_byte` range.
    pub reflowed_text: String,
}
2220
2221pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2239 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2240}
2241
2242pub fn reflow_paragraph_at_line_with_mode(
2244 content: &str,
2245 line_number: usize,
2246 line_length: usize,
2247 length_mode: ReflowLengthMode,
2248) -> Option<ParagraphReflow> {
2249 if line_number == 0 {
2250 return None;
2251 }
2252
2253 let lines: Vec<&str> = content.lines().collect();
2254
2255 if line_number > lines.len() {
2257 return None;
2258 }
2259
2260 let target_idx = line_number - 1; let target_line = lines[target_idx];
2262 let trimmed = target_line.trim();
2263
2264 if is_paragraph_boundary(trimmed, target_line) {
2266 return None;
2267 }
2268
2269 let mut para_start = target_idx;
2271 while para_start > 0 {
2272 let prev_idx = para_start - 1;
2273 let prev_line = lines[prev_idx];
2274 let prev_trimmed = prev_line.trim();
2275
2276 if is_paragraph_boundary(prev_trimmed, prev_line) {
2278 break;
2279 }
2280
2281 para_start = prev_idx;
2282 }
2283
2284 let mut para_end = target_idx;
2286 while para_end + 1 < lines.len() {
2287 let next_idx = para_end + 1;
2288 let next_line = lines[next_idx];
2289 let next_trimmed = next_line.trim();
2290
2291 if is_paragraph_boundary(next_trimmed, next_line) {
2293 break;
2294 }
2295
2296 para_end = next_idx;
2297 }
2298
2299 let paragraph_lines = &lines[para_start..=para_end];
2301
2302 let mut start_byte = 0;
2304 for line in lines.iter().take(para_start) {
2305 start_byte += line.len() + 1; }
2307
2308 let mut end_byte = start_byte;
2309 for line in paragraph_lines.iter() {
2310 end_byte += line.len() + 1; }
2312
2313 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2316
2317 if !includes_trailing_newline {
2319 end_byte -= 1;
2320 }
2321
2322 let paragraph_text = paragraph_lines.join("\n");
2324
2325 let options = ReflowOptions {
2327 line_length,
2328 break_on_sentences: true,
2329 preserve_breaks: false,
2330 sentence_per_line: false,
2331 semantic_line_breaks: false,
2332 abbreviations: None,
2333 length_mode,
2334 };
2335
2336 let reflowed = reflow_markdown(¶graph_text, &options);
2338
2339 let reflowed_text = if includes_trailing_newline {
2343 if reflowed.ends_with('\n') {
2345 reflowed
2346 } else {
2347 format!("{reflowed}\n")
2348 }
2349 } else {
2350 if reflowed.ends_with('\n') {
2352 reflowed.trim_end_matches('\n').to_string()
2353 } else {
2354 reflowed
2355 }
2356 };
2357
2358 Some(ParagraphReflow {
2359 start_byte,
2360 end_byte,
2361 reflowed_text,
2362 })
2363}
2364
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `text_ends_with_abbreviation` against the default abbreviation set.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Known abbreviations followed by a period.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Words that must not be reported as abbreviations.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // No trailing period, or no text at all.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    /// Bullet markers are recognised; rules, emphasis and plain text are not.
    #[test]
    fn test_is_unordered_list_marker() {
        // Marker followed by content.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));

        // Bare markers.
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Horizontal rules.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));

        // Emphasis, a marker without a following space, and unrelated lines.
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    /// Every block-level construct ends a paragraph; inline content does not.
    #[test]
    fn test_is_block_boundary() {
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Inline content.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list item directly after a term must stay on its own line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` sees block boundaries plus indented code and table rows.
    #[test]
    fn test_is_paragraph_boundary() {
        // Block boundaries.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four spaces or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with and without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text indented too little to be code.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    /// Asking to reflow at a div marker line yields no edit.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the `::: {.callout-note}` marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}