1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16 text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20
/// Settings that steer how a single Markdown line is reflowed.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Maximum line width, measured in `char`s. `reflow_line` returns the
    /// line unchanged when this is `0` or the line already fits.
    pub line_length: usize,
    /// NOTE(review): not read in this part of the file; presumably prefers
    /// sentence ends as wrap points — confirm against the reflow helpers.
    pub break_on_sentences: bool,
    /// NOTE(review): not read in this part of the file; presumably keeps
    /// existing line breaks — confirm against the callers.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits one sentence per output line and does
    /// not consult `line_length`.
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` delegates to
    /// the semantic line-break reflow.
    pub semantic_line_breaks: bool,
    /// Optional caller-supplied abbreviations, forwarded to
    /// `get_abbreviations` when sentence boundaries are detected.
    pub abbreviations: Option<Vec<String>>,
}
39
40impl Default for ReflowOptions {
41 fn default() -> Self {
42 Self {
43 line_length: 80,
44 break_on_sentences: true,
45 preserve_breaks: false,
46 sentence_per_line: false,
47 semantic_line_breaks: false,
48 abbreviations: None,
49 }
50 }
51}
52
53fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
57 let chars: Vec<char> = text.chars().collect();
58
59 if pos + 1 >= chars.len() {
60 return false;
61 }
62
63 let c = chars[pos];
64 let next_char = chars[pos + 1];
65
66 if is_cjk_sentence_ending(c) {
69 let mut after_punct_pos = pos + 1;
71 while after_punct_pos < chars.len()
72 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
73 {
74 after_punct_pos += 1;
75 }
76
77 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
79 after_punct_pos += 1;
80 }
81
82 if after_punct_pos >= chars.len() {
84 return false;
85 }
86
87 while after_punct_pos < chars.len()
89 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
90 {
91 after_punct_pos += 1;
92 }
93
94 if after_punct_pos >= chars.len() {
95 return false;
96 }
97
98 return true;
101 }
102
103 if c != '.' && c != '!' && c != '?' {
105 return false;
106 }
107
108 let (_space_pos, after_space_pos) = if next_char == ' ' {
110 (pos + 1, pos + 2)
112 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
113 if chars[pos + 2] == ' ' {
115 (pos + 2, pos + 3)
117 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
118 (pos + 3, pos + 4)
120 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
121 && pos + 4 < chars.len()
122 && chars[pos + 3] == chars[pos + 2]
123 && chars[pos + 4] == ' '
124 {
125 (pos + 4, pos + 5)
127 } else {
128 return false;
129 }
130 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
131 (pos + 2, pos + 3)
133 } else if (next_char == '*' || next_char == '_')
134 && pos + 3 < chars.len()
135 && chars[pos + 2] == next_char
136 && chars[pos + 3] == ' '
137 {
138 (pos + 3, pos + 4)
140 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
141 (pos + 3, pos + 4)
143 } else {
144 return false;
145 };
146
147 let mut next_char_pos = after_space_pos;
149 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
150 next_char_pos += 1;
151 }
152
153 if next_char_pos >= chars.len() {
155 return false;
156 }
157
158 let mut first_letter_pos = next_char_pos;
160 while first_letter_pos < chars.len()
161 && (chars[first_letter_pos] == '*'
162 || chars[first_letter_pos] == '_'
163 || chars[first_letter_pos] == '~'
164 || is_opening_quote(chars[first_letter_pos]))
165 {
166 first_letter_pos += 1;
167 }
168
169 if first_letter_pos >= chars.len() {
171 return false;
172 }
173
174 let first_char = chars[first_letter_pos];
176 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
177 return false;
178 }
179
180 if pos > 0 && c == '.' {
182 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
184 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
185 return false;
186 }
187
188 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
191 return false;
192 }
193 }
194 true
195}
196
197pub fn split_into_sentences(text: &str) -> Vec<String> {
199 split_into_sentences_custom(text, &None)
200}
201
202pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
204 let abbreviations = get_abbreviations(custom_abbreviations);
205 split_into_sentences_with_set(text, &abbreviations)
206}
207
208fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
211 let mut sentences = Vec::new();
212 let mut current_sentence = String::new();
213 let mut chars = text.chars().peekable();
214 let mut pos = 0;
215
216 while let Some(c) = chars.next() {
217 current_sentence.push(c);
218
219 if is_sentence_boundary(text, pos, abbreviations) {
220 while let Some(&next) = chars.peek() {
222 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
223 current_sentence.push(chars.next().unwrap());
224 pos += 1;
225 } else {
226 break;
227 }
228 }
229
230 if chars.peek() == Some(&' ') {
232 chars.next();
233 pos += 1;
234 }
235
236 sentences.push(current_sentence.trim().to_string());
237 current_sentence.clear();
238 }
239
240 pos += 1;
241 }
242
243 if !current_sentence.trim().is_empty() {
245 sentences.push(current_sentence.trim().to_string());
246 }
247 sentences
248}
249
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_` or
/// `*`, contains only that character and spaces, and has at least three
/// marker characters.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
278
/// Returns `true` when `line` begins with one or more numeric characters
/// immediately followed by `.` and a space (for example `1. item`).
fn is_numbered_list_item(line: &str) -> bool {
    let after_number = line.trim_start_matches(char::is_numeric);
    let has_number = after_number.len() < line.len();
    has_number && after_number.starts_with(". ")
}
302
303fn is_unordered_list_marker(s: &str) -> bool {
305 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
306 && !is_horizontal_rule(s)
307 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
308}
309
310fn is_block_boundary_core(trimmed: &str) -> bool {
313 trimmed.is_empty()
314 || trimmed.starts_with('#')
315 || trimmed.starts_with("```")
316 || trimmed.starts_with("~~~")
317 || trimmed.starts_with('>')
318 || (trimmed.starts_with('[') && trimmed.contains("]:"))
319 || is_horizontal_rule(trimmed)
320 || is_unordered_list_marker(trimmed)
321 || is_numbered_list_item(trimmed)
322 || is_definition_list_item(trimmed)
323 || trimmed.starts_with(":::")
324}
325
326fn is_block_boundary(trimmed: &str) -> bool {
329 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
330}
331
332fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
336 is_block_boundary_core(trimmed)
337 || ElementCache::calculate_indentation_width_default(line) >= 4
338 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
339}
340
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either a trailing backslash or **two or more** trailing
/// spaces; a single trailing space is ordinary whitespace. A trailing `\r`
/// (from CRLF line endings) is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    // Count the spaces explicitly so one stray trailing space is not
    // mistaken for a hard break.
    let trailing_spaces = line.len() - line.trim_end_matches(' ').len();
    trailing_spaces >= 2 || line.ends_with('\\')
}
350
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
355
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A trailing `\r` (CRLF) is removed first.
/// * A line ending in a backslash is returned as is.
/// * A line ending in two or more spaces is normalized to exactly two
///   trailing spaces; a whitespace-only line becomes empty.
/// * Anything else, including a single trailing space, is fully trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    // Only two or more trailing spaces form a hard break.
    let trailing_spaces = s.len() - s.trim_end_matches(' ').len();
    if trailing_spaces < 2 || content.is_empty() {
        return content.to_string();
    }

    let mut kept = String::with_capacity(content.len() + 2);
    kept.push_str(content);
    kept.extend([' ', ' ']);
    kept
}
386
387pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
388 if options.sentence_per_line {
390 let elements = parse_markdown_elements(line);
391 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
392 }
393
394 if options.semantic_line_breaks {
396 let elements = parse_markdown_elements(line);
397 return reflow_elements_semantic(&elements, options);
398 }
399
400 if options.line_length == 0 || line.chars().count() <= options.line_length {
403 return vec![line.to_string()];
404 }
405
406 let elements = parse_markdown_elements(line);
408
409 reflow_elements(&elements, options)
411}
412
/// Where the image inside a linked image gets its source from.
#[derive(Clone, Debug)]
enum LinkedImageSource {
    /// The image URL is written inline.
    Inline(String),
    /// The image is given by a reference label.
    Reference(String),
}
421
/// Where the link wrapped around a linked image points to.
#[derive(Clone, Debug)]
enum LinkedImageTarget {
    /// The link URL is written inline.
    Inline(String),
    /// The link is given by a reference label.
    Reference(String),
}
430
431#[derive(Debug, Clone)]
433enum Element {
434 Text(String),
436 Link { text: String, url: String },
438 ReferenceLink { text: String, reference: String },
440 EmptyReferenceLink { text: String },
442 ShortcutReference { reference: String },
444 InlineImage { alt: String, url: String },
446 ReferenceImage { alt: String, reference: String },
448 EmptyReferenceImage { alt: String },
450 LinkedImage {
456 alt: String,
457 img_source: LinkedImageSource,
458 link_target: LinkedImageTarget,
459 },
460 FootnoteReference { note: String },
462 Strikethrough(String),
464 WikiLink(String),
466 InlineMath(String),
468 DisplayMath(String),
470 EmojiShortcode(String),
472 HtmlTag(String),
474 HtmlEntity(String),
476 HugoShortcode(String),
478 Code(String),
480 Bold {
482 content: String,
483 underscore: bool,
485 },
486 Italic {
488 content: String,
489 underscore: bool,
491 },
492}
493
494impl std::fmt::Display for Element {
495 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
496 match self {
497 Element::Text(s) => write!(f, "{s}"),
498 Element::Link { text, url } => write!(f, "[{text}]({url})"),
499 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
500 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
501 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
502 Element::InlineImage { alt, url } => write!(f, ""),
503 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
504 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
505 Element::LinkedImage {
506 alt,
507 img_source,
508 link_target,
509 } => {
510 let img_part = match img_source {
512 LinkedImageSource::Inline(url) => format!(""),
513 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
514 };
515 match link_target {
517 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
518 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
519 }
520 }
521 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
522 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
523 Element::WikiLink(s) => write!(f, "[[{s}]]"),
524 Element::InlineMath(s) => write!(f, "${s}$"),
525 Element::DisplayMath(s) => write!(f, "$${s}$$"),
526 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
527 Element::HtmlTag(s) => write!(f, "{s}"),
528 Element::HtmlEntity(s) => write!(f, "{s}"),
529 Element::HugoShortcode(s) => write!(f, "{s}"),
530 Element::Code(s) => write!(f, "`{s}`"),
531 Element::Bold { content, underscore } => {
532 if *underscore {
533 write!(f, "__{content}__")
534 } else {
535 write!(f, "**{content}**")
536 }
537 }
538 Element::Italic { content, underscore } => {
539 if *underscore {
540 write!(f, "_{content}_")
541 } else {
542 write!(f, "*{content}*")
543 }
544 }
545 }
546 }
547}
548
549impl Element {
550 fn len(&self) -> usize {
551 match self {
552 Element::Text(s) => s.chars().count(),
553 Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, Element::EmptyReferenceLink { text } => text.chars().count() + 4, Element::ShortcutReference { reference } => reference.chars().count() + 2, Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, Element::LinkedImage {
561 alt,
562 img_source,
563 link_target,
564 } => {
565 let alt_len = alt.chars().count();
568 let img_len = match img_source {
569 LinkedImageSource::Inline(url) => url.chars().count() + 2, LinkedImageSource::Reference(r) => r.chars().count() + 2, };
572 let link_len = match link_target {
573 LinkedImageTarget::Inline(url) => url.chars().count() + 2, LinkedImageTarget::Reference(r) => r.chars().count() + 2, };
576 5 + alt_len + img_len + link_len
579 }
580 Element::FootnoteReference { note } => note.chars().count() + 3, Element::Strikethrough(s) => s.chars().count() + 4, Element::WikiLink(s) => s.chars().count() + 4, Element::InlineMath(s) => s.chars().count() + 2, Element::DisplayMath(s) => s.chars().count() + 4, Element::EmojiShortcode(s) => s.chars().count() + 2, Element::HtmlTag(s) => s.chars().count(), Element::HtmlEntity(s) => s.chars().count(), Element::HugoShortcode(s) => s.chars().count(), Element::Code(s) => s.chars().count() + 2, Element::Bold { content, .. } => content.chars().count() + 4, Element::Italic { content, .. } => content.chars().count() + 2, }
593 }
594}
595
/// An emphasis, strong or strikethrough run located by `extract_emphasis_spans`.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset in the parsed text where the opening delimiter starts.
    start: usize,
    /// Byte offset just past the closing delimiter (exclusive).
    end: usize,
    /// Text between the opening and closing delimiters.
    content: String,
    /// Set for strong emphasis (two-character delimiters).
    is_strong: bool,
    /// Set for strikethrough.
    is_strikethrough: bool,
    /// Set when the delimiter is `_` / `__` rather than `*` / `**`.
    uses_underscore: bool,
}
612
613fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
623 let mut spans = Vec::new();
624 let mut options = Options::empty();
625 options.insert(Options::ENABLE_STRIKETHROUGH);
626
627 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
630 let mut strikethrough_stack: Vec<usize> = Vec::new();
631
632 let parser = Parser::new_ext(text, options).into_offset_iter();
633
634 for (event, range) in parser {
635 match event {
636 Event::Start(Tag::Emphasis) => {
637 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
639 emphasis_stack.push((range.start, uses_underscore));
640 }
641 Event::End(TagEnd::Emphasis) => {
642 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
643 let content_start = start_byte + 1;
645 let content_end = range.end - 1;
646 if content_end > content_start
647 && let Some(content) = text.get(content_start..content_end)
648 {
649 spans.push(EmphasisSpan {
650 start: start_byte,
651 end: range.end,
652 content: content.to_string(),
653 is_strong: false,
654 is_strikethrough: false,
655 uses_underscore,
656 });
657 }
658 }
659 }
660 Event::Start(Tag::Strong) => {
661 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
663 strong_stack.push((range.start, uses_underscore));
664 }
665 Event::End(TagEnd::Strong) => {
666 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
667 let content_start = start_byte + 2;
669 let content_end = range.end - 2;
670 if content_end > content_start
671 && let Some(content) = text.get(content_start..content_end)
672 {
673 spans.push(EmphasisSpan {
674 start: start_byte,
675 end: range.end,
676 content: content.to_string(),
677 is_strong: true,
678 is_strikethrough: false,
679 uses_underscore,
680 });
681 }
682 }
683 }
684 Event::Start(Tag::Strikethrough) => {
685 strikethrough_stack.push(range.start);
686 }
687 Event::End(TagEnd::Strikethrough) => {
688 if let Some(start_byte) = strikethrough_stack.pop() {
689 let content_start = start_byte + 2;
691 let content_end = range.end - 2;
692 if content_end > content_start
693 && let Some(content) = text.get(content_start..content_end)
694 {
695 spans.push(EmphasisSpan {
696 start: start_byte,
697 end: range.end,
698 content: content.to_string(),
699 is_strong: false,
700 is_strikethrough: true,
701 uses_underscore: false,
702 });
703 }
704 }
705 }
706 _ => {}
707 }
708 }
709
710 spans.sort_by_key(|s| s.start);
712 spans
713}
714
715fn parse_markdown_elements(text: &str) -> Vec<Element> {
726 let mut elements = Vec::new();
727 let mut remaining = text;
728
729 let emphasis_spans = extract_emphasis_spans(text);
731
732 while !remaining.is_empty() {
733 let current_offset = text.len() - remaining.len();
735 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
737
738 if remaining.contains("[!") {
742 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
744 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
745 {
746 earliest_match = Some((m.start(), "linked_image_ii", m));
747 }
748
749 if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
751 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
752 {
753 earliest_match = Some((m.start(), "linked_image_ri", m));
754 }
755
756 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
758 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
759 {
760 earliest_match = Some((m.start(), "linked_image_ir", m));
761 }
762
763 if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
765 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
766 {
767 earliest_match = Some((m.start(), "linked_image_rr", m));
768 }
769 }
770
771 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
774 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
775 {
776 earliest_match = Some((m.start(), "inline_image", m));
777 }
778
779 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
781 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
782 {
783 earliest_match = Some((m.start(), "ref_image", m));
784 }
785
786 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
788 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
789 {
790 earliest_match = Some((m.start(), "footnote_ref", m));
791 }
792
793 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
795 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
796 {
797 earliest_match = Some((m.start(), "inline_link", m));
798 }
799
800 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
802 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
803 {
804 earliest_match = Some((m.start(), "ref_link", m));
805 }
806
807 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
810 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
811 {
812 earliest_match = Some((m.start(), "shortcut_ref", m));
813 }
814
815 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
817 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
818 {
819 earliest_match = Some((m.start(), "wiki_link", m));
820 }
821
822 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
824 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
825 {
826 earliest_match = Some((m.start(), "display_math", m));
827 }
828
829 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
831 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
832 {
833 earliest_match = Some((m.start(), "inline_math", m));
834 }
835
836 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
840 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
841 {
842 earliest_match = Some((m.start(), "emoji", m));
843 }
844
845 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
847 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
848 {
849 earliest_match = Some((m.start(), "html_entity", m));
850 }
851
852 if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
855 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
856 {
857 earliest_match = Some((m.start(), "hugo_shortcode", m));
858 }
859
860 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
863 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
864 {
865 let matched_text = &remaining[m.start()..m.end()];
867 let is_url_autolink = matched_text.starts_with("<http://")
868 || matched_text.starts_with("<https://")
869 || matched_text.starts_with("<mailto:")
870 || matched_text.starts_with("<ftp://")
871 || matched_text.starts_with("<ftps://");
872
873 let is_email_autolink = {
876 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
877 EMAIL_PATTERN.is_match(content)
878 };
879
880 if !is_url_autolink && !is_email_autolink {
881 earliest_match = Some((m.start(), "html_tag", m));
882 }
883 }
884
885 let mut next_special = remaining.len();
887 let mut special_type = "";
888 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
889
890 if let Some(pos) = remaining.find('`')
892 && pos < next_special
893 {
894 next_special = pos;
895 special_type = "code";
896 }
897
898 for span in &emphasis_spans {
901 if span.start >= current_offset && span.start < current_offset + remaining.len() {
902 let pos_in_remaining = span.start - current_offset;
903 if pos_in_remaining < next_special {
904 next_special = pos_in_remaining;
905 special_type = "pulldown_emphasis";
906 pulldown_emphasis = Some(span);
907 }
908 break; }
910 }
911
912 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
914 pos < next_special
915 } else {
916 false
917 };
918
919 if should_process_markdown_link {
920 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
921
922 if pos > 0 {
924 elements.push(Element::Text(remaining[..pos].to_string()));
925 }
926
927 match pattern_type {
929 "linked_image_ii" => {
931 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
932 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
933 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
934 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
935 elements.push(Element::LinkedImage {
936 alt: alt.to_string(),
937 img_source: LinkedImageSource::Inline(img_url.to_string()),
938 link_target: LinkedImageTarget::Inline(link_url.to_string()),
939 });
940 remaining = &remaining[match_obj.end()..];
941 } else {
942 elements.push(Element::Text("[".to_string()));
943 remaining = &remaining[1..];
944 }
945 }
946 "linked_image_ri" => {
948 if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
949 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
950 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
951 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
952 elements.push(Element::LinkedImage {
953 alt: alt.to_string(),
954 img_source: LinkedImageSource::Reference(img_ref.to_string()),
955 link_target: LinkedImageTarget::Inline(link_url.to_string()),
956 });
957 remaining = &remaining[match_obj.end()..];
958 } else {
959 elements.push(Element::Text("[".to_string()));
960 remaining = &remaining[1..];
961 }
962 }
963 "linked_image_ir" => {
965 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
966 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
967 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
968 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
969 elements.push(Element::LinkedImage {
970 alt: alt.to_string(),
971 img_source: LinkedImageSource::Inline(img_url.to_string()),
972 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
973 });
974 remaining = &remaining[match_obj.end()..];
975 } else {
976 elements.push(Element::Text("[".to_string()));
977 remaining = &remaining[1..];
978 }
979 }
980 "linked_image_rr" => {
982 if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
983 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
984 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
985 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
986 elements.push(Element::LinkedImage {
987 alt: alt.to_string(),
988 img_source: LinkedImageSource::Reference(img_ref.to_string()),
989 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
990 });
991 remaining = &remaining[match_obj.end()..];
992 } else {
993 elements.push(Element::Text("[".to_string()));
994 remaining = &remaining[1..];
995 }
996 }
997 "inline_image" => {
998 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
999 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1000 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1001 elements.push(Element::InlineImage {
1002 alt: alt.to_string(),
1003 url: url.to_string(),
1004 });
1005 remaining = &remaining[match_obj.end()..];
1006 } else {
1007 elements.push(Element::Text("!".to_string()));
1008 remaining = &remaining[1..];
1009 }
1010 }
1011 "ref_image" => {
1012 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1013 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1014 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1015
1016 if reference.is_empty() {
1017 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1018 } else {
1019 elements.push(Element::ReferenceImage {
1020 alt: alt.to_string(),
1021 reference: reference.to_string(),
1022 });
1023 }
1024 remaining = &remaining[match_obj.end()..];
1025 } else {
1026 elements.push(Element::Text("!".to_string()));
1027 remaining = &remaining[1..];
1028 }
1029 }
1030 "footnote_ref" => {
1031 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1032 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1033 elements.push(Element::FootnoteReference { note: note.to_string() });
1034 remaining = &remaining[match_obj.end()..];
1035 } else {
1036 elements.push(Element::Text("[".to_string()));
1037 remaining = &remaining[1..];
1038 }
1039 }
1040 "inline_link" => {
1041 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1042 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1043 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1044 elements.push(Element::Link {
1045 text: text.to_string(),
1046 url: url.to_string(),
1047 });
1048 remaining = &remaining[match_obj.end()..];
1049 } else {
1050 elements.push(Element::Text("[".to_string()));
1052 remaining = &remaining[1..];
1053 }
1054 }
1055 "ref_link" => {
1056 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1057 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1058 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1059
1060 if reference.is_empty() {
1061 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1063 } else {
1064 elements.push(Element::ReferenceLink {
1066 text: text.to_string(),
1067 reference: reference.to_string(),
1068 });
1069 }
1070 remaining = &remaining[match_obj.end()..];
1071 } else {
1072 elements.push(Element::Text("[".to_string()));
1074 remaining = &remaining[1..];
1075 }
1076 }
1077 "shortcut_ref" => {
1078 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1079 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1080 elements.push(Element::ShortcutReference {
1081 reference: reference.to_string(),
1082 });
1083 remaining = &remaining[match_obj.end()..];
1084 } else {
1085 elements.push(Element::Text("[".to_string()));
1087 remaining = &remaining[1..];
1088 }
1089 }
1090 "wiki_link" => {
1091 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1092 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1093 elements.push(Element::WikiLink(content.to_string()));
1094 remaining = &remaining[match_obj.end()..];
1095 } else {
1096 elements.push(Element::Text("[[".to_string()));
1097 remaining = &remaining[2..];
1098 }
1099 }
1100 "display_math" => {
1101 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1102 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1103 elements.push(Element::DisplayMath(math.to_string()));
1104 remaining = &remaining[match_obj.end()..];
1105 } else {
1106 elements.push(Element::Text("$$".to_string()));
1107 remaining = &remaining[2..];
1108 }
1109 }
1110 "inline_math" => {
1111 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1112 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1113 elements.push(Element::InlineMath(math.to_string()));
1114 remaining = &remaining[match_obj.end()..];
1115 } else {
1116 elements.push(Element::Text("$".to_string()));
1117 remaining = &remaining[1..];
1118 }
1119 }
1120 "emoji" => {
1122 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1123 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1124 elements.push(Element::EmojiShortcode(emoji.to_string()));
1125 remaining = &remaining[match_obj.end()..];
1126 } else {
1127 elements.push(Element::Text(":".to_string()));
1128 remaining = &remaining[1..];
1129 }
1130 }
1131 "html_entity" => {
1132 elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
1134 remaining = &remaining[match_obj.end()..];
1135 }
1136 "hugo_shortcode" => {
1137 elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
1139 remaining = &remaining[match_obj.end()..];
1140 }
1141 "html_tag" => {
1142 elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
1144 remaining = &remaining[match_obj.end()..];
1145 }
1146 _ => {
1147 elements.push(Element::Text("[".to_string()));
1149 remaining = &remaining[1..];
1150 }
1151 }
1152 } else {
1153 if next_special > 0 && next_special < remaining.len() {
1157 elements.push(Element::Text(remaining[..next_special].to_string()));
1158 remaining = &remaining[next_special..];
1159 }
1160
1161 match special_type {
1163 "code" => {
1164 if let Some(code_end) = remaining[1..].find('`') {
1166 let code = &remaining[1..1 + code_end];
1167 elements.push(Element::Code(code.to_string()));
1168 remaining = &remaining[1 + code_end + 1..];
1169 } else {
1170 elements.push(Element::Text(remaining.to_string()));
1172 break;
1173 }
1174 }
1175 "pulldown_emphasis" => {
1176 if let Some(span) = pulldown_emphasis {
1178 let span_len = span.end - span.start;
1179 if span.is_strikethrough {
1180 elements.push(Element::Strikethrough(span.content.clone()));
1181 } else if span.is_strong {
1182 elements.push(Element::Bold {
1183 content: span.content.clone(),
1184 underscore: span.uses_underscore,
1185 });
1186 } else {
1187 elements.push(Element::Italic {
1188 content: span.content.clone(),
1189 underscore: span.uses_underscore,
1190 });
1191 }
1192 remaining = &remaining[span_len..];
1193 } else {
1194 elements.push(Element::Text(remaining[..1].to_string()));
1196 remaining = &remaining[1..];
1197 }
1198 }
1199 _ => {
1200 elements.push(Element::Text(remaining.to_string()));
1202 break;
1203 }
1204 }
1205 }
1206 }
1207
1208 elements
1209}
1210
1211fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1213 let abbreviations = get_abbreviations(custom_abbreviations);
1214 let mut lines = Vec::new();
1215 let mut current_line = String::new();
1216
1217 for (idx, element) in elements.iter().enumerate() {
1218 let element_str = format!("{element}");
1219
1220 if let Element::Text(text) = element {
1222 let combined = format!("{current_line}{text}");
1224 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1226
1227 if sentences.len() > 1 {
1228 for (i, sentence) in sentences.iter().enumerate() {
1230 if i == 0 {
1231 let trimmed = sentence.trim();
1234
1235 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1236 current_line = sentence.to_string();
1238 } else {
1239 lines.push(sentence.to_string());
1241 current_line.clear();
1242 }
1243 } else if i == sentences.len() - 1 {
1244 let trimmed = sentence.trim();
1246 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1247
1248 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1249 lines.push(sentence.to_string());
1251 current_line.clear();
1252 } else {
1253 current_line = sentence.to_string();
1255 }
1256 } else {
1257 lines.push(sentence.to_string());
1259 }
1260 }
1261 } else {
1262 let trimmed = combined.trim();
1264
1265 if trimmed.is_empty() {
1269 continue;
1270 }
1271
1272 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1273
1274 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1275 lines.push(trimmed.to_string());
1277 current_line.clear();
1278 } else {
1279 current_line = combined;
1281 }
1282 }
1283 } else if let Element::Italic { content, underscore } = element {
1284 let marker = if *underscore { "_" } else { "*" };
1286 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1287 } else if let Element::Bold { content, underscore } = element {
1288 let marker = if *underscore { "__" } else { "**" };
1290 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1291 } else if let Element::Strikethrough(content) = element {
1292 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1294 } else {
1295 let is_adjacent = if idx > 0 {
1298 match &elements[idx - 1] {
1299 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1300 _ => true,
1301 }
1302 } else {
1303 false
1304 };
1305
1306 if !is_adjacent
1308 && !current_line.is_empty()
1309 && !current_line.ends_with(' ')
1310 && !current_line.ends_with('(')
1311 && !current_line.ends_with('[')
1312 {
1313 current_line.push(' ');
1314 }
1315 current_line.push_str(&element_str);
1316 }
1317 }
1318
1319 if !current_line.is_empty() {
1321 lines.push(current_line.trim().to_string());
1322 }
1323 lines
1324}
1325
1326fn handle_emphasis_sentence_split(
1328 content: &str,
1329 marker: &str,
1330 abbreviations: &HashSet<String>,
1331 current_line: &mut String,
1332 lines: &mut Vec<String>,
1333) {
1334 let sentences = split_into_sentences_with_set(content, abbreviations);
1336
1337 if sentences.len() <= 1 {
1338 if !current_line.is_empty()
1340 && !current_line.ends_with(' ')
1341 && !current_line.ends_with('(')
1342 && !current_line.ends_with('[')
1343 {
1344 current_line.push(' ');
1345 }
1346 current_line.push_str(marker);
1347 current_line.push_str(content);
1348 current_line.push_str(marker);
1349
1350 let trimmed = content.trim();
1352 let ends_with_punct = ends_with_sentence_punct(trimmed);
1353 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1354 lines.push(current_line.clone());
1355 current_line.clear();
1356 }
1357 } else {
1358 for (i, sentence) in sentences.iter().enumerate() {
1360 let trimmed = sentence.trim();
1361 if trimmed.is_empty() {
1362 continue;
1363 }
1364
1365 if i == 0 {
1366 if !current_line.is_empty()
1368 && !current_line.ends_with(' ')
1369 && !current_line.ends_with('(')
1370 && !current_line.ends_with('[')
1371 {
1372 current_line.push(' ');
1373 }
1374 current_line.push_str(marker);
1375 current_line.push_str(trimmed);
1376 current_line.push_str(marker);
1377
1378 let ends_with_punct = ends_with_sentence_punct(trimmed);
1380 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1381 lines.push(current_line.clone());
1382 current_line.clear();
1383 }
1384 } else if i == sentences.len() - 1 {
1385 let ends_with_punct = ends_with_sentence_punct(trimmed);
1387
1388 let mut line = String::new();
1389 line.push_str(marker);
1390 line.push_str(trimmed);
1391 line.push_str(marker);
1392
1393 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1394 lines.push(line);
1395 } else {
1396 *current_line = line;
1398 }
1399 } else {
1400 let mut line = String::new();
1402 line.push_str(marker);
1403 line.push_str(trimmed);
1404 line.push_str(marker);
1405 lines.push(line);
1406 }
1407 }
1408 }
1409}
1410
/// Lowercase ASCII words before which a long line may be broken
/// (see `split_at_break_word`).
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative and subordinating words
    "which", "that", "because", "when", "if", "while", "where",
    "although", "though", "unless", "since", "after", "before", "until",
    "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1446
/// Returns `true` for punctuation that ends a clause: comma, semicolon, colon
/// or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1451
1452fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1456 let mut spans = Vec::new();
1457 let mut offset = 0;
1458 for element in elements {
1459 let rendered = format!("{element}");
1460 let len = rendered.len();
1461 if !matches!(element, Element::Text(_)) {
1462 spans.push((offset, offset + len));
1463 }
1464 offset += len;
1465 }
1466 spans
1467}
1468
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both endpoints of a span are excluded, so a position exactly at the start or
/// end of an element does not count as inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1473
/// Fraction of the configured line length that the first part of a split must
/// reach; the same fraction is the threshold below which
/// `reflow_elements_semantic` treats a line as short enough to merge back.
const MIN_SPLIT_RATIO: f64 = 0.3;
1477
1478fn split_at_clause_punctuation(
1482 text: &str,
1483 line_length: usize,
1484 element_spans: &[(usize, usize)],
1485) -> Option<(String, String)> {
1486 let chars: Vec<char> = text.chars().collect();
1487 let search_end = chars.len().min(line_length);
1488 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1489
1490 let mut best_pos = None;
1491 for i in (0..search_end).rev() {
1492 if is_clause_punctuation(chars[i]) {
1493 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1495 if !is_inside_element(byte_pos, element_spans) {
1496 best_pos = Some(i);
1497 break;
1498 }
1499 }
1500 }
1501
1502 let pos = best_pos?;
1503
1504 if pos + 1 < min_first_len {
1506 return None;
1507 }
1508
1509 let first: String = chars[..=pos].iter().collect();
1511 let rest: String = chars[pos + 1..].iter().collect();
1512 let rest = rest.trim_start().to_string();
1513
1514 if rest.is_empty() {
1515 return None;
1516 }
1517
1518 Some((first, rest))
1519}
1520
1521fn split_at_break_word(text: &str, line_length: usize, element_spans: &[(usize, usize)]) -> Option<(String, String)> {
1525 let lower = text.to_lowercase();
1526 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1527 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1530 let mut search_start = 0;
1531 while let Some(pos) = lower[search_start..].find(word) {
1532 let abs_pos = search_start + pos;
1533
1534 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1536 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1537
1538 if preceded_by_space && followed_by_space {
1539 let first_part_len = text[..abs_pos].trim_end().chars().count();
1541
1542 if first_part_len >= min_first_len
1543 && first_part_len <= line_length
1544 && !is_inside_element(abs_pos, element_spans)
1545 {
1546 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1548 best_split = Some((abs_pos, word.len()));
1549 }
1550 }
1551 }
1552
1553 search_start = abs_pos + word.len();
1554 }
1555 }
1556
1557 let (byte_start, _word_len) = best_split?;
1558
1559 let first = text[..byte_start].trim_end().to_string();
1560 let rest = text[byte_start..].to_string();
1561
1562 if first.is_empty() || rest.trim().is_empty() {
1563 return None;
1564 }
1565
1566 Some((first, rest))
1567}
1568
1569fn cascade_split_line(text: &str, line_length: usize, abbreviations: &Option<Vec<String>>) -> Vec<String> {
1572 if line_length == 0 || text.chars().count() <= line_length {
1573 return vec![text.to_string()];
1574 }
1575
1576 let elements = parse_markdown_elements(text);
1577 let element_spans = compute_element_spans(&elements);
1578
1579 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans) {
1581 let mut result = vec![first];
1582 result.extend(cascade_split_line(&rest, line_length, abbreviations));
1583 return result;
1584 }
1585
1586 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans) {
1588 let mut result = vec![first];
1589 result.extend(cascade_split_line(&rest, line_length, abbreviations));
1590 return result;
1591 }
1592
1593 let options = ReflowOptions {
1595 line_length,
1596 break_on_sentences: false,
1597 preserve_breaks: false,
1598 sentence_per_line: false,
1599 semantic_line_breaks: false,
1600 abbreviations: abbreviations.clone(),
1601 };
1602 reflow_elements(&elements, &options)
1603}
1604
1605fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1609 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1611
1612 if options.line_length == 0 {
1615 return sentence_lines;
1616 }
1617
1618 let mut result = Vec::new();
1619 for line in sentence_lines {
1620 if line.chars().count() <= options.line_length {
1621 result.push(line);
1622 } else {
1623 result.extend(cascade_split_line(&line, options.line_length, &options.abbreviations));
1624 }
1625 }
1626
1627 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1630 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1631 for line in result {
1632 if !merged.is_empty() && line.chars().count() < min_line_len && !line.trim().is_empty() {
1633 let prev_ends_at_sentence = {
1635 let trimmed = merged.last().unwrap().trim_end();
1636 trimmed
1637 .chars()
1638 .rev()
1639 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1640 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1641 };
1642
1643 if !prev_ends_at_sentence {
1644 let prev = merged.last_mut().unwrap();
1645 let combined = format!("{prev} {line}");
1646 if combined.chars().count() <= options.line_length + options.line_length / 10 {
1649 *prev = combined;
1650 continue;
1651 }
1652 }
1653 }
1654 merged.push(line);
1655 }
1656 merged
1657}
1658
1659fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1661 let mut lines = Vec::new();
1662 let mut current_line = String::new();
1663 let mut current_length = 0;
1664
1665 for (idx, element) in elements.iter().enumerate() {
1666 let element_str = format!("{element}");
1667 let element_len = element.len();
1668
1669 let is_adjacent_to_prev = if idx > 0 {
1675 match (&elements[idx - 1], element) {
1676 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1677 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1678 _ => true,
1679 }
1680 } else {
1681 false
1682 };
1683
1684 if let Element::Text(text) = element {
1686 let has_leading_space = text.starts_with(char::is_whitespace);
1688 let words: Vec<&str> = text.split_whitespace().collect();
1690
1691 for (i, word) in words.iter().enumerate() {
1692 let word_len = word.chars().count();
1693 let is_trailing_punct = word
1695 .chars()
1696 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1697
1698 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1701
1702 if is_first_adjacent {
1703 if current_length + word_len > options.line_length && current_length > 0 {
1705 if let Some(last_space) = current_line.rfind(' ') {
1707 let before = current_line[..last_space].trim_end().to_string();
1708 let after = current_line[last_space + 1..].to_string();
1709 lines.push(before);
1710 current_line = format!("{after}{word}");
1711 current_length = current_line.chars().count();
1712 } else {
1713 current_line.push_str(word);
1714 current_length += word_len;
1715 }
1716 } else {
1717 current_line.push_str(word);
1718 current_length += word_len;
1719 }
1720 } else if current_length > 0
1721 && current_length + 1 + word_len > options.line_length
1722 && !is_trailing_punct
1723 {
1724 lines.push(current_line.trim().to_string());
1726 current_line = word.to_string();
1727 current_length = word_len;
1728 } else {
1729 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1733 current_line.push(' ');
1734 current_length += 1;
1735 }
1736 current_line.push_str(word);
1737 current_length += word_len;
1738 }
1739 }
1740 } else {
1741 if is_adjacent_to_prev {
1745 if current_length + element_len > options.line_length {
1747 if let Some(last_space) = current_line.rfind(' ') {
1749 let before = current_line[..last_space].trim_end().to_string();
1750 let after = current_line[last_space + 1..].to_string();
1751 lines.push(before);
1752 current_line = format!("{after}{element_str}");
1753 current_length = current_line.chars().count();
1754 } else {
1755 current_line.push_str(&element_str);
1757 current_length += element_len;
1758 }
1759 } else {
1760 current_line.push_str(&element_str);
1761 current_length += element_len;
1762 }
1763 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1764 lines.push(current_line.trim().to_string());
1766 current_line = element_str;
1767 current_length = element_len;
1768 } else {
1769 let ends_with_opener =
1771 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1772 if current_length > 0 && !ends_with_opener {
1773 current_line.push(' ');
1774 current_length += 1;
1775 }
1776 current_line.push_str(&element_str);
1777 current_length += element_len;
1778 }
1779 }
1780 }
1781
1782 if !current_line.is_empty() {
1784 lines.push(current_line.trim_end().to_string());
1785 }
1786
1787 lines
1788}
1789
1790pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1792 let lines: Vec<&str> = content.lines().collect();
1793 let mut result = Vec::new();
1794 let mut i = 0;
1795
1796 while i < lines.len() {
1797 let line = lines[i];
1798 let trimmed = line.trim();
1799
1800 if trimmed.is_empty() {
1802 result.push(String::new());
1803 i += 1;
1804 continue;
1805 }
1806
1807 if trimmed.starts_with('#') {
1809 result.push(line.to_string());
1810 i += 1;
1811 continue;
1812 }
1813
1814 if trimmed.starts_with(":::") {
1816 result.push(line.to_string());
1817 i += 1;
1818 continue;
1819 }
1820
1821 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1823 result.push(line.to_string());
1824 i += 1;
1825 while i < lines.len() {
1827 result.push(lines[i].to_string());
1828 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1829 i += 1;
1830 break;
1831 }
1832 i += 1;
1833 }
1834 continue;
1835 }
1836
1837 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1839 result.push(line.to_string());
1841 i += 1;
1842 while i < lines.len() {
1843 let next_line = lines[i];
1844 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1846 result.push(next_line.to_string());
1847 i += 1;
1848 } else {
1849 break;
1850 }
1851 }
1852 continue;
1853 }
1854
1855 if trimmed.starts_with('>') {
1857 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1860 let quote_prefix = line[0..gt_pos + 1].to_string();
1861 let quote_content = &line[quote_prefix.len()..].trim_start();
1862
1863 let reflowed = reflow_line(quote_content, options);
1864 for reflowed_line in reflowed.iter() {
1865 result.push(format!("{quote_prefix} {reflowed_line}"));
1866 }
1867 i += 1;
1868 continue;
1869 }
1870
1871 if is_horizontal_rule(trimmed) {
1873 result.push(line.to_string());
1874 i += 1;
1875 continue;
1876 }
1877
1878 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
1880 let indent = line.len() - line.trim_start().len();
1882 let indent_str = " ".repeat(indent);
1883
1884 let mut marker_end = indent;
1887 let mut content_start = indent;
1888
1889 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1890 if let Some(period_pos) = line[indent..].find('.') {
1892 marker_end = indent + period_pos + 1; content_start = marker_end;
1894 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1898 content_start += 1;
1899 }
1900 }
1901 } else {
1902 marker_end = indent + 1; content_start = marker_end;
1905 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1909 content_start += 1;
1910 }
1911 }
1912
1913 let marker = &line[indent..marker_end];
1914
1915 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1918 i += 1;
1919
1920 while i < lines.len() {
1922 let next_line = lines[i];
1923 let next_trimmed = next_line.trim();
1924
1925 if is_block_boundary(next_trimmed) {
1927 break;
1928 }
1929
1930 let next_indent = next_line.len() - next_line.trim_start().len();
1932 if next_indent >= content_start {
1933 let trimmed_start = next_line.trim_start();
1936 list_content.push(trim_preserving_hard_break(trimmed_start));
1937 i += 1;
1938 } else {
1939 break;
1941 }
1942 }
1943
1944 let combined_content = if options.preserve_breaks {
1947 list_content[0].clone()
1948 } else {
1949 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1951 if has_hard_breaks {
1952 list_content.join("\n")
1954 } else {
1955 list_content.join(" ")
1957 }
1958 };
1959
1960 let trimmed_marker = marker;
1962 let continuation_spaces = content_start;
1963
1964 let prefix_length = indent + trimmed_marker.len() + 1;
1966
1967 let adjusted_options = ReflowOptions {
1969 line_length: options.line_length.saturating_sub(prefix_length),
1970 ..options.clone()
1971 };
1972
1973 let reflowed = reflow_line(&combined_content, &adjusted_options);
1974 for (j, reflowed_line) in reflowed.iter().enumerate() {
1975 if j == 0 {
1976 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1977 } else {
1978 let continuation_indent = " ".repeat(continuation_spaces);
1980 result.push(format!("{continuation_indent}{reflowed_line}"));
1981 }
1982 }
1983 continue;
1984 }
1985
1986 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
1988 result.push(line.to_string());
1989 i += 1;
1990 continue;
1991 }
1992
1993 if trimmed.starts_with('[') && line.contains("]:") {
1995 result.push(line.to_string());
1996 i += 1;
1997 continue;
1998 }
1999
2000 if is_definition_list_item(trimmed) {
2002 result.push(line.to_string());
2003 i += 1;
2004 continue;
2005 }
2006
2007 let mut is_single_line_paragraph = true;
2009 if i + 1 < lines.len() {
2010 let next_trimmed = lines[i + 1].trim();
2011 if !is_block_boundary(next_trimmed) {
2013 is_single_line_paragraph = false;
2014 }
2015 }
2016
2017 if is_single_line_paragraph && line.chars().count() <= options.line_length {
2019 result.push(line.to_string());
2020 i += 1;
2021 continue;
2022 }
2023
2024 let mut paragraph_parts = Vec::new();
2026 let mut current_part = vec![line];
2027 i += 1;
2028
2029 if options.preserve_breaks {
2031 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2033 Some("\\")
2034 } else if line.ends_with(" ") {
2035 Some(" ")
2036 } else {
2037 None
2038 };
2039 let reflowed = reflow_line(line, options);
2040
2041 if let Some(break_marker) = hard_break_type {
2043 if !reflowed.is_empty() {
2044 let mut reflowed_with_break = reflowed;
2045 let last_idx = reflowed_with_break.len() - 1;
2046 if !has_hard_break(&reflowed_with_break[last_idx]) {
2047 reflowed_with_break[last_idx].push_str(break_marker);
2048 }
2049 result.extend(reflowed_with_break);
2050 }
2051 } else {
2052 result.extend(reflowed);
2053 }
2054 } else {
2055 while i < lines.len() {
2057 let prev_line = if !current_part.is_empty() {
2058 current_part.last().unwrap()
2059 } else {
2060 ""
2061 };
2062 let next_line = lines[i];
2063 let next_trimmed = next_line.trim();
2064
2065 if is_block_boundary(next_trimmed) {
2067 break;
2068 }
2069
2070 let prev_trimmed = prev_line.trim();
2073 let abbreviations = get_abbreviations(&options.abbreviations);
2074 let ends_with_sentence = (prev_trimmed.ends_with('.')
2075 || prev_trimmed.ends_with('!')
2076 || prev_trimmed.ends_with('?')
2077 || prev_trimmed.ends_with(".*")
2078 || prev_trimmed.ends_with("!*")
2079 || prev_trimmed.ends_with("?*")
2080 || prev_trimmed.ends_with("._")
2081 || prev_trimmed.ends_with("!_")
2082 || prev_trimmed.ends_with("?_")
2083 || prev_trimmed.ends_with(".\"")
2085 || prev_trimmed.ends_with("!\"")
2086 || prev_trimmed.ends_with("?\"")
2087 || prev_trimmed.ends_with(".'")
2088 || prev_trimmed.ends_with("!'")
2089 || prev_trimmed.ends_with("?'")
2090 || prev_trimmed.ends_with(".\u{201D}")
2091 || prev_trimmed.ends_with("!\u{201D}")
2092 || prev_trimmed.ends_with("?\u{201D}")
2093 || prev_trimmed.ends_with(".\u{2019}")
2094 || prev_trimmed.ends_with("!\u{2019}")
2095 || prev_trimmed.ends_with("?\u{2019}"))
2096 && !text_ends_with_abbreviation(
2097 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2098 &abbreviations,
2099 );
2100
2101 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2102 paragraph_parts.push(current_part.join(" "));
2104 current_part = vec![next_line];
2105 } else {
2106 current_part.push(next_line);
2107 }
2108 i += 1;
2109 }
2110
2111 if !current_part.is_empty() {
2113 if current_part.len() == 1 {
2114 paragraph_parts.push(current_part[0].to_string());
2116 } else {
2117 paragraph_parts.push(current_part.join(" "));
2118 }
2119 }
2120
2121 for (j, part) in paragraph_parts.iter().enumerate() {
2123 let reflowed = reflow_line(part, options);
2124 result.extend(reflowed);
2125
2126 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2130 let last_idx = result.len() - 1;
2131 if !has_hard_break(&result[last_idx]) {
2132 result[last_idx].push_str(" ");
2133 }
2134 }
2135 }
2136 }
2137 }
2138
2139 let result_text = result.join("\n");
2141 if content.ends_with('\n') && !result_text.ends_with('\n') {
2142 format!("{result_text}\n")
2143 } else {
2144 result_text
2145 }
2146}
2147
/// Result of reflowing a single paragraph, expressed as a text replacement.
///
/// Replacing `content[start_byte..end_byte]` with `reflowed_text` applies the
/// reflow to the original document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset of the first byte of the paragraph in the original content.
    pub start_byte: usize,
    /// Byte offset one past the last byte covered by the paragraph, including
    /// its final line terminator when there is one.
    pub end_byte: usize,
    /// Text that replaces the byte range.
    pub reflowed_text: String,
}
2158
2159pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2177 if line_number == 0 {
2178 return None;
2179 }
2180
2181 let lines: Vec<&str> = content.lines().collect();
2182
2183 if line_number > lines.len() {
2185 return None;
2186 }
2187
2188 let target_idx = line_number - 1; let target_line = lines[target_idx];
2190 let trimmed = target_line.trim();
2191
2192 if is_paragraph_boundary(trimmed, target_line) {
2194 return None;
2195 }
2196
2197 let mut para_start = target_idx;
2199 while para_start > 0 {
2200 let prev_idx = para_start - 1;
2201 let prev_line = lines[prev_idx];
2202 let prev_trimmed = prev_line.trim();
2203
2204 if is_paragraph_boundary(prev_trimmed, prev_line) {
2206 break;
2207 }
2208
2209 para_start = prev_idx;
2210 }
2211
2212 let mut para_end = target_idx;
2214 while para_end + 1 < lines.len() {
2215 let next_idx = para_end + 1;
2216 let next_line = lines[next_idx];
2217 let next_trimmed = next_line.trim();
2218
2219 if is_paragraph_boundary(next_trimmed, next_line) {
2221 break;
2222 }
2223
2224 para_end = next_idx;
2225 }
2226
2227 let paragraph_lines = &lines[para_start..=para_end];
2229
2230 let mut start_byte = 0;
2232 for line in lines.iter().take(para_start) {
2233 start_byte += line.len() + 1; }
2235
2236 let mut end_byte = start_byte;
2237 for line in paragraph_lines.iter() {
2238 end_byte += line.len() + 1; }
2240
2241 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2244
2245 if !includes_trailing_newline {
2247 end_byte -= 1;
2248 }
2249
2250 let paragraph_text = paragraph_lines.join("\n");
2252
2253 let options = ReflowOptions {
2255 line_length,
2256 break_on_sentences: true,
2257 preserve_breaks: false,
2258 sentence_per_line: false,
2259 semantic_line_breaks: false,
2260 abbreviations: None,
2261 };
2262
2263 let reflowed = reflow_markdown(¶graph_text, &options);
2265
2266 let reflowed_text = if includes_trailing_newline {
2270 if reflowed.ends_with('\n') {
2272 reflowed
2273 } else {
2274 format!("{reflowed}\n")
2275 }
2276 } else {
2277 if reflowed.ends_with('\n') {
2279 reflowed.trim_end_matches('\n').to_string()
2280 } else {
2281 reflowed
2282 }
2283 };
2284
2285 Some(ParagraphReflow {
2286 start_byte,
2287 end_byte,
2288 reflowed_text,
2289 })
2290}
2291
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` must only fire for known abbreviations
    /// that end with a period.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Known abbreviations, alone or at the end of longer text.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Ordinary words that merely end with a period.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // Other punctuation, no punctuation, empty input.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Markers followed by content.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));

        // Bare markers.
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Horizontal rules, emphasis and plain text are not list markers.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    #[test]
    fn test_is_block_boundary() {
        // Constructs that end a paragraph.
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Ordinary paragraph content.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Everything that is a block boundary is a paragraph boundary.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four spaces or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, also when indented by less than a code block.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening div marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}