1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
13 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
14 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit in which a line is measured when it is compared against the
/// configured line length.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s).
    Chars,
    /// Count Unicode display width (columns). This is the default mode.
    #[default]
    Visual,
    /// Count UTF-8 encoded bytes.
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
/// Settings that control how text is reflowed.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length, measured according to `length_mode`.
    /// `reflow_line` returns the line untouched when this is 0.
    pub line_length: usize,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// prefers breaking lines at sentence boundaries — confirm against callers.
    pub break_on_sentences: bool,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// keeps existing line breaks — confirm against callers.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits one sentence per output line; this is
    /// checked before every other mode.
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` delegates to
    /// the semantic line-break reflow.
    pub semantic_line_breaks: bool,
    /// Optional custom abbreviations handed to `get_abbreviations` when
    /// sentences are detected.
    pub abbreviations: Option<Vec<String>>,
    /// Unit used to measure line length.
    pub length_mode: ReflowLengthMode,
}
65
66impl Default for ReflowOptions {
67 fn default() -> Self {
68 Self {
69 line_length: 80,
70 break_on_sentences: true,
71 preserve_breaks: false,
72 sentence_per_line: false,
73 semantic_line_breaks: false,
74 abbreviations: None,
75 length_mode: ReflowLengthMode::default(),
76 }
77 }
78}
79
80fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
84 let chars: Vec<char> = text.chars().collect();
85
86 if pos + 1 >= chars.len() {
87 return false;
88 }
89
90 let c = chars[pos];
91 let next_char = chars[pos + 1];
92
93 if is_cjk_sentence_ending(c) {
96 let mut after_punct_pos = pos + 1;
98 while after_punct_pos < chars.len()
99 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
100 {
101 after_punct_pos += 1;
102 }
103
104 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
106 after_punct_pos += 1;
107 }
108
109 if after_punct_pos >= chars.len() {
111 return false;
112 }
113
114 while after_punct_pos < chars.len()
116 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
117 {
118 after_punct_pos += 1;
119 }
120
121 if after_punct_pos >= chars.len() {
122 return false;
123 }
124
125 return true;
128 }
129
130 if c != '.' && c != '!' && c != '?' {
132 return false;
133 }
134
135 let (_space_pos, after_space_pos) = if next_char == ' ' {
137 (pos + 1, pos + 2)
139 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
140 if chars[pos + 2] == ' ' {
142 (pos + 2, pos + 3)
144 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
145 (pos + 3, pos + 4)
147 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
148 && pos + 4 < chars.len()
149 && chars[pos + 3] == chars[pos + 2]
150 && chars[pos + 4] == ' '
151 {
152 (pos + 4, pos + 5)
154 } else {
155 return false;
156 }
157 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
158 (pos + 2, pos + 3)
160 } else if (next_char == '*' || next_char == '_')
161 && pos + 3 < chars.len()
162 && chars[pos + 2] == next_char
163 && chars[pos + 3] == ' '
164 {
165 (pos + 3, pos + 4)
167 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
168 (pos + 3, pos + 4)
170 } else {
171 return false;
172 };
173
174 let mut next_char_pos = after_space_pos;
176 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
177 next_char_pos += 1;
178 }
179
180 if next_char_pos >= chars.len() {
182 return false;
183 }
184
185 let mut first_letter_pos = next_char_pos;
187 while first_letter_pos < chars.len()
188 && (chars[first_letter_pos] == '*'
189 || chars[first_letter_pos] == '_'
190 || chars[first_letter_pos] == '~'
191 || is_opening_quote(chars[first_letter_pos]))
192 {
193 first_letter_pos += 1;
194 }
195
196 if first_letter_pos >= chars.len() {
198 return false;
199 }
200
201 let first_char = chars[first_letter_pos];
203 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
204 return false;
205 }
206
207 if pos > 0 && c == '.' {
209 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
211 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
212 return false;
213 }
214
215 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
218 return false;
219 }
220 }
221 true
222}
223
224pub fn split_into_sentences(text: &str) -> Vec<String> {
226 split_into_sentences_custom(text, &None)
227}
228
229pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
231 let abbreviations = get_abbreviations(custom_abbreviations);
232 split_into_sentences_with_set(text, &abbreviations)
233}
234
235fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
238 let mut sentences = Vec::new();
239 let mut current_sentence = String::new();
240 let mut chars = text.chars().peekable();
241 let mut pos = 0;
242
243 while let Some(c) = chars.next() {
244 current_sentence.push(c);
245
246 if is_sentence_boundary(text, pos, abbreviations) {
247 while let Some(&next) = chars.peek() {
249 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
250 current_sentence.push(chars.next().unwrap());
251 pos += 1;
252 } else {
253 break;
254 }
255 }
256
257 if chars.peek() == Some(&' ') {
259 chars.next();
260 pos += 1;
261 }
262
263 sentences.push(current_sentence.trim().to_string());
264 current_sentence.clear();
265 }
266
267 pos += 1;
268 }
269
270 if !current_sentence.trim().is_empty() {
272 sentences.push(current_sentence.trim().to_string());
273 }
274 sentences
275}
276
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_`
/// or `*`, contains nothing but that character and spaces, and has at least
/// three occurrences of it.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
305
/// Returns `true` when `line` starts with an ordered-list marker: one or more
/// ASCII digits, a period, and a space (for example `"12. item"`).
///
/// Only ASCII digits count. Other Unicode numerics such as `½` or `²` are not
/// valid ordered-list markers in CommonMark, so a line starting with them is
/// ordinary paragraph text. A marker without a following space (`"2019."`)
/// is deliberately rejected.
fn is_numbered_list_item(line: &str) -> bool {
    let after_digits = line.trim_start_matches(|ch: char| ch.is_ascii_digit());
    // At least one digit must have been consumed.
    let has_digits = after_digits.len() < line.len();
    has_digits && after_digits.starts_with(". ")
}
329
330fn is_unordered_list_marker(s: &str) -> bool {
332 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
333 && !is_horizontal_rule(s)
334 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
335}
336
337fn is_block_boundary_core(trimmed: &str) -> bool {
340 trimmed.is_empty()
341 || trimmed.starts_with('#')
342 || trimmed.starts_with("```")
343 || trimmed.starts_with("~~~")
344 || trimmed.starts_with('>')
345 || (trimmed.starts_with('[') && trimmed.contains("]:"))
346 || is_horizontal_rule(trimmed)
347 || is_unordered_list_marker(trimmed)
348 || is_numbered_list_item(trimmed)
349 || is_definition_list_item(trimmed)
350 || trimmed.starts_with(":::")
351}
352
353fn is_block_boundary(trimmed: &str) -> bool {
356 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
357}
358
359fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
363 is_block_boundary_core(trimmed)
364 || ElementCache::calculate_indentation_width_default(line) >= 4
365 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
366}
367
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash.
///
/// A single trailing carriage return (from CRLF line endings) is ignored
/// before the check. One lone trailing space is not a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
377
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    text.ends_with(['.', '!', '?'])
}
382
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A single trailing carriage return is dropped first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is trimmed and exactly two spaces
///   are put back, unless nothing but whitespace remains, in which case the
///   result is empty.
/// * Every other line, including one with a single stray trailing space,
///   simply has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as written.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Normalise any run of two or more trailing spaces to exactly two.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
413
414pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
415 if options.sentence_per_line {
417 let elements = parse_markdown_elements(line);
418 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
419 }
420
421 if options.semantic_line_breaks {
423 let elements = parse_markdown_elements(line);
424 return reflow_elements_semantic(&elements, options);
425 }
426
427 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
430 return vec![line.to_string()];
431 }
432
433 let elements = parse_markdown_elements(line);
435
436 reflow_elements(&elements, options)
438}
439
/// Where the image inside a linked image takes its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline URL, rendered as `![alt](url)`.
    Inline(String),
    /// Reference label, rendered as `![alt][ref]`.
    Reference(String),
}

/// Where the link wrapped around a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline URL, rendered as `[img](url)`.
    Inline(String),
    /// Reference label, rendered as `[img][ref]`.
    Reference(String),
}

/// An inline Markdown element produced by the element parser.
///
/// The `Display` implementation renders each element back to Markdown text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Image wrapped in a link, e.g. `[![alt](img)](target)`; both the image
    /// source and the link target may be inline or reference style.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`; the field holds the inner text.
    Strikethrough(String),
    /// Wiki link `[[content]]`; the field holds the inner content.
    WikiLink(String),
    /// Inline math `$math$`; the field holds the inner expression.
    InlineMath(String),
    /// Display math `$$math$$`; the field holds the inner expression.
    DisplayMath(String),
    /// Emoji shortcode `:name:`; the field holds the name without colons.
    EmojiShortcode(String),
    /// Autolink, stored and rendered with its angle brackets.
    Autolink(String),
    /// Raw HTML tag, rendered verbatim.
    HtmlTag(String),
    /// HTML entity, rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, rendered verbatim.
    HugoShortcode(String),
    /// Inline code; the field holds the text between the backticks.
    Code(String),
    /// Strong emphasis; `underscore` selects `__` over `**` delimiters.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis; `underscore` selects `_` over `*` delimiters.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Renders the element back to its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Elements that carry their complete source text.
            Element::Text(s)
            | Element::Autolink(s)
            | Element::HtmlTag(s)
            | Element::HtmlEntity(s)
            | Element::HugoShortcode(s) => f.write_str(s),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Images must render their full syntax; writing an empty string
            // here would silently drop the image from reflowed output.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                let marker = if *underscore { "__" } else { "**" };
                write!(f, "{marker}{content}{marker}")
            }
            Element::Italic { content, underscore } => {
                let marker = if *underscore { "_" } else { "*" };
                write!(f, "{marker}{content}{marker}")
            }
        }
    }
}
578
579impl Element {
580 fn display_width(&self, mode: ReflowLengthMode) -> usize {
584 let formatted = format!("{self}");
585 display_len(&formatted, mode)
586 }
587}
588
/// An emphasis, strong or strikethrough span located in a piece of text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset of the opening delimiter.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiters).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the delimiters are underscores rather than asterisks.
    uses_underscore: bool,
}
605
606fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
616 let mut spans = Vec::new();
617 let mut options = Options::empty();
618 options.insert(Options::ENABLE_STRIKETHROUGH);
619
620 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
623 let mut strikethrough_stack: Vec<usize> = Vec::new();
624
625 let parser = Parser::new_ext(text, options).into_offset_iter();
626
627 for (event, range) in parser {
628 match event {
629 Event::Start(Tag::Emphasis) => {
630 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
632 emphasis_stack.push((range.start, uses_underscore));
633 }
634 Event::End(TagEnd::Emphasis) => {
635 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
636 let content_start = start_byte + 1;
638 let content_end = range.end - 1;
639 if content_end > content_start
640 && let Some(content) = text.get(content_start..content_end)
641 {
642 spans.push(EmphasisSpan {
643 start: start_byte,
644 end: range.end,
645 content: content.to_string(),
646 is_strong: false,
647 is_strikethrough: false,
648 uses_underscore,
649 });
650 }
651 }
652 }
653 Event::Start(Tag::Strong) => {
654 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
656 strong_stack.push((range.start, uses_underscore));
657 }
658 Event::End(TagEnd::Strong) => {
659 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
660 let content_start = start_byte + 2;
662 let content_end = range.end - 2;
663 if content_end > content_start
664 && let Some(content) = text.get(content_start..content_end)
665 {
666 spans.push(EmphasisSpan {
667 start: start_byte,
668 end: range.end,
669 content: content.to_string(),
670 is_strong: true,
671 is_strikethrough: false,
672 uses_underscore,
673 });
674 }
675 }
676 }
677 Event::Start(Tag::Strikethrough) => {
678 strikethrough_stack.push(range.start);
679 }
680 Event::End(TagEnd::Strikethrough) => {
681 if let Some(start_byte) = strikethrough_stack.pop() {
682 let content_start = start_byte + 2;
684 let content_end = range.end - 2;
685 if content_end > content_start
686 && let Some(content) = text.get(content_start..content_end)
687 {
688 spans.push(EmphasisSpan {
689 start: start_byte,
690 end: range.end,
691 content: content.to_string(),
692 is_strong: false,
693 is_strikethrough: true,
694 uses_underscore: false,
695 });
696 }
697 }
698 }
699 _ => {}
700 }
701 }
702
703 spans.sort_by_key(|s| s.start);
705 spans
706}
707
708fn parse_markdown_elements(text: &str) -> Vec<Element> {
719 let mut elements = Vec::new();
720 let mut remaining = text;
721
722 let emphasis_spans = extract_emphasis_spans(text);
724
725 while !remaining.is_empty() {
726 let current_offset = text.len() - remaining.len();
728 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
730
731 if remaining.contains("[!") {
735 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
737 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
738 {
739 earliest_match = Some((m.start(), "linked_image_ii", m));
740 }
741
742 if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
744 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
745 {
746 earliest_match = Some((m.start(), "linked_image_ri", m));
747 }
748
749 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
751 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
752 {
753 earliest_match = Some((m.start(), "linked_image_ir", m));
754 }
755
756 if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
758 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
759 {
760 earliest_match = Some((m.start(), "linked_image_rr", m));
761 }
762 }
763
764 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
767 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
768 {
769 earliest_match = Some((m.start(), "inline_image", m));
770 }
771
772 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
774 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
775 {
776 earliest_match = Some((m.start(), "ref_image", m));
777 }
778
779 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
781 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
782 {
783 earliest_match = Some((m.start(), "footnote_ref", m));
784 }
785
786 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
788 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
789 {
790 earliest_match = Some((m.start(), "inline_link", m));
791 }
792
793 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
795 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
796 {
797 earliest_match = Some((m.start(), "ref_link", m));
798 }
799
800 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
803 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
804 {
805 earliest_match = Some((m.start(), "shortcut_ref", m));
806 }
807
808 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
810 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
811 {
812 earliest_match = Some((m.start(), "wiki_link", m));
813 }
814
815 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
817 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
818 {
819 earliest_match = Some((m.start(), "display_math", m));
820 }
821
822 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
824 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
825 {
826 earliest_match = Some((m.start(), "inline_math", m));
827 }
828
829 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
833 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
834 {
835 earliest_match = Some((m.start(), "emoji", m));
836 }
837
838 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
840 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
841 {
842 earliest_match = Some((m.start(), "html_entity", m));
843 }
844
845 if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
848 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
849 {
850 earliest_match = Some((m.start(), "hugo_shortcode", m));
851 }
852
853 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
856 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
857 {
858 let matched_text = &remaining[m.start()..m.end()];
860 let is_url_autolink = matched_text.starts_with("<http://")
861 || matched_text.starts_with("<https://")
862 || matched_text.starts_with("<mailto:")
863 || matched_text.starts_with("<ftp://")
864 || matched_text.starts_with("<ftps://");
865
866 let is_email_autolink = {
869 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
870 EMAIL_PATTERN.is_match(content)
871 };
872
873 if is_url_autolink || is_email_autolink {
874 earliest_match = Some((m.start(), "autolink", m));
875 } else {
876 earliest_match = Some((m.start(), "html_tag", m));
877 }
878 }
879
880 let mut next_special = remaining.len();
882 let mut special_type = "";
883 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
884
885 if let Some(pos) = remaining.find('`')
887 && pos < next_special
888 {
889 next_special = pos;
890 special_type = "code";
891 }
892
893 for span in &emphasis_spans {
896 if span.start >= current_offset && span.start < current_offset + remaining.len() {
897 let pos_in_remaining = span.start - current_offset;
898 if pos_in_remaining < next_special {
899 next_special = pos_in_remaining;
900 special_type = "pulldown_emphasis";
901 pulldown_emphasis = Some(span);
902 }
903 break; }
905 }
906
907 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
909 pos < next_special
910 } else {
911 false
912 };
913
914 if should_process_markdown_link {
915 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
916
917 if pos > 0 {
919 elements.push(Element::Text(remaining[..pos].to_string()));
920 }
921
922 match pattern_type {
924 "linked_image_ii" => {
926 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
927 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
928 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
929 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
930 elements.push(Element::LinkedImage {
931 alt: alt.to_string(),
932 img_source: LinkedImageSource::Inline(img_url.to_string()),
933 link_target: LinkedImageTarget::Inline(link_url.to_string()),
934 });
935 remaining = &remaining[match_obj.end()..];
936 } else {
937 elements.push(Element::Text("[".to_string()));
938 remaining = &remaining[1..];
939 }
940 }
941 "linked_image_ri" => {
943 if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
944 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
945 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
946 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
947 elements.push(Element::LinkedImage {
948 alt: alt.to_string(),
949 img_source: LinkedImageSource::Reference(img_ref.to_string()),
950 link_target: LinkedImageTarget::Inline(link_url.to_string()),
951 });
952 remaining = &remaining[match_obj.end()..];
953 } else {
954 elements.push(Element::Text("[".to_string()));
955 remaining = &remaining[1..];
956 }
957 }
958 "linked_image_ir" => {
960 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
961 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
962 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
963 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
964 elements.push(Element::LinkedImage {
965 alt: alt.to_string(),
966 img_source: LinkedImageSource::Inline(img_url.to_string()),
967 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
968 });
969 remaining = &remaining[match_obj.end()..];
970 } else {
971 elements.push(Element::Text("[".to_string()));
972 remaining = &remaining[1..];
973 }
974 }
975 "linked_image_rr" => {
977 if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
978 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
979 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
980 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
981 elements.push(Element::LinkedImage {
982 alt: alt.to_string(),
983 img_source: LinkedImageSource::Reference(img_ref.to_string()),
984 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
985 });
986 remaining = &remaining[match_obj.end()..];
987 } else {
988 elements.push(Element::Text("[".to_string()));
989 remaining = &remaining[1..];
990 }
991 }
992 "inline_image" => {
993 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
994 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
995 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
996 elements.push(Element::InlineImage {
997 alt: alt.to_string(),
998 url: url.to_string(),
999 });
1000 remaining = &remaining[match_obj.end()..];
1001 } else {
1002 elements.push(Element::Text("!".to_string()));
1003 remaining = &remaining[1..];
1004 }
1005 }
1006 "ref_image" => {
1007 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1008 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1009 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1010
1011 if reference.is_empty() {
1012 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1013 } else {
1014 elements.push(Element::ReferenceImage {
1015 alt: alt.to_string(),
1016 reference: reference.to_string(),
1017 });
1018 }
1019 remaining = &remaining[match_obj.end()..];
1020 } else {
1021 elements.push(Element::Text("!".to_string()));
1022 remaining = &remaining[1..];
1023 }
1024 }
1025 "footnote_ref" => {
1026 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1027 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1028 elements.push(Element::FootnoteReference { note: note.to_string() });
1029 remaining = &remaining[match_obj.end()..];
1030 } else {
1031 elements.push(Element::Text("[".to_string()));
1032 remaining = &remaining[1..];
1033 }
1034 }
1035 "inline_link" => {
1036 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1037 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1038 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1039 elements.push(Element::Link {
1040 text: text.to_string(),
1041 url: url.to_string(),
1042 });
1043 remaining = &remaining[match_obj.end()..];
1044 } else {
1045 elements.push(Element::Text("[".to_string()));
1047 remaining = &remaining[1..];
1048 }
1049 }
1050 "ref_link" => {
1051 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1052 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1053 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1054
1055 if reference.is_empty() {
1056 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1058 } else {
1059 elements.push(Element::ReferenceLink {
1061 text: text.to_string(),
1062 reference: reference.to_string(),
1063 });
1064 }
1065 remaining = &remaining[match_obj.end()..];
1066 } else {
1067 elements.push(Element::Text("[".to_string()));
1069 remaining = &remaining[1..];
1070 }
1071 }
1072 "shortcut_ref" => {
1073 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1074 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1075 elements.push(Element::ShortcutReference {
1076 reference: reference.to_string(),
1077 });
1078 remaining = &remaining[match_obj.end()..];
1079 } else {
1080 elements.push(Element::Text("[".to_string()));
1082 remaining = &remaining[1..];
1083 }
1084 }
1085 "wiki_link" => {
1086 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1087 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1088 elements.push(Element::WikiLink(content.to_string()));
1089 remaining = &remaining[match_obj.end()..];
1090 } else {
1091 elements.push(Element::Text("[[".to_string()));
1092 remaining = &remaining[2..];
1093 }
1094 }
1095 "display_math" => {
1096 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1097 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1098 elements.push(Element::DisplayMath(math.to_string()));
1099 remaining = &remaining[match_obj.end()..];
1100 } else {
1101 elements.push(Element::Text("$$".to_string()));
1102 remaining = &remaining[2..];
1103 }
1104 }
1105 "inline_math" => {
1106 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1107 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1108 elements.push(Element::InlineMath(math.to_string()));
1109 remaining = &remaining[match_obj.end()..];
1110 } else {
1111 elements.push(Element::Text("$".to_string()));
1112 remaining = &remaining[1..];
1113 }
1114 }
1115 "emoji" => {
1117 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1118 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1119 elements.push(Element::EmojiShortcode(emoji.to_string()));
1120 remaining = &remaining[match_obj.end()..];
1121 } else {
1122 elements.push(Element::Text(":".to_string()));
1123 remaining = &remaining[1..];
1124 }
1125 }
1126 "html_entity" => {
1127 elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
1129 remaining = &remaining[match_obj.end()..];
1130 }
1131 "hugo_shortcode" => {
1132 elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
1134 remaining = &remaining[match_obj.end()..];
1135 }
1136 "autolink" => {
1137 elements.push(Element::Autolink(match_obj.as_str().to_string()));
1139 remaining = &remaining[match_obj.end()..];
1140 }
1141 "html_tag" => {
1142 elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
1144 remaining = &remaining[match_obj.end()..];
1145 }
1146 _ => {
1147 elements.push(Element::Text("[".to_string()));
1149 remaining = &remaining[1..];
1150 }
1151 }
1152 } else {
1153 if next_special > 0 && next_special < remaining.len() {
1157 elements.push(Element::Text(remaining[..next_special].to_string()));
1158 remaining = &remaining[next_special..];
1159 }
1160
1161 match special_type {
1163 "code" => {
1164 if let Some(code_end) = remaining[1..].find('`') {
1166 let code = &remaining[1..1 + code_end];
1167 elements.push(Element::Code(code.to_string()));
1168 remaining = &remaining[1 + code_end + 1..];
1169 } else {
1170 elements.push(Element::Text(remaining.to_string()));
1172 break;
1173 }
1174 }
1175 "pulldown_emphasis" => {
1176 if let Some(span) = pulldown_emphasis {
1178 let span_len = span.end - span.start;
1179 if span.is_strikethrough {
1180 elements.push(Element::Strikethrough(span.content.clone()));
1181 } else if span.is_strong {
1182 elements.push(Element::Bold {
1183 content: span.content.clone(),
1184 underscore: span.uses_underscore,
1185 });
1186 } else {
1187 elements.push(Element::Italic {
1188 content: span.content.clone(),
1189 underscore: span.uses_underscore,
1190 });
1191 }
1192 remaining = &remaining[span_len..];
1193 } else {
1194 elements.push(Element::Text(remaining[..1].to_string()));
1196 remaining = &remaining[1..];
1197 }
1198 }
1199 _ => {
1200 elements.push(Element::Text(remaining.to_string()));
1202 break;
1203 }
1204 }
1205 }
1206 }
1207
1208 elements
1209}
1210
1211fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1213 let abbreviations = get_abbreviations(custom_abbreviations);
1214 let mut lines = Vec::new();
1215 let mut current_line = String::new();
1216
1217 for (idx, element) in elements.iter().enumerate() {
1218 let element_str = format!("{element}");
1219
1220 if let Element::Text(text) = element {
1222 let combined = format!("{current_line}{text}");
1224 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1226
1227 if sentences.len() > 1 {
1228 for (i, sentence) in sentences.iter().enumerate() {
1230 if i == 0 {
1231 let trimmed = sentence.trim();
1234
1235 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1236 current_line = sentence.to_string();
1238 } else {
1239 lines.push(sentence.to_string());
1241 current_line.clear();
1242 }
1243 } else if i == sentences.len() - 1 {
1244 let trimmed = sentence.trim();
1246 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1247
1248 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1249 lines.push(sentence.to_string());
1251 current_line.clear();
1252 } else {
1253 current_line = sentence.to_string();
1255 }
1256 } else {
1257 lines.push(sentence.to_string());
1259 }
1260 }
1261 } else {
1262 let trimmed = combined.trim();
1264
1265 if trimmed.is_empty() {
1269 continue;
1270 }
1271
1272 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1273
1274 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1275 lines.push(trimmed.to_string());
1277 current_line.clear();
1278 } else {
1279 current_line = combined;
1281 }
1282 }
1283 } else if let Element::Italic { content, underscore } = element {
1284 let marker = if *underscore { "_" } else { "*" };
1286 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1287 } else if let Element::Bold { content, underscore } = element {
1288 let marker = if *underscore { "__" } else { "**" };
1290 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1291 } else if let Element::Strikethrough(content) = element {
1292 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1294 } else {
1295 let is_adjacent = if idx > 0 {
1298 match &elements[idx - 1] {
1299 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1300 _ => true,
1301 }
1302 } else {
1303 false
1304 };
1305
1306 if !is_adjacent
1308 && !current_line.is_empty()
1309 && !current_line.ends_with(' ')
1310 && !current_line.ends_with('(')
1311 && !current_line.ends_with('[')
1312 {
1313 current_line.push(' ');
1314 }
1315 current_line.push_str(&element_str);
1316 }
1317 }
1318
1319 if !current_line.is_empty() {
1321 lines.push(current_line.trim().to_string());
1322 }
1323 lines
1324}
1325
1326fn handle_emphasis_sentence_split(
1328 content: &str,
1329 marker: &str,
1330 abbreviations: &HashSet<String>,
1331 current_line: &mut String,
1332 lines: &mut Vec<String>,
1333) {
1334 let sentences = split_into_sentences_with_set(content, abbreviations);
1336
1337 if sentences.len() <= 1 {
1338 if !current_line.is_empty()
1340 && !current_line.ends_with(' ')
1341 && !current_line.ends_with('(')
1342 && !current_line.ends_with('[')
1343 {
1344 current_line.push(' ');
1345 }
1346 current_line.push_str(marker);
1347 current_line.push_str(content);
1348 current_line.push_str(marker);
1349
1350 let trimmed = content.trim();
1352 let ends_with_punct = ends_with_sentence_punct(trimmed);
1353 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1354 lines.push(current_line.clone());
1355 current_line.clear();
1356 }
1357 } else {
1358 for (i, sentence) in sentences.iter().enumerate() {
1360 let trimmed = sentence.trim();
1361 if trimmed.is_empty() {
1362 continue;
1363 }
1364
1365 if i == 0 {
1366 if !current_line.is_empty()
1368 && !current_line.ends_with(' ')
1369 && !current_line.ends_with('(')
1370 && !current_line.ends_with('[')
1371 {
1372 current_line.push(' ');
1373 }
1374 current_line.push_str(marker);
1375 current_line.push_str(trimmed);
1376 current_line.push_str(marker);
1377
1378 let ends_with_punct = ends_with_sentence_punct(trimmed);
1380 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1381 lines.push(current_line.clone());
1382 current_line.clear();
1383 }
1384 } else if i == sentences.len() - 1 {
1385 let ends_with_punct = ends_with_sentence_punct(trimmed);
1387
1388 let mut line = String::new();
1389 line.push_str(marker);
1390 line.push_str(trimmed);
1391 line.push_str(marker);
1392
1393 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1394 lines.push(line);
1395 } else {
1396 *current_line = line;
1398 }
1399 } else {
1400 let mut line = String::new();
1402 line.push_str(marker);
1403 line.push_str(trimmed);
1404 line.push_str(marker);
1405 lines.push(line);
1406 }
1407 }
1408 }
1409}
1410
/// Lowercase words before which `split_at_break_word` may break an over-long
/// line. A candidate only counts when it is delimited by spaces in the text.
#[rustfmt::skip]
const BREAK_WORDS: &[&str] = &[
    "and", "or", "but", "nor", "yet", "so", "for",
    "which", "that",
    "because", "when", "if", "while", "where", "although", "though", "unless",
    "since", "after", "before", "until", "as", "once", "whether",
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1446
/// Returns `true` for the characters treated as clause separators: comma,
/// semicolon, colon and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1451
1452fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1456 let mut spans = Vec::new();
1457 let mut offset = 0;
1458 for element in elements {
1459 let rendered = format!("{element}");
1460 let len = rendered.len();
1461 if !matches!(element, Element::Text(_)) {
1462 spans.push((offset, offset + len));
1463 }
1464 offset += len;
1465 }
1466 spans
1467}
1468
/// Returns `true` when `pos` lies strictly between the start and end of any
/// span; positions equal to a span boundary are not inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1473
/// Fraction of the configured line length used as a lower bound: a split is
/// rejected when its first part would be shorter than this share of the line,
/// and the semantic reflow tries to merge lines shorter than it back into the
/// preceding line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1477
1478fn split_at_clause_punctuation(
1482 text: &str,
1483 line_length: usize,
1484 element_spans: &[(usize, usize)],
1485 length_mode: ReflowLengthMode,
1486) -> Option<(String, String)> {
1487 let chars: Vec<char> = text.chars().collect();
1488 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1489
1490 let mut width_acc = 0;
1492 let mut search_end_char = 0;
1493 for (idx, &c) in chars.iter().enumerate() {
1494 let c_width = display_len(&c.to_string(), length_mode);
1495 if width_acc + c_width > line_length {
1496 break;
1497 }
1498 width_acc += c_width;
1499 search_end_char = idx + 1;
1500 }
1501
1502 let mut best_pos = None;
1503 for i in (0..search_end_char).rev() {
1504 if is_clause_punctuation(chars[i]) {
1505 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1507 if !is_inside_element(byte_pos, element_spans) {
1508 best_pos = Some(i);
1509 break;
1510 }
1511 }
1512 }
1513
1514 let pos = best_pos?;
1515
1516 let first: String = chars[..=pos].iter().collect();
1518 let first_display_len = display_len(&first, length_mode);
1519 if first_display_len < min_first_len {
1520 return None;
1521 }
1522
1523 let rest: String = chars[pos + 1..].iter().collect();
1525 let rest = rest.trim_start().to_string();
1526
1527 if rest.is_empty() {
1528 return None;
1529 }
1530
1531 Some((first, rest))
1532}
1533
1534fn split_at_break_word(
1538 text: &str,
1539 line_length: usize,
1540 element_spans: &[(usize, usize)],
1541 length_mode: ReflowLengthMode,
1542) -> Option<(String, String)> {
1543 let lower = text.to_lowercase();
1544 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1545 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1548 let mut search_start = 0;
1549 while let Some(pos) = lower[search_start..].find(word) {
1550 let abs_pos = search_start + pos;
1551
1552 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1554 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1555
1556 if preceded_by_space && followed_by_space {
1557 let first_part = text[..abs_pos].trim_end();
1559 let first_part_len = display_len(first_part, length_mode);
1560
1561 if first_part_len >= min_first_len
1562 && first_part_len <= line_length
1563 && !is_inside_element(abs_pos, element_spans)
1564 {
1565 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1567 best_split = Some((abs_pos, word.len()));
1568 }
1569 }
1570 }
1571
1572 search_start = abs_pos + word.len();
1573 }
1574 }
1575
1576 let (byte_start, _word_len) = best_split?;
1577
1578 let first = text[..byte_start].trim_end().to_string();
1579 let rest = text[byte_start..].to_string();
1580
1581 if first.is_empty() || rest.trim().is_empty() {
1582 return None;
1583 }
1584
1585 Some((first, rest))
1586}
1587
1588fn cascade_split_line(
1591 text: &str,
1592 line_length: usize,
1593 abbreviations: &Option<Vec<String>>,
1594 length_mode: ReflowLengthMode,
1595) -> Vec<String> {
1596 if line_length == 0 || display_len(text, length_mode) <= line_length {
1597 return vec![text.to_string()];
1598 }
1599
1600 let elements = parse_markdown_elements(text);
1601 let element_spans = compute_element_spans(&elements);
1602
1603 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1605 let mut result = vec![first];
1606 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1607 return result;
1608 }
1609
1610 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1612 let mut result = vec![first];
1613 result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1614 return result;
1615 }
1616
1617 let options = ReflowOptions {
1619 line_length,
1620 break_on_sentences: false,
1621 preserve_breaks: false,
1622 sentence_per_line: false,
1623 semantic_line_breaks: false,
1624 abbreviations: abbreviations.clone(),
1625 length_mode,
1626 };
1627 reflow_elements(&elements, &options)
1628}
1629
1630fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1634 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1636
1637 if options.line_length == 0 {
1640 return sentence_lines;
1641 }
1642
1643 let length_mode = options.length_mode;
1644 let mut result = Vec::new();
1645 for line in sentence_lines {
1646 if display_len(&line, length_mode) <= options.line_length {
1647 result.push(line);
1648 } else {
1649 result.extend(cascade_split_line(
1650 &line,
1651 options.line_length,
1652 &options.abbreviations,
1653 length_mode,
1654 ));
1655 }
1656 }
1657
1658 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1661 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1662 for line in result {
1663 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1664 let prev_ends_at_sentence = {
1666 let trimmed = merged.last().unwrap().trim_end();
1667 trimmed
1668 .chars()
1669 .rev()
1670 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1671 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1672 };
1673
1674 if !prev_ends_at_sentence {
1675 let prev = merged.last_mut().unwrap();
1676 let combined = format!("{prev} {line}");
1677 if display_len(&combined, length_mode) <= options.line_length {
1679 *prev = combined;
1680 continue;
1681 }
1682 }
1683 }
1684 merged.push(line);
1685 }
1686 merged
1687}
1688
/// Find the byte index of the last space in `line` that is not strictly
/// inside any of `element_spans`, or `None` when there is no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(idx, _)| idx)
        .find(|&idx| element_spans.iter().all(|&(start, end)| idx <= start || idx >= end))
}
1702
1703fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1705 let mut lines = Vec::new();
1706 let mut current_line = String::new();
1707 let mut current_length = 0;
1708 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1710 let length_mode = options.length_mode;
1711
1712 for (idx, element) in elements.iter().enumerate() {
1713 let element_str = format!("{element}");
1714 let element_len = element.display_width(length_mode);
1715
1716 let is_adjacent_to_prev = if idx > 0 {
1722 match (&elements[idx - 1], element) {
1723 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1724 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1725 _ => true,
1726 }
1727 } else {
1728 false
1729 };
1730
1731 if let Element::Text(text) = element {
1733 let has_leading_space = text.starts_with(char::is_whitespace);
1735 let words: Vec<&str> = text.split_whitespace().collect();
1737
1738 for (i, word) in words.iter().enumerate() {
1739 let word_len = display_len(word, length_mode);
1740 let is_trailing_punct = word
1742 .chars()
1743 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1744
1745 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1748
1749 if is_first_adjacent {
1750 if current_length + word_len > options.line_length && current_length > 0 {
1752 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1755 let before = current_line[..last_space].trim_end().to_string();
1756 let after = current_line[last_space + 1..].to_string();
1757 lines.push(before);
1758 current_line = format!("{after}{word}");
1759 current_length = display_len(¤t_line, length_mode);
1760 current_line_element_spans.clear();
1761 } else {
1762 current_line.push_str(word);
1763 current_length += word_len;
1764 }
1765 } else {
1766 current_line.push_str(word);
1767 current_length += word_len;
1768 }
1769 } else if current_length > 0
1770 && current_length + 1 + word_len > options.line_length
1771 && !is_trailing_punct
1772 {
1773 lines.push(current_line.trim().to_string());
1775 current_line = word.to_string();
1776 current_length = word_len;
1777 current_line_element_spans.clear();
1778 } else {
1779 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1783 current_line.push(' ');
1784 current_length += 1;
1785 }
1786 current_line.push_str(word);
1787 current_length += word_len;
1788 }
1789 }
1790 } else if matches!(
1791 element,
1792 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1793 ) && element_len > options.line_length
1794 {
1795 let (content, marker): (&str, &str) = match element {
1799 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1800 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1801 Element::Strikethrough(content) => (content.as_str(), "~~"),
1802 _ => unreachable!(),
1803 };
1804
1805 let words: Vec<&str> = content.split_whitespace().collect();
1806 let n = words.len();
1807
1808 if n == 0 {
1809 let full = format!("{marker}{marker}");
1811 let full_len = display_len(&full, length_mode);
1812 if !is_adjacent_to_prev && current_length > 0 {
1813 current_line.push(' ');
1814 current_length += 1;
1815 }
1816 current_line.push_str(&full);
1817 current_length += full_len;
1818 } else {
1819 for (i, word) in words.iter().enumerate() {
1820 let is_first = i == 0;
1821 let is_last = i == n - 1;
1822 let word_str: String = match (is_first, is_last) {
1823 (true, true) => format!("{marker}{word}{marker}"),
1824 (true, false) => format!("{marker}{word}"),
1825 (false, true) => format!("{word}{marker}"),
1826 (false, false) => word.to_string(),
1827 };
1828 let word_len = display_len(&word_str, length_mode);
1829
1830 let needs_space = if is_first {
1831 !is_adjacent_to_prev && current_length > 0
1832 } else {
1833 current_length > 0
1834 };
1835
1836 if needs_space && current_length + 1 + word_len > options.line_length {
1837 lines.push(current_line.trim_end().to_string());
1838 current_line = word_str;
1839 current_length = word_len;
1840 current_line_element_spans.clear();
1841 } else {
1842 if needs_space {
1843 current_line.push(' ');
1844 current_length += 1;
1845 }
1846 current_line.push_str(&word_str);
1847 current_length += word_len;
1848 }
1849 }
1850 }
1851 } else {
1852 if is_adjacent_to_prev {
1856 if current_length + element_len > options.line_length {
1858 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1861 let before = current_line[..last_space].trim_end().to_string();
1862 let after = current_line[last_space + 1..].to_string();
1863 lines.push(before);
1864 current_line = format!("{after}{element_str}");
1865 current_length = display_len(¤t_line, length_mode);
1866 current_line_element_spans.clear();
1867 let start = after.len();
1869 current_line_element_spans.push((start, start + element_str.len()));
1870 } else {
1871 let start = current_line.len();
1873 current_line.push_str(&element_str);
1874 current_length += element_len;
1875 current_line_element_spans.push((start, current_line.len()));
1876 }
1877 } else {
1878 let start = current_line.len();
1879 current_line.push_str(&element_str);
1880 current_length += element_len;
1881 current_line_element_spans.push((start, current_line.len()));
1882 }
1883 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1884 lines.push(current_line.trim().to_string());
1886 current_line = element_str.clone();
1887 current_length = element_len;
1888 current_line_element_spans.clear();
1889 current_line_element_spans.push((0, element_str.len()));
1890 } else {
1891 let ends_with_opener =
1893 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1894 if current_length > 0 && !ends_with_opener {
1895 current_line.push(' ');
1896 current_length += 1;
1897 }
1898 let start = current_line.len();
1899 current_line.push_str(&element_str);
1900 current_length += element_len;
1901 current_line_element_spans.push((start, current_line.len()));
1902 }
1903 }
1904 }
1905
1906 if !current_line.is_empty() {
1908 lines.push(current_line.trim_end().to_string());
1909 }
1910
1911 lines
1912}
1913
1914pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1916 let lines: Vec<&str> = content.lines().collect();
1917 let mut result = Vec::new();
1918 let mut i = 0;
1919
1920 while i < lines.len() {
1921 let line = lines[i];
1922 let trimmed = line.trim();
1923
1924 if trimmed.is_empty() {
1926 result.push(String::new());
1927 i += 1;
1928 continue;
1929 }
1930
1931 if trimmed.starts_with('#') {
1933 result.push(line.to_string());
1934 i += 1;
1935 continue;
1936 }
1937
1938 if trimmed.starts_with(":::") {
1940 result.push(line.to_string());
1941 i += 1;
1942 continue;
1943 }
1944
1945 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1947 result.push(line.to_string());
1948 i += 1;
1949 while i < lines.len() {
1951 result.push(lines[i].to_string());
1952 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1953 i += 1;
1954 break;
1955 }
1956 i += 1;
1957 }
1958 continue;
1959 }
1960
1961 if ElementCache::calculate_indentation_width_default(line) >= 4 {
1963 result.push(line.to_string());
1965 i += 1;
1966 while i < lines.len() {
1967 let next_line = lines[i];
1968 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1970 result.push(next_line.to_string());
1971 i += 1;
1972 } else {
1973 break;
1974 }
1975 }
1976 continue;
1977 }
1978
1979 if trimmed.starts_with('>') {
1981 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1984 let quote_prefix = line[0..gt_pos + 1].to_string();
1985 let quote_content = &line[quote_prefix.len()..].trim_start();
1986
1987 let reflowed = reflow_line(quote_content, options);
1988 for reflowed_line in reflowed.iter() {
1989 result.push(format!("{quote_prefix} {reflowed_line}"));
1990 }
1991 i += 1;
1992 continue;
1993 }
1994
1995 if is_horizontal_rule(trimmed) {
1997 result.push(line.to_string());
1998 i += 1;
1999 continue;
2000 }
2001
2002 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2004 let indent = line.len() - line.trim_start().len();
2006 let indent_str = " ".repeat(indent);
2007
2008 let mut marker_end = indent;
2011 let mut content_start = indent;
2012
2013 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2014 if let Some(period_pos) = line[indent..].find('.') {
2016 marker_end = indent + period_pos + 1; content_start = marker_end;
2018 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2022 content_start += 1;
2023 }
2024 }
2025 } else {
2026 marker_end = indent + 1; content_start = marker_end;
2029 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2033 content_start += 1;
2034 }
2035 }
2036
2037 let marker = &line[indent..marker_end];
2038
2039 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2042 i += 1;
2043
2044 while i < lines.len() {
2046 let next_line = lines[i];
2047 let next_trimmed = next_line.trim();
2048
2049 if is_block_boundary(next_trimmed) {
2051 break;
2052 }
2053
2054 let next_indent = next_line.len() - next_line.trim_start().len();
2056 if next_indent >= content_start {
2057 let trimmed_start = next_line.trim_start();
2060 list_content.push(trim_preserving_hard_break(trimmed_start));
2061 i += 1;
2062 } else {
2063 break;
2065 }
2066 }
2067
2068 let combined_content = if options.preserve_breaks {
2071 list_content[0].clone()
2072 } else {
2073 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2075 if has_hard_breaks {
2076 list_content.join("\n")
2078 } else {
2079 list_content.join(" ")
2081 }
2082 };
2083
2084 let trimmed_marker = marker;
2086 let continuation_spaces = content_start;
2087
2088 let prefix_length = indent + trimmed_marker.len() + 1;
2090
2091 let adjusted_options = ReflowOptions {
2093 line_length: options.line_length.saturating_sub(prefix_length),
2094 ..options.clone()
2095 };
2096
2097 let reflowed = reflow_line(&combined_content, &adjusted_options);
2098 for (j, reflowed_line) in reflowed.iter().enumerate() {
2099 if j == 0 {
2100 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2101 } else {
2102 let continuation_indent = " ".repeat(continuation_spaces);
2104 result.push(format!("{continuation_indent}{reflowed_line}"));
2105 }
2106 }
2107 continue;
2108 }
2109
2110 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2112 result.push(line.to_string());
2113 i += 1;
2114 continue;
2115 }
2116
2117 if trimmed.starts_with('[') && line.contains("]:") {
2119 result.push(line.to_string());
2120 i += 1;
2121 continue;
2122 }
2123
2124 if is_definition_list_item(trimmed) {
2126 result.push(line.to_string());
2127 i += 1;
2128 continue;
2129 }
2130
2131 let mut is_single_line_paragraph = true;
2133 if i + 1 < lines.len() {
2134 let next_trimmed = lines[i + 1].trim();
2135 if !is_block_boundary(next_trimmed) {
2137 is_single_line_paragraph = false;
2138 }
2139 }
2140
2141 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2143 result.push(line.to_string());
2144 i += 1;
2145 continue;
2146 }
2147
2148 let mut paragraph_parts = Vec::new();
2150 let mut current_part = vec![line];
2151 i += 1;
2152
2153 if options.preserve_breaks {
2155 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2157 Some("\\")
2158 } else if line.ends_with(" ") {
2159 Some(" ")
2160 } else {
2161 None
2162 };
2163 let reflowed = reflow_line(line, options);
2164
2165 if let Some(break_marker) = hard_break_type {
2167 if !reflowed.is_empty() {
2168 let mut reflowed_with_break = reflowed;
2169 let last_idx = reflowed_with_break.len() - 1;
2170 if !has_hard_break(&reflowed_with_break[last_idx]) {
2171 reflowed_with_break[last_idx].push_str(break_marker);
2172 }
2173 result.extend(reflowed_with_break);
2174 }
2175 } else {
2176 result.extend(reflowed);
2177 }
2178 } else {
2179 while i < lines.len() {
2181 let prev_line = if !current_part.is_empty() {
2182 current_part.last().unwrap()
2183 } else {
2184 ""
2185 };
2186 let next_line = lines[i];
2187 let next_trimmed = next_line.trim();
2188
2189 if is_block_boundary(next_trimmed) {
2191 break;
2192 }
2193
2194 let prev_trimmed = prev_line.trim();
2197 let abbreviations = get_abbreviations(&options.abbreviations);
2198 let ends_with_sentence = (prev_trimmed.ends_with('.')
2199 || prev_trimmed.ends_with('!')
2200 || prev_trimmed.ends_with('?')
2201 || prev_trimmed.ends_with(".*")
2202 || prev_trimmed.ends_with("!*")
2203 || prev_trimmed.ends_with("?*")
2204 || prev_trimmed.ends_with("._")
2205 || prev_trimmed.ends_with("!_")
2206 || prev_trimmed.ends_with("?_")
2207 || prev_trimmed.ends_with(".\"")
2209 || prev_trimmed.ends_with("!\"")
2210 || prev_trimmed.ends_with("?\"")
2211 || prev_trimmed.ends_with(".'")
2212 || prev_trimmed.ends_with("!'")
2213 || prev_trimmed.ends_with("?'")
2214 || prev_trimmed.ends_with(".\u{201D}")
2215 || prev_trimmed.ends_with("!\u{201D}")
2216 || prev_trimmed.ends_with("?\u{201D}")
2217 || prev_trimmed.ends_with(".\u{2019}")
2218 || prev_trimmed.ends_with("!\u{2019}")
2219 || prev_trimmed.ends_with("?\u{2019}"))
2220 && !text_ends_with_abbreviation(
2221 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2222 &abbreviations,
2223 );
2224
2225 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2226 paragraph_parts.push(current_part.join(" "));
2228 current_part = vec![next_line];
2229 } else {
2230 current_part.push(next_line);
2231 }
2232 i += 1;
2233 }
2234
2235 if !current_part.is_empty() {
2237 if current_part.len() == 1 {
2238 paragraph_parts.push(current_part[0].to_string());
2240 } else {
2241 paragraph_parts.push(current_part.join(" "));
2242 }
2243 }
2244
2245 for (j, part) in paragraph_parts.iter().enumerate() {
2247 let reflowed = reflow_line(part, options);
2248 result.extend(reflowed);
2249
2250 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2254 let last_idx = result.len() - 1;
2255 if !has_hard_break(&result[last_idx]) {
2256 result[last_idx].push_str(" ");
2257 }
2258 }
2259 }
2260 }
2261 }
2262
2263 let result_text = result.join("\n");
2265 if content.ends_with('\n') && !result_text.ends_with('\n') {
2266 format!("{result_text}\n")
2267 } else {
2268 result_text
2269 }
2270}
2271
/// Result of reflowing a single paragraph: the replacement text together with
/// the byte range it belongs to.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Start of the affected range, in bytes.
    /// NOTE(review): presumably an offset into the original document - confirm against the producers.
    pub start_byte: usize,
    /// End of the affected range, in bytes.
    /// NOTE(review): whether this bound is inclusive or exclusive is not visible here - confirm.
    pub end_byte: usize,
    /// The reflowed replacement text for that range.
    pub reflowed_text: String,
}
2282
/// One collected line of a block quote paragraph.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Line text with the block quote prefix (if any) removed.
    pub(crate) content: String,
    /// `true` when the line carried its own `>` prefix, `false` for a lazy
    /// continuation line.
    pub(crate) is_explicit: bool,
    /// The prefix the line was written with; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Build the record for a line that was written with an explicit prefix.
    pub fn explicit(content: String, prefix: String) -> Self {
        BlockquoteLineData {
            is_explicit: true,
            prefix: Some(prefix),
            content,
        }
    }

    /// Build the record for a lazy continuation line (no prefix).
    pub fn lazy(content: String) -> Self {
        BlockquoteLineData {
            is_explicit: false,
            prefix: None,
            content,
        }
    }
}
2317
/// How the continuation lines (every line after the first) of a reflowed
/// block quote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every line is emitted with the explicit block quote prefix.
    Explicit,
    /// Continuation lines are emitted without a prefix unless their content
    /// forces one.
    Lazy,
}
2324
2325pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2333 let mut explicit_count = 0usize;
2334 let mut lazy_count = 0usize;
2335
2336 for line in lines.iter().skip(1) {
2337 if line.is_explicit {
2338 explicit_count += 1;
2339 } else {
2340 lazy_count += 1;
2341 }
2342 }
2343
2344 if explicit_count > 0 && lazy_count == 0 {
2345 BlockquoteContinuationStyle::Explicit
2346 } else if lazy_count > 0 && explicit_count == 0 {
2347 BlockquoteContinuationStyle::Lazy
2348 } else if explicit_count >= lazy_count {
2349 BlockquoteContinuationStyle::Explicit
2350 } else {
2351 BlockquoteContinuationStyle::Lazy
2352 }
2353}
2354
2355pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2360 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2361
2362 for (idx, line) in lines.iter().enumerate() {
2363 let Some(prefix) = line.prefix.as_ref() else {
2364 continue;
2365 };
2366 counts
2367 .entry(prefix.clone())
2368 .and_modify(|entry| entry.0 += 1)
2369 .or_insert((1, idx));
2370 }
2371
2372 counts
2373 .into_iter()
2374 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2375 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2376 })
2377 .map(|(prefix, _)| prefix)
2378 .unwrap_or_else(|| fallback.to_string())
2379}
2380
2381pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2386 let trimmed = content_line.trim_start();
2387 trimmed.starts_with('>')
2388 || trimmed.starts_with('#')
2389 || trimmed.starts_with("```")
2390 || trimmed.starts_with("~~~")
2391 || is_unordered_list_marker(trimmed)
2392 || is_numbered_list_item(trimmed)
2393 || is_horizontal_rule(trimmed)
2394 || is_definition_list_item(trimmed)
2395 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2396 || trimmed.starts_with(":::")
2397 || (trimmed.starts_with('<')
2398 && !trimmed.starts_with("<http")
2399 && !trimmed.starts_with("<https")
2400 && !trimmed.starts_with("<mailto:"))
2401}
2402
2403pub fn reflow_blockquote_content(
2412 lines: &[BlockquoteLineData],
2413 explicit_prefix: &str,
2414 continuation_style: BlockquoteContinuationStyle,
2415 options: &ReflowOptions,
2416) -> Vec<String> {
2417 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2418 let segments = split_into_segments_strs(&content_strs);
2419 let mut reflowed_content_lines: Vec<String> = Vec::new();
2420
2421 for segment in segments {
2422 let hard_break_type = segment.last().and_then(|&line| {
2423 let line = line.strip_suffix('\r').unwrap_or(line);
2424 if line.ends_with('\\') {
2425 Some("\\")
2426 } else if line.ends_with(" ") {
2427 Some(" ")
2428 } else {
2429 None
2430 }
2431 });
2432
2433 let pieces: Vec<&str> = segment
2434 .iter()
2435 .map(|&line| {
2436 if let Some(l) = line.strip_suffix('\\') {
2437 l.trim_end()
2438 } else if let Some(l) = line.strip_suffix(" ") {
2439 l.trim_end()
2440 } else {
2441 line.trim_end()
2442 }
2443 })
2444 .collect();
2445
2446 let segment_text = pieces.join(" ");
2447 let segment_text = segment_text.trim();
2448 if segment_text.is_empty() {
2449 continue;
2450 }
2451
2452 let mut reflowed = reflow_line(segment_text, options);
2453 if let Some(break_marker) = hard_break_type
2454 && !reflowed.is_empty()
2455 {
2456 let last_idx = reflowed.len() - 1;
2457 if !has_hard_break(&reflowed[last_idx]) {
2458 reflowed[last_idx].push_str(break_marker);
2459 }
2460 }
2461 reflowed_content_lines.extend(reflowed);
2462 }
2463
2464 let mut styled_lines: Vec<String> = Vec::new();
2465 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2466 let force_explicit = idx == 0
2467 || continuation_style == BlockquoteContinuationStyle::Explicit
2468 || should_force_explicit_blockquote_line(line);
2469 if force_explicit {
2470 styled_lines.push(format!("{explicit_prefix}{line}"));
2471 } else {
2472 styled_lines.push(line.clone());
2473 }
2474 }
2475
2476 styled_lines
2477}
2478
2479fn is_blockquote_content_boundary(content: &str) -> bool {
2480 let trimmed = content.trim();
2481 trimmed.is_empty()
2482 || is_block_boundary(trimmed)
2483 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2484 || trimmed.starts_with(":::")
2485 || crate::utils::is_template_directive_only(content)
2486 || is_standalone_attr_list(content)
2487 || is_snippet_block_delimiter(content)
2488}
2489
2490fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2491 let mut segments = Vec::new();
2492 let mut current = Vec::new();
2493
2494 for &line in lines {
2495 current.push(line);
2496 if has_hard_break(line) {
2497 segments.push(current);
2498 current = Vec::new();
2499 }
2500 }
2501
2502 if !current.is_empty() {
2503 segments.push(current);
2504 }
2505
2506 segments
2507}
2508
2509fn reflow_blockquote_paragraph_at_line(
2510 content: &str,
2511 lines: &[&str],
2512 target_idx: usize,
2513 options: &ReflowOptions,
2514) -> Option<ParagraphReflow> {
2515 let mut anchor_idx = target_idx;
2516 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2517 parsed.nesting_level
2518 } else {
2519 let mut found = None;
2520 let mut idx = target_idx;
2521 loop {
2522 if lines[idx].trim().is_empty() {
2523 break;
2524 }
2525 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2526 found = Some((idx, parsed.nesting_level));
2527 break;
2528 }
2529 if idx == 0 {
2530 break;
2531 }
2532 idx -= 1;
2533 }
2534 let (idx, level) = found?;
2535 anchor_idx = idx;
2536 level
2537 };
2538
2539 let mut para_start = anchor_idx;
2541 while para_start > 0 {
2542 let prev_idx = para_start - 1;
2543 let prev_line = lines[prev_idx];
2544
2545 if prev_line.trim().is_empty() {
2546 break;
2547 }
2548
2549 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2550 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2551 break;
2552 }
2553 para_start = prev_idx;
2554 continue;
2555 }
2556
2557 let prev_lazy = prev_line.trim_start();
2558 if is_blockquote_content_boundary(prev_lazy) {
2559 break;
2560 }
2561 para_start = prev_idx;
2562 }
2563
2564 while para_start < lines.len() {
2566 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2567 para_start += 1;
2568 continue;
2569 };
2570 target_level = parsed.nesting_level;
2571 break;
2572 }
2573
2574 if para_start >= lines.len() || para_start > target_idx {
2575 return None;
2576 }
2577
2578 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2581 let mut idx = para_start;
2582 while idx < lines.len() {
2583 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2584 break;
2585 }
2586
2587 let line = lines[idx];
2588 if line.trim().is_empty() {
2589 break;
2590 }
2591
2592 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2593 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2594 break;
2595 }
2596 collected.push((
2597 idx,
2598 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2599 ));
2600 idx += 1;
2601 continue;
2602 }
2603
2604 let lazy_content = line.trim_start();
2605 if is_blockquote_content_boundary(lazy_content) {
2606 break;
2607 }
2608
2609 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2610 idx += 1;
2611 }
2612
2613 if collected.is_empty() {
2614 return None;
2615 }
2616
2617 let para_end = collected[collected.len() - 1].0;
2618 if target_idx < para_start || target_idx > para_end {
2619 return None;
2620 }
2621
2622 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2623
2624 let fallback_prefix = line_data
2625 .iter()
2626 .find_map(|d| d.prefix.clone())
2627 .unwrap_or_else(|| "> ".to_string());
2628 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2629 let continuation_style = blockquote_continuation_style(&line_data);
2630
2631 let adjusted_line_length = options
2632 .line_length
2633 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2634 .max(1);
2635
2636 let adjusted_options = ReflowOptions {
2637 line_length: adjusted_line_length,
2638 ..options.clone()
2639 };
2640
2641 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2642
2643 if styled_lines.is_empty() {
2644 return None;
2645 }
2646
2647 let mut start_byte = 0;
2649 for line in lines.iter().take(para_start) {
2650 start_byte += line.len() + 1;
2651 }
2652
2653 let mut end_byte = start_byte;
2654 for line in lines.iter().take(para_end + 1).skip(para_start) {
2655 end_byte += line.len() + 1;
2656 }
2657
2658 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2659 if !includes_trailing_newline {
2660 end_byte -= 1;
2661 }
2662
2663 let reflowed_joined = styled_lines.join("\n");
2664 let reflowed_text = if includes_trailing_newline {
2665 if reflowed_joined.ends_with('\n') {
2666 reflowed_joined
2667 } else {
2668 format!("{reflowed_joined}\n")
2669 }
2670 } else if reflowed_joined.ends_with('\n') {
2671 reflowed_joined.trim_end_matches('\n').to_string()
2672 } else {
2673 reflowed_joined
2674 };
2675
2676 Some(ParagraphReflow {
2677 start_byte,
2678 end_byte,
2679 reflowed_text,
2680 })
2681}
2682
2683pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2701 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2702}
2703
2704pub fn reflow_paragraph_at_line_with_mode(
2706 content: &str,
2707 line_number: usize,
2708 line_length: usize,
2709 length_mode: ReflowLengthMode,
2710) -> Option<ParagraphReflow> {
2711 let options = ReflowOptions {
2712 line_length,
2713 length_mode,
2714 ..Default::default()
2715 };
2716 reflow_paragraph_at_line_with_options(content, line_number, &options)
2717}
2718
2719pub fn reflow_paragraph_at_line_with_options(
2730 content: &str,
2731 line_number: usize,
2732 options: &ReflowOptions,
2733) -> Option<ParagraphReflow> {
2734 if line_number == 0 {
2735 return None;
2736 }
2737
2738 let lines: Vec<&str> = content.lines().collect();
2739
2740 if line_number > lines.len() {
2742 return None;
2743 }
2744
2745 let target_idx = line_number - 1; let target_line = lines[target_idx];
2747 let trimmed = target_line.trim();
2748
2749 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2752 return Some(blockquote_reflow);
2753 }
2754
2755 if is_paragraph_boundary(trimmed, target_line) {
2757 return None;
2758 }
2759
2760 let mut para_start = target_idx;
2762 while para_start > 0 {
2763 let prev_idx = para_start - 1;
2764 let prev_line = lines[prev_idx];
2765 let prev_trimmed = prev_line.trim();
2766
2767 if is_paragraph_boundary(prev_trimmed, prev_line) {
2769 break;
2770 }
2771
2772 para_start = prev_idx;
2773 }
2774
2775 let mut para_end = target_idx;
2777 while para_end + 1 < lines.len() {
2778 let next_idx = para_end + 1;
2779 let next_line = lines[next_idx];
2780 let next_trimmed = next_line.trim();
2781
2782 if is_paragraph_boundary(next_trimmed, next_line) {
2784 break;
2785 }
2786
2787 para_end = next_idx;
2788 }
2789
2790 let paragraph_lines = &lines[para_start..=para_end];
2792
2793 let mut start_byte = 0;
2795 for line in lines.iter().take(para_start) {
2796 start_byte += line.len() + 1; }
2798
2799 let mut end_byte = start_byte;
2800 for line in paragraph_lines.iter() {
2801 end_byte += line.len() + 1; }
2803
2804 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2807
2808 if !includes_trailing_newline {
2810 end_byte -= 1;
2811 }
2812
2813 let paragraph_text = paragraph_lines.join("\n");
2815
2816 let reflowed = reflow_markdown(¶graph_text, options);
2818
2819 let reflowed_text = if includes_trailing_newline {
2823 if reflowed.ends_with('\n') {
2825 reflowed
2826 } else {
2827 format!("{reflowed}\n")
2828 }
2829 } else {
2830 if reflowed.ends_with('\n') {
2832 reflowed.trim_end_matches('\n').to_string()
2833 } else {
2834 reflowed
2835 }
2836 };
2837
2838 Some(ParagraphReflow {
2839 start_byte,
2840 end_byte,
2841 reflowed_text,
2842 })
2843}
2844
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` accepts only text whose last word is an
    /// abbreviation from the set built by `get_abbreviations(&None)`,
    /// followed by a period.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Text ending in an abbreviation from the set.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Words ending with a period that must not be treated as abbreviations.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // Punctuation other than a period, no punctuation, and empty text.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    /// `is_unordered_list_marker` accepts `-`, `*`, and `+` markers and
    /// rejects look-alikes.
    #[test]
    fn test_is_unordered_list_marker() {
        // Marker followed by content.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));

        // Marker on its own.
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Thematic-break shapes.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));

        // Marker character not followed by a space.
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));

        // Empty text, plain text, and other block types.
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    /// `is_block_boundary` recognises lines that start a new block and leaves
    /// ordinary inline text alone.
    #[test]
    fn test_is_block_boundary() {
        // Blank line.
        assert!(is_block_boundary(""));

        // Headings.
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));

        // Code fences.
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));

        // Blockquote, table row, and link reference definition.
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));

        // Thematic breaks.
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));

        // Unordered and ordered list items.
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));

        // Definition list item and div markers.
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Inline content is not a boundary.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list line that follows a one-line term must stay on its
    /// own line after reflowing.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` takes the trimmed line and the raw line, so it
    /// can consider leading indentation as well as content.
    #[test]
    fn test_is_paragraph_boundary() {
        // Block-level content.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code blocks: CommonMark requires four spaces or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with and without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text with less than code-block indentation.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    /// Asking to reflow at a div marker line must not produce a reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening `::: {.callout-note}` marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}