1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit used when measuring how long a piece of text is for reflow purposes.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Measure in Unicode scalar values (`char`s).
    Chars,
    /// Measure in terminal display columns (the default).
    #[default]
    Visual,
    /// Measure in UTF-8 bytes.
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
45#[derive(Clone)]
47pub struct ReflowOptions {
48 pub line_length: usize,
50 pub break_on_sentences: bool,
52 pub preserve_breaks: bool,
54 pub sentence_per_line: bool,
56 pub semantic_line_breaks: bool,
58 pub abbreviations: Option<Vec<String>>,
62 pub length_mode: ReflowLengthMode,
64 pub attr_lists: bool,
67 pub require_sentence_capital: bool,
72 pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79 fn default() -> Self {
80 Self {
81 line_length: 80,
82 break_on_sentences: true,
83 preserve_breaks: false,
84 sentence_per_line: false,
85 semantic_line_breaks: false,
86 abbreviations: None,
87 length_mode: ReflowLengthMode::default(),
88 attr_lists: false,
89 require_sentence_capital: true,
90 max_list_continuation_indent: None,
91 }
92 }
93}
94
95fn is_sentence_boundary(
99 text: &str,
100 pos: usize,
101 abbreviations: &HashSet<String>,
102 require_sentence_capital: bool,
103) -> bool {
104 let chars: Vec<char> = text.chars().collect();
105
106 if pos + 1 >= chars.len() {
107 return false;
108 }
109
110 let c = chars[pos];
111 let next_char = chars[pos + 1];
112
113 if is_cjk_sentence_ending(c) {
116 let mut after_punct_pos = pos + 1;
118 while after_punct_pos < chars.len()
119 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
120 {
121 after_punct_pos += 1;
122 }
123
124 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
126 after_punct_pos += 1;
127 }
128
129 if after_punct_pos >= chars.len() {
131 return false;
132 }
133
134 while after_punct_pos < chars.len()
136 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
137 {
138 after_punct_pos += 1;
139 }
140
141 if after_punct_pos >= chars.len() {
142 return false;
143 }
144
145 return true;
148 }
149
150 if c != '.' && c != '!' && c != '?' {
152 return false;
153 }
154
155 let (_space_pos, after_space_pos) = if next_char == ' ' {
157 (pos + 1, pos + 2)
159 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
160 if chars[pos + 2] == ' ' {
162 (pos + 2, pos + 3)
164 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
165 (pos + 3, pos + 4)
167 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
168 && pos + 4 < chars.len()
169 && chars[pos + 3] == chars[pos + 2]
170 && chars[pos + 4] == ' '
171 {
172 (pos + 4, pos + 5)
174 } else {
175 return false;
176 }
177 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
178 (pos + 2, pos + 3)
180 } else if (next_char == '*' || next_char == '_')
181 && pos + 3 < chars.len()
182 && chars[pos + 2] == next_char
183 && chars[pos + 3] == ' '
184 {
185 (pos + 3, pos + 4)
187 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
188 (pos + 3, pos + 4)
190 } else {
191 return false;
192 };
193
194 let mut next_char_pos = after_space_pos;
196 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
197 next_char_pos += 1;
198 }
199
200 if next_char_pos >= chars.len() {
202 return false;
203 }
204
205 let mut first_letter_pos = next_char_pos;
207 while first_letter_pos < chars.len()
208 && (chars[first_letter_pos] == '*'
209 || chars[first_letter_pos] == '_'
210 || chars[first_letter_pos] == '~'
211 || is_opening_quote(chars[first_letter_pos]))
212 {
213 first_letter_pos += 1;
214 }
215
216 if first_letter_pos >= chars.len() {
218 return false;
219 }
220
221 let first_char = chars[first_letter_pos];
222
223 if c == '!' || c == '?' {
225 return true;
226 }
227
228 if pos > 0 {
232 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
234 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
235 return false;
236 }
237
238 if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
240 return false;
241 }
242
243 if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
247 return false;
248 }
249 }
250
251 if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
254 return false;
255 }
256
257 true
258}
259
260pub fn split_into_sentences(text: &str) -> Vec<String> {
262 split_into_sentences_custom(text, &None)
263}
264
265pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
267 let abbreviations = get_abbreviations(custom_abbreviations);
268 split_into_sentences_with_set(text, &abbreviations, true)
269}
270
271fn split_into_sentences_with_set(
274 text: &str,
275 abbreviations: &HashSet<String>,
276 require_sentence_capital: bool,
277) -> Vec<String> {
278 let mut sentences = Vec::new();
279 let mut current_sentence = String::new();
280 let mut chars = text.chars().peekable();
281 let mut pos = 0;
282
283 while let Some(c) = chars.next() {
284 current_sentence.push(c);
285
286 if is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
287 while let Some(&next) = chars.peek() {
289 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
290 current_sentence.push(chars.next().unwrap());
291 pos += 1;
292 } else {
293 break;
294 }
295 }
296
297 if chars.peek() == Some(&' ') {
299 chars.next();
300 pos += 1;
301 }
302
303 sentences.push(current_sentence.trim().to_string());
304 current_sentence.clear();
305 }
306
307 pos += 1;
308 }
309
310 if !current_sentence.trim().is_empty() {
312 sentences.push(current_sentence.trim().to_string());
313 }
314 sentences
315}
316
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_`
/// or `*`, contains nothing but that character and spaces, and uses the
/// character at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
345
/// Returns `true` when `line` starts with one or more numeric characters
/// immediately followed by `". "` (for example `"12. item"`).
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first non-numeric character (or the end of the line).
    let digits_end = line
        .find(|ch: char| !ch.is_numeric())
        .unwrap_or(line.len());

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
369
370fn is_unordered_list_marker(s: &str) -> bool {
372 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
373 && !is_horizontal_rule(s)
374 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
375}
376
377fn is_block_boundary_core(trimmed: &str) -> bool {
380 trimmed.is_empty()
381 || trimmed.starts_with('#')
382 || trimmed.starts_with("```")
383 || trimmed.starts_with("~~~")
384 || trimmed.starts_with('>')
385 || (trimmed.starts_with('[') && trimmed.contains("]:"))
386 || is_horizontal_rule(trimmed)
387 || is_unordered_list_marker(trimmed)
388 || is_numbered_list_item(trimmed)
389 || is_definition_list_item(trimmed)
390 || trimmed.starts_with(":::")
391}
392
393fn is_block_boundary(trimmed: &str) -> bool {
396 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
397}
398
399fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
403 is_block_boundary_core(trimmed)
404 || calculate_indentation_width_default(line) >= 4
405 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
406}
407
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either two or more trailing spaces or a trailing
/// backslash. A single trailing space is ordinary trailing whitespace and is
/// not a hard break. A trailing `\r` (from CRLF line endings) is ignored
/// before checking.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
417
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
422
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalized to exactly two
///   trailing spaces; if the line has no other content the result is empty.
/// * Anything else, including a single trailing space (which is not a hard
///   break), has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as written.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Space hard break: normalize to the canonical two trailing spaces.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
453
454fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
456 if options.attr_lists {
457 parse_markdown_elements_with_attr_lists(text)
458 } else {
459 parse_markdown_elements(text)
460 }
461}
462
463pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
464 if options.sentence_per_line {
466 let elements = parse_elements(line, options);
467 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
468 }
469
470 if options.semantic_line_breaks {
472 let elements = parse_elements(line, options);
473 return reflow_elements_semantic(&elements, options);
474 }
475
476 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
479 return vec![line.to_string()];
480 }
481
482 let elements = parse_elements(line, options);
484
485 reflow_elements(&elements, options)
487}
488
/// Where the image inside a linked image (`[![alt](src)](target)`) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: `![alt](url)`.
    Inline(String),
    /// Reference-style image: `![alt][ref]`.
    Reference(String),
}

/// Where a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: `[...](url)`.
    Inline(String),
    /// Reference-style link: `[...][ref]`.
    Reference(String),
}

/// An inline Markdown element recognized by the parser.
///
/// The `Display` implementation renders each element back to its Markdown
/// source form, so parsing followed by formatting round-trips the text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link, in any inline/reference combination.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`.
    Strikethrough(String),
    /// Wiki link `[[target]]`.
    WikiLink(String),
    /// Inline math `$math$`.
    InlineMath(String),
    /// Display math `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored verbatim including the angle brackets.
    Autolink(String),
    /// HTML tag, stored verbatim.
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored verbatim.
    HugoShortcode(String),
    /// Attribute list `{...}`, stored verbatim.
    AttrList(String),
    /// Inline code; the content excludes the backticks.
    Code(String),
    /// Bold text; `underscore` selects `__` over `**` as the delimiter.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Italic text; `underscore` selects `_` over `*` as the delimiter.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Renders the element back to its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Inline images must render their full syntax; emitting nothing
            // would silently delete the image from reflowed output.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Inner image part first, then wrap it in the link syntax.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            // These variants store their source text verbatim.
            Element::Autolink(s)
            | Element::HtmlTag(s)
            | Element::HtmlEntity(s)
            | Element::HugoShortcode(s)
            | Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
630
631impl Element {
632 fn display_width(&self, mode: ReflowLengthMode) -> usize {
636 let formatted = format!("{self}");
637 display_len(&formatted, mode)
638 }
639}
640
/// An emphasis, strong, or strikethrough span located in a piece of text.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset of the opening delimiter in the scanned text.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// Text between the opening and closing delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the delimiter is `_` / `__` rather than `*` / `**`.
    uses_underscore: bool,
}
657
658fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
668 let mut spans = Vec::new();
669 let mut options = Options::empty();
670 options.insert(Options::ENABLE_STRIKETHROUGH);
671
672 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
675 let mut strikethrough_stack: Vec<usize> = Vec::new();
676
677 let parser = Parser::new_ext(text, options).into_offset_iter();
678
679 for (event, range) in parser {
680 match event {
681 Event::Start(Tag::Emphasis) => {
682 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
684 emphasis_stack.push((range.start, uses_underscore));
685 }
686 Event::End(TagEnd::Emphasis) => {
687 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
688 let content_start = start_byte + 1;
690 let content_end = range.end - 1;
691 if content_end > content_start
692 && let Some(content) = text.get(content_start..content_end)
693 {
694 spans.push(EmphasisSpan {
695 start: start_byte,
696 end: range.end,
697 content: content.to_string(),
698 is_strong: false,
699 is_strikethrough: false,
700 uses_underscore,
701 });
702 }
703 }
704 }
705 Event::Start(Tag::Strong) => {
706 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
708 strong_stack.push((range.start, uses_underscore));
709 }
710 Event::End(TagEnd::Strong) => {
711 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
712 let content_start = start_byte + 2;
714 let content_end = range.end - 2;
715 if content_end > content_start
716 && let Some(content) = text.get(content_start..content_end)
717 {
718 spans.push(EmphasisSpan {
719 start: start_byte,
720 end: range.end,
721 content: content.to_string(),
722 is_strong: true,
723 is_strikethrough: false,
724 uses_underscore,
725 });
726 }
727 }
728 }
729 Event::Start(Tag::Strikethrough) => {
730 strikethrough_stack.push(range.start);
731 }
732 Event::End(TagEnd::Strikethrough) => {
733 if let Some(start_byte) = strikethrough_stack.pop() {
734 let content_start = start_byte + 2;
736 let content_end = range.end - 2;
737 if content_end > content_start
738 && let Some(content) = text.get(content_start..content_end)
739 {
740 spans.push(EmphasisSpan {
741 start: start_byte,
742 end: range.end,
743 content: content.to_string(),
744 is_strong: false,
745 is_strikethrough: true,
746 uses_underscore: false,
747 });
748 }
749 }
750 }
751 _ => {}
752 }
753 }
754
755 spans.sort_by_key(|s| s.start);
757 spans
758}
759
760fn parse_markdown_elements(text: &str) -> Vec<Element> {
771 parse_markdown_elements_inner(text, false)
772}
773
774fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
775 parse_markdown_elements_inner(text, true)
776}
777
778fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
779 let mut elements = Vec::new();
780 let mut remaining = text;
781
782 let emphasis_spans = extract_emphasis_spans(text);
784
785 while !remaining.is_empty() {
786 let current_offset = text.len() - remaining.len();
788 let mut earliest_match: Option<(usize, usize, &str)> = None;
791
792 if remaining.contains("[!") {
796 if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
798 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
799 {
800 earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
801 }
802
803 if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
805 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
806 {
807 earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
808 }
809
810 if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
812 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
813 {
814 earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
815 }
816
817 if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
819 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
820 {
821 earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
822 }
823 }
824
825 if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
828 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
829 {
830 earliest_match = Some((m.start(), m.end(), "inline_image"));
831 }
832
833 if let Some(m) = REF_IMAGE_REGEX.find(remaining)
835 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
836 {
837 earliest_match = Some((m.start(), m.end(), "ref_image"));
838 }
839
840 if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
842 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
843 {
844 earliest_match = Some((m.start(), m.end(), "footnote_ref"));
845 }
846
847 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
849 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
850 {
851 earliest_match = Some((m.start(), m.end(), "inline_link"));
852 }
853
854 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
856 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
857 {
858 earliest_match = Some((m.start(), m.end(), "ref_link"));
859 }
860
861 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
864 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
865 {
866 earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
867 }
868
869 if let Some(m) = WIKI_LINK_REGEX.find(remaining)
871 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
872 {
873 earliest_match = Some((m.start(), m.end(), "wiki_link"));
874 }
875
876 if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
878 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
879 {
880 earliest_match = Some((m.start(), m.end(), "display_math"));
881 }
882
883 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
885 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
886 {
887 earliest_match = Some((m.start(), m.end(), "inline_math"));
888 }
889
890 if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
894 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
895 {
896 earliest_match = Some((m.start(), m.end(), "emoji"));
897 }
898
899 if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
901 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
902 {
903 earliest_match = Some((m.start(), m.end(), "html_entity"));
904 }
905
906 if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
909 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
910 {
911 earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
912 }
913
914 if let Some(m) = HTML_TAG_PATTERN.find(remaining)
917 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
918 {
919 let matched_text = &remaining[m.start()..m.end()];
921 let is_url_autolink = matched_text.starts_with("<http://")
922 || matched_text.starts_with("<https://")
923 || matched_text.starts_with("<mailto:")
924 || matched_text.starts_with("<ftp://")
925 || matched_text.starts_with("<ftps://");
926
927 let is_email_autolink = {
930 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
931 EMAIL_PATTERN.is_match(content)
932 };
933
934 if is_url_autolink || is_email_autolink {
935 earliest_match = Some((m.start(), m.end(), "autolink"));
936 } else {
937 earliest_match = Some((m.start(), m.end(), "html_tag"));
938 }
939 }
940
941 let mut next_special = remaining.len();
943 let mut special_type = "";
944 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
945 let mut attr_list_len: usize = 0;
946
947 if let Some(pos) = remaining.find('`')
949 && pos < next_special
950 {
951 next_special = pos;
952 special_type = "code";
953 }
954
955 if attr_lists
957 && let Some(pos) = remaining.find('{')
958 && pos < next_special
959 && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
960 && m.start() == 0
961 {
962 next_special = pos;
963 special_type = "attr_list";
964 attr_list_len = m.end();
965 }
966
967 for span in &emphasis_spans {
970 if span.start >= current_offset && span.start < current_offset + remaining.len() {
971 let pos_in_remaining = span.start - current_offset;
972 if pos_in_remaining < next_special {
973 next_special = pos_in_remaining;
974 special_type = "pulldown_emphasis";
975 pulldown_emphasis = Some(span);
976 }
977 break; }
979 }
980
981 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
983 pos < next_special
984 } else {
985 false
986 };
987
988 if should_process_markdown_link {
989 let (pos, match_end, pattern_type) = earliest_match.unwrap();
990
991 if pos > 0 {
993 elements.push(Element::Text(remaining[..pos].to_string()));
994 }
995
996 match pattern_type {
998 "linked_image_ii" => {
1000 if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1001 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1002 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1003 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1004 elements.push(Element::LinkedImage {
1005 alt: alt.to_string(),
1006 img_source: LinkedImageSource::Inline(img_url.to_string()),
1007 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1008 });
1009 remaining = &remaining[match_end..];
1010 } else {
1011 elements.push(Element::Text("[".to_string()));
1012 remaining = &remaining[1..];
1013 }
1014 }
1015 "linked_image_ri" => {
1017 if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1018 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1019 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1020 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1021 elements.push(Element::LinkedImage {
1022 alt: alt.to_string(),
1023 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1024 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1025 });
1026 remaining = &remaining[match_end..];
1027 } else {
1028 elements.push(Element::Text("[".to_string()));
1029 remaining = &remaining[1..];
1030 }
1031 }
1032 "linked_image_ir" => {
1034 if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1035 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1036 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1037 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1038 elements.push(Element::LinkedImage {
1039 alt: alt.to_string(),
1040 img_source: LinkedImageSource::Inline(img_url.to_string()),
1041 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1042 });
1043 remaining = &remaining[match_end..];
1044 } else {
1045 elements.push(Element::Text("[".to_string()));
1046 remaining = &remaining[1..];
1047 }
1048 }
1049 "linked_image_rr" => {
1051 if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1052 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1053 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1054 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1055 elements.push(Element::LinkedImage {
1056 alt: alt.to_string(),
1057 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1058 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1059 });
1060 remaining = &remaining[match_end..];
1061 } else {
1062 elements.push(Element::Text("[".to_string()));
1063 remaining = &remaining[1..];
1064 }
1065 }
1066 "inline_image" => {
1067 if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1068 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1069 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1070 elements.push(Element::InlineImage {
1071 alt: alt.to_string(),
1072 url: url.to_string(),
1073 });
1074 remaining = &remaining[match_end..];
1075 } else {
1076 elements.push(Element::Text("!".to_string()));
1077 remaining = &remaining[1..];
1078 }
1079 }
1080 "ref_image" => {
1081 if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1082 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1083 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1084
1085 if reference.is_empty() {
1086 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1087 } else {
1088 elements.push(Element::ReferenceImage {
1089 alt: alt.to_string(),
1090 reference: reference.to_string(),
1091 });
1092 }
1093 remaining = &remaining[match_end..];
1094 } else {
1095 elements.push(Element::Text("!".to_string()));
1096 remaining = &remaining[1..];
1097 }
1098 }
1099 "footnote_ref" => {
1100 if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1101 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1102 elements.push(Element::FootnoteReference { note: note.to_string() });
1103 remaining = &remaining[match_end..];
1104 } else {
1105 elements.push(Element::Text("[".to_string()));
1106 remaining = &remaining[1..];
1107 }
1108 }
1109 "inline_link" => {
1110 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1111 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1112 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1113 elements.push(Element::Link {
1114 text: text.to_string(),
1115 url: url.to_string(),
1116 });
1117 remaining = &remaining[match_end..];
1118 } else {
1119 elements.push(Element::Text("[".to_string()));
1121 remaining = &remaining[1..];
1122 }
1123 }
1124 "ref_link" => {
1125 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1126 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1127 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1128
1129 if reference.is_empty() {
1130 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1132 } else {
1133 elements.push(Element::ReferenceLink {
1135 text: text.to_string(),
1136 reference: reference.to_string(),
1137 });
1138 }
1139 remaining = &remaining[match_end..];
1140 } else {
1141 elements.push(Element::Text("[".to_string()));
1143 remaining = &remaining[1..];
1144 }
1145 }
1146 "shortcut_ref" => {
1147 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1148 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1149 elements.push(Element::ShortcutReference {
1150 reference: reference.to_string(),
1151 });
1152 remaining = &remaining[match_end..];
1153 } else {
1154 elements.push(Element::Text("[".to_string()));
1156 remaining = &remaining[1..];
1157 }
1158 }
1159 "wiki_link" => {
1160 if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1161 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1162 elements.push(Element::WikiLink(content.to_string()));
1163 remaining = &remaining[match_end..];
1164 } else {
1165 elements.push(Element::Text("[[".to_string()));
1166 remaining = &remaining[2..];
1167 }
1168 }
1169 "display_math" => {
1170 if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1171 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1172 elements.push(Element::DisplayMath(math.to_string()));
1173 remaining = &remaining[match_end..];
1174 } else {
1175 elements.push(Element::Text("$$".to_string()));
1176 remaining = &remaining[2..];
1177 }
1178 }
1179 "inline_math" => {
1180 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1181 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1182 elements.push(Element::InlineMath(math.to_string()));
1183 remaining = &remaining[match_end..];
1184 } else {
1185 elements.push(Element::Text("$".to_string()));
1186 remaining = &remaining[1..];
1187 }
1188 }
1189 "emoji" => {
1191 if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1192 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1193 elements.push(Element::EmojiShortcode(emoji.to_string()));
1194 remaining = &remaining[match_end..];
1195 } else {
1196 elements.push(Element::Text(":".to_string()));
1197 remaining = &remaining[1..];
1198 }
1199 }
1200 "html_entity" => {
1201 elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1203 remaining = &remaining[match_end..];
1204 }
1205 "hugo_shortcode" => {
1206 elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1208 remaining = &remaining[match_end..];
1209 }
1210 "autolink" => {
1211 elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1213 remaining = &remaining[match_end..];
1214 }
1215 "html_tag" => {
1216 elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1218 remaining = &remaining[match_end..];
1219 }
1220 _ => {
1221 elements.push(Element::Text("[".to_string()));
1223 remaining = &remaining[1..];
1224 }
1225 }
1226 } else {
1227 if next_special > 0 && next_special < remaining.len() {
1231 elements.push(Element::Text(remaining[..next_special].to_string()));
1232 remaining = &remaining[next_special..];
1233 }
1234
1235 match special_type {
1237 "code" => {
1238 if let Some(code_end) = remaining[1..].find('`') {
1240 let code = &remaining[1..1 + code_end];
1241 elements.push(Element::Code(code.to_string()));
1242 remaining = &remaining[1 + code_end + 1..];
1243 } else {
1244 elements.push(Element::Text(remaining.to_string()));
1246 break;
1247 }
1248 }
1249 "attr_list" => {
1250 elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1251 remaining = &remaining[attr_list_len..];
1252 }
1253 "pulldown_emphasis" => {
1254 if let Some(span) = pulldown_emphasis {
1256 let span_len = span.end - span.start;
1257 if span.is_strikethrough {
1258 elements.push(Element::Strikethrough(span.content.clone()));
1259 } else if span.is_strong {
1260 elements.push(Element::Bold {
1261 content: span.content.clone(),
1262 underscore: span.uses_underscore,
1263 });
1264 } else {
1265 elements.push(Element::Italic {
1266 content: span.content.clone(),
1267 underscore: span.uses_underscore,
1268 });
1269 }
1270 remaining = &remaining[span_len..];
1271 } else {
1272 elements.push(Element::Text(remaining[..1].to_string()));
1274 remaining = &remaining[1..];
1275 }
1276 }
1277 _ => {
1278 elements.push(Element::Text(remaining.to_string()));
1280 break;
1281 }
1282 }
1283 }
1284 }
1285
1286 elements
1287}
1288
1289fn reflow_elements_sentence_per_line(
1291 elements: &[Element],
1292 custom_abbreviations: &Option<Vec<String>>,
1293 require_sentence_capital: bool,
1294) -> Vec<String> {
1295 let abbreviations = get_abbreviations(custom_abbreviations);
1296 let mut lines = Vec::new();
1297 let mut current_line = String::new();
1298
1299 for (idx, element) in elements.iter().enumerate() {
1300 let element_str = format!("{element}");
1301
1302 if let Element::Text(text) = element {
1304 let combined = format!("{current_line}{text}");
1306 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1308
1309 if sentences.len() > 1 {
1310 for (i, sentence) in sentences.iter().enumerate() {
1312 if i == 0 {
1313 let trimmed = sentence.trim();
1316
1317 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1318 current_line = sentence.to_string();
1320 } else {
1321 lines.push(sentence.to_string());
1323 current_line.clear();
1324 }
1325 } else if i == sentences.len() - 1 {
1326 let trimmed = sentence.trim();
1328 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1329
1330 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1331 lines.push(sentence.to_string());
1333 current_line.clear();
1334 } else {
1335 current_line = sentence.to_string();
1337 }
1338 } else {
1339 lines.push(sentence.to_string());
1341 }
1342 }
1343 } else {
1344 let trimmed = combined.trim();
1346
1347 if trimmed.is_empty() {
1351 continue;
1352 }
1353
1354 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1355
1356 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1357 lines.push(trimmed.to_string());
1359 current_line.clear();
1360 } else {
1361 current_line = combined;
1363 }
1364 }
1365 } else if let Element::Italic { content, underscore } = element {
1366 let marker = if *underscore { "_" } else { "*" };
1368 handle_emphasis_sentence_split(
1369 content,
1370 marker,
1371 &abbreviations,
1372 require_sentence_capital,
1373 &mut current_line,
1374 &mut lines,
1375 );
1376 } else if let Element::Bold { content, underscore } = element {
1377 let marker = if *underscore { "__" } else { "**" };
1379 handle_emphasis_sentence_split(
1380 content,
1381 marker,
1382 &abbreviations,
1383 require_sentence_capital,
1384 &mut current_line,
1385 &mut lines,
1386 );
1387 } else if let Element::Strikethrough(content) = element {
1388 handle_emphasis_sentence_split(
1390 content,
1391 "~~",
1392 &abbreviations,
1393 require_sentence_capital,
1394 &mut current_line,
1395 &mut lines,
1396 );
1397 } else {
1398 let is_adjacent = if idx > 0 {
1401 match &elements[idx - 1] {
1402 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1403 _ => true,
1404 }
1405 } else {
1406 false
1407 };
1408
1409 if !is_adjacent
1411 && !current_line.is_empty()
1412 && !current_line.ends_with(' ')
1413 && !current_line.ends_with('(')
1414 && !current_line.ends_with('[')
1415 {
1416 current_line.push(' ');
1417 }
1418 current_line.push_str(&element_str);
1419 }
1420 }
1421
1422 if !current_line.is_empty() {
1424 lines.push(current_line.trim().to_string());
1425 }
1426 lines
1427}
1428
1429fn handle_emphasis_sentence_split(
1431 content: &str,
1432 marker: &str,
1433 abbreviations: &HashSet<String>,
1434 require_sentence_capital: bool,
1435 current_line: &mut String,
1436 lines: &mut Vec<String>,
1437) {
1438 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1440
1441 if sentences.len() <= 1 {
1442 if !current_line.is_empty()
1444 && !current_line.ends_with(' ')
1445 && !current_line.ends_with('(')
1446 && !current_line.ends_with('[')
1447 {
1448 current_line.push(' ');
1449 }
1450 current_line.push_str(marker);
1451 current_line.push_str(content);
1452 current_line.push_str(marker);
1453
1454 let trimmed = content.trim();
1456 let ends_with_punct = ends_with_sentence_punct(trimmed);
1457 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1458 lines.push(current_line.clone());
1459 current_line.clear();
1460 }
1461 } else {
1462 for (i, sentence) in sentences.iter().enumerate() {
1464 let trimmed = sentence.trim();
1465 if trimmed.is_empty() {
1466 continue;
1467 }
1468
1469 if i == 0 {
1470 if !current_line.is_empty()
1472 && !current_line.ends_with(' ')
1473 && !current_line.ends_with('(')
1474 && !current_line.ends_with('[')
1475 {
1476 current_line.push(' ');
1477 }
1478 current_line.push_str(marker);
1479 current_line.push_str(trimmed);
1480 current_line.push_str(marker);
1481
1482 let ends_with_punct = ends_with_sentence_punct(trimmed);
1484 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1485 lines.push(current_line.clone());
1486 current_line.clear();
1487 }
1488 } else if i == sentences.len() - 1 {
1489 let ends_with_punct = ends_with_sentence_punct(trimmed);
1491
1492 let mut line = String::new();
1493 line.push_str(marker);
1494 line.push_str(trimmed);
1495 line.push_str(marker);
1496
1497 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1498 lines.push(line);
1499 } else {
1500 *current_line = line;
1502 }
1503 } else {
1504 let mut line = String::new();
1506 line.push_str(marker);
1507 line.push_str(trimmed);
1508 line.push_str(marker);
1509 lines.push(line);
1510 }
1511 }
1512 }
1513}
1514
/// Lower-case ASCII words at which a long line may be broken when no clause
/// punctuation is available (see `split_at_break_word`).
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns and subordinating conjunctions
    "which", "that", "because", "when", "if", "while", "where",
    "although", "though", "unless", "since", "after", "before", "until",
    "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1550
/// Returns `true` for punctuation that separates clauses: comma, semicolon,
/// colon, and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    match c {
        ',' | ';' | ':' => true,
        '\u{2014}' => true,
        _ => false,
    }
}
1555
1556fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1560 let mut spans = Vec::new();
1561 let mut offset = 0;
1562 for element in elements {
1563 let rendered = format!("{element}");
1564 let len = rendered.len();
1565 if !matches!(element, Element::Text(_)) {
1566 spans.push((offset, offset + len));
1567 }
1568 offset += len;
1569 }
1570 spans
1571}
1572
/// Returns `true` when `pos` lies strictly inside any of the `(start, end)`
/// spans. Positions equal to a span's start or end count as outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(begin, finish) in spans {
        if begin < pos && pos < finish {
            return true;
        }
    }
    false
}
1577
/// Fraction of the configured line length below which a line is considered
/// too short: splits whose first part would be shorter are rejected, and
/// lines shorter than this are candidates for merging into their predecessor.
const MIN_SPLIT_RATIO: f64 = 0.3;
1581
1582fn split_at_clause_punctuation(
1586 text: &str,
1587 line_length: usize,
1588 element_spans: &[(usize, usize)],
1589 length_mode: ReflowLengthMode,
1590) -> Option<(String, String)> {
1591 let chars: Vec<char> = text.chars().collect();
1592 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1593
1594 let mut width_acc = 0;
1596 let mut search_end_char = 0;
1597 for (idx, &c) in chars.iter().enumerate() {
1598 let c_width = display_len(&c.to_string(), length_mode);
1599 if width_acc + c_width > line_length {
1600 break;
1601 }
1602 width_acc += c_width;
1603 search_end_char = idx + 1;
1604 }
1605
1606 let mut best_pos = None;
1607 for i in (0..search_end_char).rev() {
1608 if is_clause_punctuation(chars[i]) {
1609 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1611 if !is_inside_element(byte_pos, element_spans) {
1612 best_pos = Some(i);
1613 break;
1614 }
1615 }
1616 }
1617
1618 let pos = best_pos?;
1619
1620 let first: String = chars[..=pos].iter().collect();
1622 let first_display_len = display_len(&first, length_mode);
1623 if first_display_len < min_first_len {
1624 return None;
1625 }
1626
1627 let rest: String = chars[pos + 1..].iter().collect();
1629 let rest = rest.trim_start().to_string();
1630
1631 if rest.is_empty() {
1632 return None;
1633 }
1634
1635 Some((first, rest))
1636}
1637
1638fn split_at_break_word(
1642 text: &str,
1643 line_length: usize,
1644 element_spans: &[(usize, usize)],
1645 length_mode: ReflowLengthMode,
1646) -> Option<(String, String)> {
1647 let lower = text.to_lowercase();
1648 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1649 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1652 let mut search_start = 0;
1653 while let Some(pos) = lower[search_start..].find(word) {
1654 let abs_pos = search_start + pos;
1655
1656 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1658 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1659
1660 if preceded_by_space && followed_by_space {
1661 let first_part = text[..abs_pos].trim_end();
1663 let first_part_len = display_len(first_part, length_mode);
1664
1665 if first_part_len >= min_first_len
1666 && first_part_len <= line_length
1667 && !is_inside_element(abs_pos, element_spans)
1668 {
1669 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1671 best_split = Some((abs_pos, word.len()));
1672 }
1673 }
1674 }
1675
1676 search_start = abs_pos + word.len();
1677 }
1678 }
1679
1680 let (byte_start, _word_len) = best_split?;
1681
1682 let first = text[..byte_start].trim_end().to_string();
1683 let rest = text[byte_start..].to_string();
1684
1685 if first.is_empty() || rest.trim().is_empty() {
1686 return None;
1687 }
1688
1689 Some((first, rest))
1690}
1691
1692fn cascade_split_line(
1695 text: &str,
1696 line_length: usize,
1697 abbreviations: &Option<Vec<String>>,
1698 length_mode: ReflowLengthMode,
1699 attr_lists: bool,
1700) -> Vec<String> {
1701 if line_length == 0 || display_len(text, length_mode) <= line_length {
1702 return vec![text.to_string()];
1703 }
1704
1705 let elements = parse_markdown_elements_inner(text, attr_lists);
1706 let element_spans = compute_element_spans(&elements);
1707
1708 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1710 let mut result = vec![first];
1711 result.extend(cascade_split_line(
1712 &rest,
1713 line_length,
1714 abbreviations,
1715 length_mode,
1716 attr_lists,
1717 ));
1718 return result;
1719 }
1720
1721 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1723 let mut result = vec![first];
1724 result.extend(cascade_split_line(
1725 &rest,
1726 line_length,
1727 abbreviations,
1728 length_mode,
1729 attr_lists,
1730 ));
1731 return result;
1732 }
1733
1734 let options = ReflowOptions {
1736 line_length,
1737 break_on_sentences: false,
1738 preserve_breaks: false,
1739 sentence_per_line: false,
1740 semantic_line_breaks: false,
1741 abbreviations: abbreviations.clone(),
1742 length_mode,
1743 attr_lists,
1744 require_sentence_capital: true,
1745 max_list_continuation_indent: None,
1746 };
1747 reflow_elements(&elements, &options)
1748}
1749
1750fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1754 let sentence_lines =
1756 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
1757
1758 if options.line_length == 0 {
1761 return sentence_lines;
1762 }
1763
1764 let length_mode = options.length_mode;
1765 let mut result = Vec::new();
1766 for line in sentence_lines {
1767 if display_len(&line, length_mode) <= options.line_length {
1768 result.push(line);
1769 } else {
1770 result.extend(cascade_split_line(
1771 &line,
1772 options.line_length,
1773 &options.abbreviations,
1774 length_mode,
1775 options.attr_lists,
1776 ));
1777 }
1778 }
1779
1780 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1783 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1784 for line in result {
1785 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1786 let prev_ends_at_sentence = {
1788 let trimmed = merged.last().unwrap().trim_end();
1789 trimmed
1790 .chars()
1791 .rev()
1792 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1793 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1794 };
1795
1796 if !prev_ends_at_sentence {
1797 let prev = merged.last_mut().unwrap();
1798 let combined = format!("{prev} {line}");
1799 if display_len(&combined, length_mode) <= options.line_length {
1801 *prev = combined;
1802 continue;
1803 }
1804 }
1805 }
1806 merged.push(line);
1807 }
1808 merged
1809}
1810
/// Find the byte index of the right-most ASCII space in `line` that does not
/// fall strictly inside any of the `(start, end)` element spans.
///
/// Returns `None` when the line has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ').map(|(at, _)| at).find(|&at| {
        let inside_element = element_spans.iter().any(|&(start, end)| start < at && at < end);
        !inside_element
    })
}
1824
1825fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1827 let mut lines = Vec::new();
1828 let mut current_line = String::new();
1829 let mut current_length = 0;
1830 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1832 let length_mode = options.length_mode;
1833
1834 for (idx, element) in elements.iter().enumerate() {
1835 let element_str = format!("{element}");
1836 let element_len = element.display_width(length_mode);
1837
1838 let is_adjacent_to_prev = if idx > 0 {
1844 match (&elements[idx - 1], element) {
1845 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1846 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1847 _ => true,
1848 }
1849 } else {
1850 false
1851 };
1852
1853 if let Element::Text(text) = element {
1855 let has_leading_space = text.starts_with(char::is_whitespace);
1857 let words: Vec<&str> = text.split_whitespace().collect();
1859
1860 for (i, word) in words.iter().enumerate() {
1861 let word_len = display_len(word, length_mode);
1862 let is_trailing_punct = word
1864 .chars()
1865 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1866
1867 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1870
1871 if is_first_adjacent {
1872 if current_length + word_len > options.line_length && current_length > 0 {
1874 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1877 let before = current_line[..last_space].trim_end().to_string();
1878 let after = current_line[last_space + 1..].to_string();
1879 lines.push(before);
1880 current_line = format!("{after}{word}");
1881 current_length = display_len(¤t_line, length_mode);
1882 current_line_element_spans.clear();
1883 } else {
1884 current_line.push_str(word);
1885 current_length += word_len;
1886 }
1887 } else {
1888 current_line.push_str(word);
1889 current_length += word_len;
1890 }
1891 } else if current_length > 0
1892 && current_length + 1 + word_len > options.line_length
1893 && !is_trailing_punct
1894 {
1895 lines.push(current_line.trim().to_string());
1897 current_line = word.to_string();
1898 current_length = word_len;
1899 current_line_element_spans.clear();
1900 } else {
1901 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1905 current_line.push(' ');
1906 current_length += 1;
1907 }
1908 current_line.push_str(word);
1909 current_length += word_len;
1910 }
1911 }
1912 } else if matches!(
1913 element,
1914 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1915 ) && element_len > options.line_length
1916 {
1917 let (content, marker): (&str, &str) = match element {
1921 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1922 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1923 Element::Strikethrough(content) => (content.as_str(), "~~"),
1924 _ => unreachable!(),
1925 };
1926
1927 let words: Vec<&str> = content.split_whitespace().collect();
1928 let n = words.len();
1929
1930 if n == 0 {
1931 let full = format!("{marker}{marker}");
1933 let full_len = display_len(&full, length_mode);
1934 if !is_adjacent_to_prev && current_length > 0 {
1935 current_line.push(' ');
1936 current_length += 1;
1937 }
1938 current_line.push_str(&full);
1939 current_length += full_len;
1940 } else {
1941 for (i, word) in words.iter().enumerate() {
1942 let is_first = i == 0;
1943 let is_last = i == n - 1;
1944 let word_str: String = match (is_first, is_last) {
1945 (true, true) => format!("{marker}{word}{marker}"),
1946 (true, false) => format!("{marker}{word}"),
1947 (false, true) => format!("{word}{marker}"),
1948 (false, false) => word.to_string(),
1949 };
1950 let word_len = display_len(&word_str, length_mode);
1951
1952 let needs_space = if is_first {
1953 !is_adjacent_to_prev && current_length > 0
1954 } else {
1955 current_length > 0
1956 };
1957
1958 if needs_space && current_length + 1 + word_len > options.line_length {
1959 lines.push(current_line.trim_end().to_string());
1960 current_line = word_str;
1961 current_length = word_len;
1962 current_line_element_spans.clear();
1963 } else {
1964 if needs_space {
1965 current_line.push(' ');
1966 current_length += 1;
1967 }
1968 current_line.push_str(&word_str);
1969 current_length += word_len;
1970 }
1971 }
1972 }
1973 } else {
1974 if is_adjacent_to_prev {
1978 if current_length + element_len > options.line_length {
1980 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1983 let before = current_line[..last_space].trim_end().to_string();
1984 let after = current_line[last_space + 1..].to_string();
1985 lines.push(before);
1986 current_line = format!("{after}{element_str}");
1987 current_length = display_len(¤t_line, length_mode);
1988 current_line_element_spans.clear();
1989 let start = after.len();
1991 current_line_element_spans.push((start, start + element_str.len()));
1992 } else {
1993 let start = current_line.len();
1995 current_line.push_str(&element_str);
1996 current_length += element_len;
1997 current_line_element_spans.push((start, current_line.len()));
1998 }
1999 } else {
2000 let start = current_line.len();
2001 current_line.push_str(&element_str);
2002 current_length += element_len;
2003 current_line_element_spans.push((start, current_line.len()));
2004 }
2005 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2006 lines.push(current_line.trim().to_string());
2008 current_line = element_str.clone();
2009 current_length = element_len;
2010 current_line_element_spans.clear();
2011 current_line_element_spans.push((0, element_str.len()));
2012 } else {
2013 let ends_with_opener =
2015 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2016 if current_length > 0 && !ends_with_opener {
2017 current_line.push(' ');
2018 current_length += 1;
2019 }
2020 let start = current_line.len();
2021 current_line.push_str(&element_str);
2022 current_length += element_len;
2023 current_line_element_spans.push((start, current_line.len()));
2024 }
2025 }
2026 }
2027
2028 if !current_line.is_empty() {
2030 lines.push(current_line.trim_end().to_string());
2031 }
2032
2033 lines
2034}
2035
2036pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2038 let lines: Vec<&str> = content.lines().collect();
2039 let mut result = Vec::new();
2040 let mut i = 0;
2041
2042 while i < lines.len() {
2043 let line = lines[i];
2044 let trimmed = line.trim();
2045
2046 if trimmed.is_empty() {
2048 result.push(String::new());
2049 i += 1;
2050 continue;
2051 }
2052
2053 if trimmed.starts_with('#') {
2055 result.push(line.to_string());
2056 i += 1;
2057 continue;
2058 }
2059
2060 if trimmed.starts_with(":::") {
2062 result.push(line.to_string());
2063 i += 1;
2064 continue;
2065 }
2066
2067 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2069 result.push(line.to_string());
2070 i += 1;
2071 while i < lines.len() {
2073 result.push(lines[i].to_string());
2074 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2075 i += 1;
2076 break;
2077 }
2078 i += 1;
2079 }
2080 continue;
2081 }
2082
2083 if calculate_indentation_width_default(line) >= 4 {
2085 result.push(line.to_string());
2087 i += 1;
2088 while i < lines.len() {
2089 let next_line = lines[i];
2090 if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2092 result.push(next_line.to_string());
2093 i += 1;
2094 } else {
2095 break;
2096 }
2097 }
2098 continue;
2099 }
2100
2101 if trimmed.starts_with('>') {
2103 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2106 let quote_prefix = line[0..gt_pos + 1].to_string();
2107 let quote_content = &line[quote_prefix.len()..].trim_start();
2108
2109 let reflowed = reflow_line(quote_content, options);
2110 for reflowed_line in reflowed.iter() {
2111 result.push(format!("{quote_prefix} {reflowed_line}"));
2112 }
2113 i += 1;
2114 continue;
2115 }
2116
2117 if is_horizontal_rule(trimmed) {
2119 result.push(line.to_string());
2120 i += 1;
2121 continue;
2122 }
2123
2124 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2126 let indent = line.len() - line.trim_start().len();
2128 let indent_str = " ".repeat(indent);
2129
2130 let mut marker_end = indent;
2133 let mut content_start = indent;
2134
2135 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2136 if let Some(period_pos) = line[indent..].find('.') {
2138 marker_end = indent + period_pos + 1; content_start = marker_end;
2140 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2144 content_start += 1;
2145 }
2146 }
2147 } else {
2148 marker_end = indent + 1; content_start = marker_end;
2151 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2155 content_start += 1;
2156 }
2157 }
2158
2159 let min_continuation_indent = content_start;
2161
2162 let rest = &line[content_start..];
2165 if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2166 marker_end = content_start + 3; content_start += 4; }
2169
2170 let marker = &line[indent..marker_end];
2171
2172 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2175 i += 1;
2176
2177 while i < lines.len() {
2181 let next_line = lines[i];
2182 let next_trimmed = next_line.trim();
2183
2184 if is_block_boundary(next_trimmed) {
2186 break;
2187 }
2188
2189 let next_indent = next_line.len() - next_line.trim_start().len();
2191 if next_indent >= min_continuation_indent {
2192 let trimmed_start = next_line.trim_start();
2195 list_content.push(trim_preserving_hard_break(trimmed_start));
2196 i += 1;
2197 } else {
2198 break;
2200 }
2201 }
2202
2203 let combined_content = if options.preserve_breaks {
2206 list_content[0].clone()
2207 } else {
2208 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2210 if has_hard_breaks {
2211 list_content.join("\n")
2213 } else {
2214 list_content.join(" ")
2216 }
2217 };
2218
2219 let trimmed_marker = marker;
2221 let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2222 indent + (content_start - indent).min(max_indent)
2225 } else {
2226 content_start
2227 };
2228
2229 let prefix_length = indent + trimmed_marker.len() + 1;
2231
2232 let adjusted_options = ReflowOptions {
2234 line_length: options.line_length.saturating_sub(prefix_length),
2235 ..options.clone()
2236 };
2237
2238 let reflowed = reflow_line(&combined_content, &adjusted_options);
2239 for (j, reflowed_line) in reflowed.iter().enumerate() {
2240 if j == 0 {
2241 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2242 } else {
2243 let continuation_indent = " ".repeat(continuation_spaces);
2245 result.push(format!("{continuation_indent}{reflowed_line}"));
2246 }
2247 }
2248 continue;
2249 }
2250
2251 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2253 result.push(line.to_string());
2254 i += 1;
2255 continue;
2256 }
2257
2258 if trimmed.starts_with('[') && line.contains("]:") {
2260 result.push(line.to_string());
2261 i += 1;
2262 continue;
2263 }
2264
2265 if is_definition_list_item(trimmed) {
2267 result.push(line.to_string());
2268 i += 1;
2269 continue;
2270 }
2271
2272 let mut is_single_line_paragraph = true;
2274 if i + 1 < lines.len() {
2275 let next_trimmed = lines[i + 1].trim();
2276 if !is_block_boundary(next_trimmed) {
2278 is_single_line_paragraph = false;
2279 }
2280 }
2281
2282 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2284 result.push(line.to_string());
2285 i += 1;
2286 continue;
2287 }
2288
2289 let mut paragraph_parts = Vec::new();
2291 let mut current_part = vec![line];
2292 i += 1;
2293
2294 if options.preserve_breaks {
2296 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2298 Some("\\")
2299 } else if line.ends_with(" ") {
2300 Some(" ")
2301 } else {
2302 None
2303 };
2304 let reflowed = reflow_line(line, options);
2305
2306 if let Some(break_marker) = hard_break_type {
2308 if !reflowed.is_empty() {
2309 let mut reflowed_with_break = reflowed;
2310 let last_idx = reflowed_with_break.len() - 1;
2311 if !has_hard_break(&reflowed_with_break[last_idx]) {
2312 reflowed_with_break[last_idx].push_str(break_marker);
2313 }
2314 result.extend(reflowed_with_break);
2315 }
2316 } else {
2317 result.extend(reflowed);
2318 }
2319 } else {
2320 while i < lines.len() {
2322 let prev_line = if !current_part.is_empty() {
2323 current_part.last().unwrap()
2324 } else {
2325 ""
2326 };
2327 let next_line = lines[i];
2328 let next_trimmed = next_line.trim();
2329
2330 if is_block_boundary(next_trimmed) {
2332 break;
2333 }
2334
2335 let prev_trimmed = prev_line.trim();
2338 let abbreviations = get_abbreviations(&options.abbreviations);
2339 let ends_with_sentence = (prev_trimmed.ends_with('.')
2340 || prev_trimmed.ends_with('!')
2341 || prev_trimmed.ends_with('?')
2342 || prev_trimmed.ends_with(".*")
2343 || prev_trimmed.ends_with("!*")
2344 || prev_trimmed.ends_with("?*")
2345 || prev_trimmed.ends_with("._")
2346 || prev_trimmed.ends_with("!_")
2347 || prev_trimmed.ends_with("?_")
2348 || prev_trimmed.ends_with(".\"")
2350 || prev_trimmed.ends_with("!\"")
2351 || prev_trimmed.ends_with("?\"")
2352 || prev_trimmed.ends_with(".'")
2353 || prev_trimmed.ends_with("!'")
2354 || prev_trimmed.ends_with("?'")
2355 || prev_trimmed.ends_with(".\u{201D}")
2356 || prev_trimmed.ends_with("!\u{201D}")
2357 || prev_trimmed.ends_with("?\u{201D}")
2358 || prev_trimmed.ends_with(".\u{2019}")
2359 || prev_trimmed.ends_with("!\u{2019}")
2360 || prev_trimmed.ends_with("?\u{2019}"))
2361 && !text_ends_with_abbreviation(
2362 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2363 &abbreviations,
2364 );
2365
2366 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2367 paragraph_parts.push(current_part.join(" "));
2369 current_part = vec![next_line];
2370 } else {
2371 current_part.push(next_line);
2372 }
2373 i += 1;
2374 }
2375
2376 if !current_part.is_empty() {
2378 if current_part.len() == 1 {
2379 paragraph_parts.push(current_part[0].to_string());
2381 } else {
2382 paragraph_parts.push(current_part.join(" "));
2383 }
2384 }
2385
2386 for (j, part) in paragraph_parts.iter().enumerate() {
2388 let reflowed = reflow_line(part, options);
2389 result.extend(reflowed);
2390
2391 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2395 let last_idx = result.len() - 1;
2396 if !has_hard_break(&result[last_idx]) {
2397 result[last_idx].push_str(" ");
2398 }
2399 }
2400 }
2401 }
2402 }
2403
2404 let result_text = result.join("\n");
2406 if content.ends_with('\n') && !result_text.ends_with('\n') {
2407 format!("{result_text}\n")
2408 } else {
2409 result_text
2410 }
2411}
2412
/// The result of reflowing a single paragraph: the byte range it occupies in
/// the original document and the text that should replace it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset at which the paragraph ends.
    pub end_byte: usize,
    /// The reflowed replacement text.
    pub reflowed_text: String,
}
2423
/// One line of blockquote content together with how it was marked up.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockquoteLineData {
    /// The line's content.
    pub(crate) content: String,
    /// `true` when built with [`BlockquoteLineData::explicit`], `false` when
    /// built with [`BlockquoteLineData::lazy`].
    pub(crate) is_explicit: bool,
    /// The blockquote prefix supplied for an explicit line; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Create an explicit line: `content` together with the `prefix` it carried.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self {
            content,
            is_explicit: true,
            prefix: Some(prefix),
        }
    }

    /// Create a lazy continuation line: `content` with no prefix.
    pub fn lazy(content: String) -> Self {
        Self {
            content,
            is_explicit: false,
            prefix: None,
        }
    }
}
2458
/// How the continuation lines of a blockquote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines carry the blockquote prefix.
    Explicit,
    /// Continuation lines are written without the prefix.
    Lazy,
}
2465
2466pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2474 let mut explicit_count = 0usize;
2475 let mut lazy_count = 0usize;
2476
2477 for line in lines.iter().skip(1) {
2478 if line.is_explicit {
2479 explicit_count += 1;
2480 } else {
2481 lazy_count += 1;
2482 }
2483 }
2484
2485 if explicit_count > 0 && lazy_count == 0 {
2486 BlockquoteContinuationStyle::Explicit
2487 } else if lazy_count > 0 && explicit_count == 0 {
2488 BlockquoteContinuationStyle::Lazy
2489 } else if explicit_count >= lazy_count {
2490 BlockquoteContinuationStyle::Explicit
2491 } else {
2492 BlockquoteContinuationStyle::Lazy
2493 }
2494}
2495
2496pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2501 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2502
2503 for (idx, line) in lines.iter().enumerate() {
2504 let Some(prefix) = line.prefix.as_ref() else {
2505 continue;
2506 };
2507 counts
2508 .entry(prefix.clone())
2509 .and_modify(|entry| entry.0 += 1)
2510 .or_insert((1, idx));
2511 }
2512
2513 counts
2514 .into_iter()
2515 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2516 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2517 })
2518 .map(|(prefix, _)| prefix)
2519 .unwrap_or_else(|| fallback.to_string())
2520}
2521
2522pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2527 let trimmed = content_line.trim_start();
2528 trimmed.starts_with('>')
2529 || trimmed.starts_with('#')
2530 || trimmed.starts_with("```")
2531 || trimmed.starts_with("~~~")
2532 || is_unordered_list_marker(trimmed)
2533 || is_numbered_list_item(trimmed)
2534 || is_horizontal_rule(trimmed)
2535 || is_definition_list_item(trimmed)
2536 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2537 || trimmed.starts_with(":::")
2538 || (trimmed.starts_with('<')
2539 && !trimmed.starts_with("<http")
2540 && !trimmed.starts_with("<https")
2541 && !trimmed.starts_with("<mailto:"))
2542}
2543
2544pub fn reflow_blockquote_content(
2553 lines: &[BlockquoteLineData],
2554 explicit_prefix: &str,
2555 continuation_style: BlockquoteContinuationStyle,
2556 options: &ReflowOptions,
2557) -> Vec<String> {
2558 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2559 let segments = split_into_segments_strs(&content_strs);
2560 let mut reflowed_content_lines: Vec<String> = Vec::new();
2561
2562 for segment in segments {
2563 let hard_break_type = segment.last().and_then(|&line| {
2564 let line = line.strip_suffix('\r').unwrap_or(line);
2565 if line.ends_with('\\') {
2566 Some("\\")
2567 } else if line.ends_with(" ") {
2568 Some(" ")
2569 } else {
2570 None
2571 }
2572 });
2573
2574 let pieces: Vec<&str> = segment
2575 .iter()
2576 .map(|&line| {
2577 if let Some(l) = line.strip_suffix('\\') {
2578 l.trim_end()
2579 } else if let Some(l) = line.strip_suffix(" ") {
2580 l.trim_end()
2581 } else {
2582 line.trim_end()
2583 }
2584 })
2585 .collect();
2586
2587 let segment_text = pieces.join(" ");
2588 let segment_text = segment_text.trim();
2589 if segment_text.is_empty() {
2590 continue;
2591 }
2592
2593 let mut reflowed = reflow_line(segment_text, options);
2594 if let Some(break_marker) = hard_break_type
2595 && !reflowed.is_empty()
2596 {
2597 let last_idx = reflowed.len() - 1;
2598 if !has_hard_break(&reflowed[last_idx]) {
2599 reflowed[last_idx].push_str(break_marker);
2600 }
2601 }
2602 reflowed_content_lines.extend(reflowed);
2603 }
2604
2605 let mut styled_lines: Vec<String> = Vec::new();
2606 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2607 let force_explicit = idx == 0
2608 || continuation_style == BlockquoteContinuationStyle::Explicit
2609 || should_force_explicit_blockquote_line(line);
2610 if force_explicit {
2611 styled_lines.push(format!("{explicit_prefix}{line}"));
2612 } else {
2613 styled_lines.push(line.clone());
2614 }
2615 }
2616
2617 styled_lines
2618}
2619
2620fn is_blockquote_content_boundary(content: &str) -> bool {
2621 let trimmed = content.trim();
2622 trimmed.is_empty()
2623 || is_block_boundary(trimmed)
2624 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2625 || trimmed.starts_with(":::")
2626 || crate::utils::is_template_directive_only(content)
2627 || is_standalone_attr_list(content)
2628 || is_snippet_block_delimiter(content)
2629}
2630
2631fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2632 let mut segments = Vec::new();
2633 let mut current = Vec::new();
2634
2635 for &line in lines {
2636 current.push(line);
2637 if has_hard_break(line) {
2638 segments.push(current);
2639 current = Vec::new();
2640 }
2641 }
2642
2643 if !current.is_empty() {
2644 segments.push(current);
2645 }
2646
2647 segments
2648}
2649
2650fn reflow_blockquote_paragraph_at_line(
2651 content: &str,
2652 lines: &[&str],
2653 target_idx: usize,
2654 options: &ReflowOptions,
2655) -> Option<ParagraphReflow> {
2656 let mut anchor_idx = target_idx;
2657 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2658 parsed.nesting_level
2659 } else {
2660 let mut found = None;
2661 let mut idx = target_idx;
2662 loop {
2663 if lines[idx].trim().is_empty() {
2664 break;
2665 }
2666 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2667 found = Some((idx, parsed.nesting_level));
2668 break;
2669 }
2670 if idx == 0 {
2671 break;
2672 }
2673 idx -= 1;
2674 }
2675 let (idx, level) = found?;
2676 anchor_idx = idx;
2677 level
2678 };
2679
2680 let mut para_start = anchor_idx;
2682 while para_start > 0 {
2683 let prev_idx = para_start - 1;
2684 let prev_line = lines[prev_idx];
2685
2686 if prev_line.trim().is_empty() {
2687 break;
2688 }
2689
2690 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2691 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2692 break;
2693 }
2694 para_start = prev_idx;
2695 continue;
2696 }
2697
2698 let prev_lazy = prev_line.trim_start();
2699 if is_blockquote_content_boundary(prev_lazy) {
2700 break;
2701 }
2702 para_start = prev_idx;
2703 }
2704
2705 while para_start < lines.len() {
2707 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2708 para_start += 1;
2709 continue;
2710 };
2711 target_level = parsed.nesting_level;
2712 break;
2713 }
2714
2715 if para_start >= lines.len() || para_start > target_idx {
2716 return None;
2717 }
2718
2719 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2722 let mut idx = para_start;
2723 while idx < lines.len() {
2724 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2725 break;
2726 }
2727
2728 let line = lines[idx];
2729 if line.trim().is_empty() {
2730 break;
2731 }
2732
2733 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2734 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2735 break;
2736 }
2737 collected.push((
2738 idx,
2739 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2740 ));
2741 idx += 1;
2742 continue;
2743 }
2744
2745 let lazy_content = line.trim_start();
2746 if is_blockquote_content_boundary(lazy_content) {
2747 break;
2748 }
2749
2750 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2751 idx += 1;
2752 }
2753
2754 if collected.is_empty() {
2755 return None;
2756 }
2757
2758 let para_end = collected[collected.len() - 1].0;
2759 if target_idx < para_start || target_idx > para_end {
2760 return None;
2761 }
2762
2763 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2764
2765 let fallback_prefix = line_data
2766 .iter()
2767 .find_map(|d| d.prefix.clone())
2768 .unwrap_or_else(|| "> ".to_string());
2769 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2770 let continuation_style = blockquote_continuation_style(&line_data);
2771
2772 let adjusted_line_length = options
2773 .line_length
2774 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2775 .max(1);
2776
2777 let adjusted_options = ReflowOptions {
2778 line_length: adjusted_line_length,
2779 ..options.clone()
2780 };
2781
2782 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2783
2784 if styled_lines.is_empty() {
2785 return None;
2786 }
2787
2788 let mut start_byte = 0;
2790 for line in lines.iter().take(para_start) {
2791 start_byte += line.len() + 1;
2792 }
2793
2794 let mut end_byte = start_byte;
2795 for line in lines.iter().take(para_end + 1).skip(para_start) {
2796 end_byte += line.len() + 1;
2797 }
2798
2799 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2800 if !includes_trailing_newline {
2801 end_byte -= 1;
2802 }
2803
2804 let reflowed_joined = styled_lines.join("\n");
2805 let reflowed_text = if includes_trailing_newline {
2806 if reflowed_joined.ends_with('\n') {
2807 reflowed_joined
2808 } else {
2809 format!("{reflowed_joined}\n")
2810 }
2811 } else if reflowed_joined.ends_with('\n') {
2812 reflowed_joined.trim_end_matches('\n').to_string()
2813 } else {
2814 reflowed_joined
2815 };
2816
2817 Some(ParagraphReflow {
2818 start_byte,
2819 end_byte,
2820 reflowed_text,
2821 })
2822}
2823
2824pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2842 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2843}
2844
2845pub fn reflow_paragraph_at_line_with_mode(
2847 content: &str,
2848 line_number: usize,
2849 line_length: usize,
2850 length_mode: ReflowLengthMode,
2851) -> Option<ParagraphReflow> {
2852 let options = ReflowOptions {
2853 line_length,
2854 length_mode,
2855 ..Default::default()
2856 };
2857 reflow_paragraph_at_line_with_options(content, line_number, &options)
2858}
2859
2860pub fn reflow_paragraph_at_line_with_options(
2871 content: &str,
2872 line_number: usize,
2873 options: &ReflowOptions,
2874) -> Option<ParagraphReflow> {
2875 if line_number == 0 {
2876 return None;
2877 }
2878
2879 let lines: Vec<&str> = content.lines().collect();
2880
2881 if line_number > lines.len() {
2883 return None;
2884 }
2885
2886 let target_idx = line_number - 1; let target_line = lines[target_idx];
2888 let trimmed = target_line.trim();
2889
2890 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2893 return Some(blockquote_reflow);
2894 }
2895
2896 if is_paragraph_boundary(trimmed, target_line) {
2898 return None;
2899 }
2900
2901 let mut para_start = target_idx;
2903 while para_start > 0 {
2904 let prev_idx = para_start - 1;
2905 let prev_line = lines[prev_idx];
2906 let prev_trimmed = prev_line.trim();
2907
2908 if is_paragraph_boundary(prev_trimmed, prev_line) {
2910 break;
2911 }
2912
2913 para_start = prev_idx;
2914 }
2915
2916 let mut para_end = target_idx;
2918 while para_end + 1 < lines.len() {
2919 let next_idx = para_end + 1;
2920 let next_line = lines[next_idx];
2921 let next_trimmed = next_line.trim();
2922
2923 if is_paragraph_boundary(next_trimmed, next_line) {
2925 break;
2926 }
2927
2928 para_end = next_idx;
2929 }
2930
2931 let paragraph_lines = &lines[para_start..=para_end];
2933
2934 let mut start_byte = 0;
2936 for line in lines.iter().take(para_start) {
2937 start_byte += line.len() + 1; }
2939
2940 let mut end_byte = start_byte;
2941 for line in paragraph_lines.iter() {
2942 end_byte += line.len() + 1; }
2944
2945 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2948
2949 if !includes_trailing_newline {
2951 end_byte -= 1;
2952 }
2953
2954 let paragraph_text = paragraph_lines.join("\n");
2956
2957 let reflowed = reflow_markdown(¶graph_text, options);
2959
2960 let reflowed_text = if includes_trailing_newline {
2964 if reflowed.ends_with('\n') {
2966 reflowed
2967 } else {
2968 format!("{reflowed}\n")
2969 }
2970 } else {
2971 if reflowed.ends_with('\n') {
2973 reflowed.trim_end_matches('\n').to_string()
2974 } else {
2975 reflowed
2976 }
2977 };
2978
2979 Some(ParagraphReflow {
2980 start_byte,
2981 end_byte,
2982 reflowed_text,
2983 })
2984}
2985
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` with the default abbreviation set.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Texts whose final word is expected to be a known abbreviation.
        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should end with an abbreviation"
            );
        }

        // Ordinary words ending in a period, abbreviations followed by other
        // punctuation, and texts without any trailing period.
        for text in [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ] {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should not end with an abbreviation"
            );
        }
    }

    /// `is_unordered_list_marker` accepts bullet markers and rejects look-alikes.
    #[test]
    fn test_is_unordered_list_marker() {
        // A marker followed by content, or a marker on its own.
        for line in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(line), "{line:?} should be a list marker");
        }

        // Rule-like lines, emphasis, a marker glued to a word, and plain text.
        for line in ["---", "***", "- - -", "* * *", "*emphasis*", "-word", "", "text", "# heading"] {
            assert!(!is_unordered_list_marker(line), "{line:?} should not be a list marker");
        }
    }

    /// `is_block_boundary` recognises block-level constructs.
    #[test]
    fn test_is_block_boundary() {
        for line in [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ] {
            assert!(is_block_boundary(line), "{line:?} should be a block boundary");
        }

        for line in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(line), "{line:?} should not be a block boundary");
        }
    }

    /// A definition line must stay on its own line after the term.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` takes the trimmed line and the original line.
    #[test]
    fn test_is_paragraph_boundary() {
        // Each case is (trimmed, original line).
        let boundaries = [
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code: four leading spaces, or a tab.
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows, with and without outer pipes.
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"),
        ];
        for (trimmed, line) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, line),
                "{line:?} should be a paragraph boundary"
            );
        }

        // Plain text, including text with a small indent, is not a boundary.
        let non_boundaries = [("regular text", "regular text"), ("text", " text")];
        for (trimmed, line) in non_boundaries {
            assert!(
                !is_paragraph_boundary(trimmed, line),
                "{line:?} should not be a paragraph boundary"
            );
        }
    }

    /// Targeting a `:::` div marker line yields no reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening `::: {.callout-note}` marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}