1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
13 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
14 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit in which line length is measured while reflowing.
///
/// The variants map onto the three measurements performed by `display_len`.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Length is the number of `char`s (Unicode scalar values).
    Chars,
    /// Length is the display width reported by `UnicodeWidthStr::width`.
    /// This is the default mode.
    #[default]
    Visual,
    /// Length is the number of UTF-8 bytes.
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
/// Configuration for the reflow routines in this module.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target maximum line length, measured according to `length_mode`.
    /// `reflow_line` returns the input unchanged when this is `0`.
    pub line_length: usize,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// prefers sentence boundaries as break points - confirm against callers.
    pub break_on_sentences: bool,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// keeps existing line breaks - confirm against callers.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits one sentence per output line.
    /// Checked before `semantic_line_breaks`.
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` uses the
    /// semantic line-break strategy.
    pub semantic_line_breaks: bool,
    /// Optional custom abbreviations handed to `get_abbreviations`.
    pub abbreviations: Option<Vec<String>>,
    /// Unit used to measure line length.
    pub length_mode: ReflowLengthMode,
    /// When set, elements are parsed with attribute-list (`{...}`) recognition.
    pub attr_lists: bool,
    /// When set, a `.` only ends a sentence if the next sentence starts with
    /// an uppercase or CJK character.
    pub require_sentence_capital: bool,
}
73
74impl Default for ReflowOptions {
75 fn default() -> Self {
76 Self {
77 line_length: 80,
78 break_on_sentences: true,
79 preserve_breaks: false,
80 sentence_per_line: false,
81 semantic_line_breaks: false,
82 abbreviations: None,
83 length_mode: ReflowLengthMode::default(),
84 attr_lists: false,
85 require_sentence_capital: true,
86 }
87 }
88}
89
/// Decides whether the character at `pos` in `text` terminates a sentence.
///
/// `pos` is a **char index** (it indexes the `Vec<char>` built from `text`),
/// not a byte offset.
///
/// Returns `false` for the final character of `text`: a boundary needs
/// something after it.
///
/// * `abbreviations` - set consulted via `text_ends_with_abbreviation` so a
///   `.` that closes a known abbreviation is not treated as a boundary.
/// * `require_sentence_capital` - when `true`, a `.` is only a boundary if the
///   first letter that follows is uppercase or a CJK character.
fn is_sentence_boundary(
    text: &str,
    pos: usize,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> bool {
    // Note: the char vector is rebuilt on every call.
    let chars: Vec<char> = text.chars().collect();

    // Nothing follows `pos` (or `pos` is out of range): cannot be a boundary.
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // CJK sentence-ending punctuation: no following space is required.
    if is_cjk_sentence_ending(c) {
        // Skip emphasis/strikethrough markers that close right after the punctuation.
        let mut after_punct_pos = pos + 1;
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip any whitespace.
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Only markers/whitespace remained: no following sentence.
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip markers that open the next sentence.
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        // Some real content follows the CJK punctuation.
        return true;
    }

    // From here on only ASCII sentence punctuation is considered.
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Locate the space that must follow the punctuation, allowing a closing
    // quote and/or closing emphasis markers (`*`, `_`, `**`, `__`, `~~`) in
    // between. `after_space_pos` is the index just past that space.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // Punctuation directly followed by a space.
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        if chars[pos + 2] == ' ' {
            // Punctuation, closing quote, space.
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Punctuation, closing quote, single marker, space.
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Punctuation, closing quote, doubled marker, space.
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Punctuation, single marker, space.
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Punctuation, doubled marker, space.
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Punctuation, `~~`, space.
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip any additional whitespace after the separating space.
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Only whitespace follows: no next sentence.
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip opening markers and opening quotes to reach the first real character
    // of the candidate next sentence.
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    if first_letter_pos >= chars.len() {
        return false;
    }

    let first_char = chars[first_letter_pos];

    // `!` and `?` are accepted without the abbreviation/capital checks below.
    if c == '!' || c == '?' {
        return true;
    }

    // Remaining checks apply to `.` only.
    if pos > 0 {
        // Convert the char index into a byte offset so `text` can be sliced
        // up to and including the period.
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // Numeric character before the period and an ASCII digit after it:
        // not treated as a sentence end.
        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
            return false;
        }

        // A lone ASCII uppercase letter before the period (at the start of the
        // text or preceded by whitespace) is not treated as a sentence end.
        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
            return false;
        }
    }

    // Optionally insist that the next sentence starts with an uppercase or CJK character.
    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    true
}
254
255pub fn split_into_sentences(text: &str) -> Vec<String> {
257 split_into_sentences_custom(text, &None)
258}
259
260pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
262 let abbreviations = get_abbreviations(custom_abbreviations);
263 split_into_sentences_with_set(text, &abbreviations, true)
264}
265
266fn split_into_sentences_with_set(
269 text: &str,
270 abbreviations: &HashSet<String>,
271 require_sentence_capital: bool,
272) -> Vec<String> {
273 let mut sentences = Vec::new();
274 let mut current_sentence = String::new();
275 let mut chars = text.chars().peekable();
276 let mut pos = 0;
277
278 while let Some(c) = chars.next() {
279 current_sentence.push(c);
280
281 if is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
282 while let Some(&next) = chars.peek() {
284 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
285 current_sentence.push(chars.next().unwrap());
286 pos += 1;
287 } else {
288 break;
289 }
290 }
291
292 if chars.peek() == Some(&' ') {
294 chars.next();
295 pos += 1;
296 }
297
298 sentences.push(current_sentence.trim().to_string());
299 current_sentence.clear();
300 }
301
302 pos += 1;
303 }
304
305 if !current_sentence.trim().is_empty() {
307 sentences.push(current_sentence.trim().to_string());
308 }
309 sentences
310}
311
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_` or
/// `*`, consists solely of that one marker character and plain spaces, and
/// contains the marker at least three times.
///
/// The line is expected to be already trimmed; a leading space makes the
/// check fail because the first character must be the marker itself.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    // Single pass: count markers, bail out on anything that is neither the
    // marker nor a space.
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
340
/// Returns `true` when `line` starts with an ordered-list marker of the form
/// `<digits>. ` (one or more ASCII digits, a period, then a space).
///
/// Only ASCII digits `0`-`9` are accepted. Markdown ordered-list markers are
/// made of arabic digits, so other Unicode numeric characters (for example
/// `½` or `٣`) must not turn an ordinary paragraph line into a list item.
///
/// A period that is not followed by a space (e.g. `"2019."` or `"1.x"`) is
/// deliberately not treated as a list marker.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return false;
    }

    // `digit_count` only spans ASCII bytes, so it is a valid char boundary.
    line[digit_count..].starts_with(". ")
}
364
365fn is_unordered_list_marker(s: &str) -> bool {
367 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
368 && !is_horizontal_rule(s)
369 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
370}
371
372fn is_block_boundary_core(trimmed: &str) -> bool {
375 trimmed.is_empty()
376 || trimmed.starts_with('#')
377 || trimmed.starts_with("```")
378 || trimmed.starts_with("~~~")
379 || trimmed.starts_with('>')
380 || (trimmed.starts_with('[') && trimmed.contains("]:"))
381 || is_horizontal_rule(trimmed)
382 || is_unordered_list_marker(trimmed)
383 || is_numbered_list_item(trimmed)
384 || is_definition_list_item(trimmed)
385 || trimmed.starts_with(":::")
386}
387
388fn is_block_boundary(trimmed: &str) -> bool {
391 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
392}
393
394fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
398 is_block_boundary_core(trimmed)
399 || ElementCache::calculate_indentation_width_default(line) >= 4
400 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
401}
402
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either **two or more** trailing spaces or a trailing
/// backslash. A single trailing space is ordinary trailing whitespace and must
/// not be reported as a hard break. One trailing `\r` (from CRLF line endings)
/// is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
412
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
417
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * One trailing `\r` is removed first.
/// * A line ending in a backslash is returned untouched (backslash hard break).
/// * A line ending in **two or more** spaces is normalized to its content
///   followed by exactly two spaces (the canonical hard-break form). A line
///   that is whitespace only becomes the empty string.
/// * Anything else, including a line with a single trailing space, is simply
///   right-trimmed: one space is not a hard break and must not be preserved.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
448
449fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
451 if options.attr_lists {
452 parse_markdown_elements_with_attr_lists(text)
453 } else {
454 parse_markdown_elements(text)
455 }
456}
457
458pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
459 if options.sentence_per_line {
461 let elements = parse_elements(line, options);
462 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
463 }
464
465 if options.semantic_line_breaks {
467 let elements = parse_elements(line, options);
468 return reflow_elements_semantic(&elements, options);
469 }
470
471 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
474 return vec![line.to_string()];
475 }
476
477 let elements = parse_elements(line, options);
479
480 reflow_elements(&elements, options)
482}
483
/// Where the image inside a linked image (an image wrapped in a link) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Image given by an inline URL.
    Inline(String),
    /// Image given by a reference label, rendered as `![alt][label]`.
    Reference(String),
}
492
/// Where the link that wraps a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL, rendered as `[image](url)`.
    Inline(String),
    /// Reference label, rendered as `[image][label]`.
    Reference(String),
}
501
/// One inline piece of a Markdown line as produced by the element parser.
///
/// Variants store the inner content without their delimiters unless noted;
/// the `Display` implementation is responsible for re-adding the syntax.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, emitted verbatim.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image with its alt text and URL.
    InlineImage { alt: String, url: String },
    /// Full reference image `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Image wrapped in a link; image source and link target can each be
    /// inline or by reference.
    LinkedImage {
        /// Alt text of the inner image.
        alt: String,
        /// Source of the inner image.
        img_source: LinkedImageSource,
        /// Target of the outer link.
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`.
    Strikethrough(String),
    /// Wiki link `[[content]]`.
    WikiLink(String),
    /// Inline math `$math$`.
    InlineMath(String),
    /// Display math `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode `:name:`.
    EmojiShortcode(String),
    /// Autolink; holds the full matched text (including `<` and `>`).
    Autolink(String),
    /// HTML tag; holds the full matched text.
    HtmlTag(String),
    /// HTML entity; holds the full matched text.
    HtmlEntity(String),
    /// Hugo shortcode; holds the full matched text.
    HugoShortcode(String),
    /// Attribute list; holds the full matched text.
    AttrList(String),
    /// Inline code; content between single backticks.
    Code(String),
    /// Strong emphasis.
    Bold {
        /// Text between the delimiters.
        content: String,
        /// `true` when written with `__`, `false` for `**`.
        underscore: bool,
    },
    /// Emphasis.
    Italic {
        /// Text between the delimiters.
        content: String,
        /// `true` when written with `_`, `false` for `*`.
        underscore: bool,
    },
}
568
569impl std::fmt::Display for Element {
570 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
571 match self {
572 Element::Text(s) => write!(f, "{s}"),
573 Element::Link { text, url } => write!(f, "[{text}]({url})"),
574 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
575 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
576 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
577 Element::InlineImage { alt, url } => write!(f, ""),
578 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
579 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
580 Element::LinkedImage {
581 alt,
582 img_source,
583 link_target,
584 } => {
585 let img_part = match img_source {
587 LinkedImageSource::Inline(url) => format!(""),
588 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
589 };
590 match link_target {
592 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
593 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
594 }
595 }
596 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
597 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
598 Element::WikiLink(s) => write!(f, "[[{s}]]"),
599 Element::InlineMath(s) => write!(f, "${s}$"),
600 Element::DisplayMath(s) => write!(f, "$${s}$$"),
601 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
602 Element::Autolink(s) => write!(f, "{s}"),
603 Element::HtmlTag(s) => write!(f, "{s}"),
604 Element::HtmlEntity(s) => write!(f, "{s}"),
605 Element::HugoShortcode(s) => write!(f, "{s}"),
606 Element::AttrList(s) => write!(f, "{s}"),
607 Element::Code(s) => write!(f, "`{s}`"),
608 Element::Bold { content, underscore } => {
609 if *underscore {
610 write!(f, "__{content}__")
611 } else {
612 write!(f, "**{content}**")
613 }
614 }
615 Element::Italic { content, underscore } => {
616 if *underscore {
617 write!(f, "_{content}_")
618 } else {
619 write!(f, "*{content}*")
620 }
621 }
622 }
623 }
624}
625
626impl Element {
627 fn display_width(&self, mode: ReflowLengthMode) -> usize {
631 let formatted = format!("{self}");
632 display_len(&formatted, mode)
633 }
634}
635
/// An emphasis, strong or strikethrough span located by the pulldown-cmark parser.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the parsed text where the span (including its opening
    /// delimiter) starts.
    start: usize,
    /// Byte offset just past the span's closing delimiter.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the delimiter is `_` / `__` rather than `*` / `**`.
    /// Always `false` for strikethrough.
    uses_underscore: bool,
}
652
653fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
663 let mut spans = Vec::new();
664 let mut options = Options::empty();
665 options.insert(Options::ENABLE_STRIKETHROUGH);
666
667 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
670 let mut strikethrough_stack: Vec<usize> = Vec::new();
671
672 let parser = Parser::new_ext(text, options).into_offset_iter();
673
674 for (event, range) in parser {
675 match event {
676 Event::Start(Tag::Emphasis) => {
677 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
679 emphasis_stack.push((range.start, uses_underscore));
680 }
681 Event::End(TagEnd::Emphasis) => {
682 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
683 let content_start = start_byte + 1;
685 let content_end = range.end - 1;
686 if content_end > content_start
687 && let Some(content) = text.get(content_start..content_end)
688 {
689 spans.push(EmphasisSpan {
690 start: start_byte,
691 end: range.end,
692 content: content.to_string(),
693 is_strong: false,
694 is_strikethrough: false,
695 uses_underscore,
696 });
697 }
698 }
699 }
700 Event::Start(Tag::Strong) => {
701 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
703 strong_stack.push((range.start, uses_underscore));
704 }
705 Event::End(TagEnd::Strong) => {
706 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
707 let content_start = start_byte + 2;
709 let content_end = range.end - 2;
710 if content_end > content_start
711 && let Some(content) = text.get(content_start..content_end)
712 {
713 spans.push(EmphasisSpan {
714 start: start_byte,
715 end: range.end,
716 content: content.to_string(),
717 is_strong: true,
718 is_strikethrough: false,
719 uses_underscore,
720 });
721 }
722 }
723 }
724 Event::Start(Tag::Strikethrough) => {
725 strikethrough_stack.push(range.start);
726 }
727 Event::End(TagEnd::Strikethrough) => {
728 if let Some(start_byte) = strikethrough_stack.pop() {
729 let content_start = start_byte + 2;
731 let content_end = range.end - 2;
732 if content_end > content_start
733 && let Some(content) = text.get(content_start..content_end)
734 {
735 spans.push(EmphasisSpan {
736 start: start_byte,
737 end: range.end,
738 content: content.to_string(),
739 is_strong: false,
740 is_strikethrough: true,
741 uses_underscore: false,
742 });
743 }
744 }
745 }
746 _ => {}
747 }
748 }
749
750 spans.sort_by_key(|s| s.start);
752 spans
753}
754
755fn parse_markdown_elements(text: &str) -> Vec<Element> {
766 parse_markdown_elements_inner(text, false)
767}
768
769fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
770 parse_markdown_elements_inner(text, true)
771}
772
/// Tokenizes `text` into a flat sequence of inline [`Element`]s.
///
/// The function repeatedly looks at the unconsumed tail (`remaining`) and
/// determines two candidates:
///
/// 1. the earliest match among the link/image/footnote/math/emoji/HTML/
///    shortcode regexes (`earliest_match`), and
/// 2. the earliest "special" construct: a backtick, an attribute list (only
///    when `attr_lists` is `true`), or an emphasis span found by
///    `extract_emphasis_spans` (`next_special`).
///
/// The regex candidate is taken only when it starts strictly before the
/// special one; otherwise the special construct is handled. Text in front of
/// the chosen construct is emitted as `Element::Text`. When neither candidate
/// exists, the rest of the input becomes one final `Element::Text`.
///
/// Among the regexes, a later pattern replaces an earlier one only if it
/// starts strictly earlier, so ties are won by the pattern checked first.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once, with byte offsets relative to `text`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within the original `text`.
        let current_offset = text.len() - remaining.len();
        // (start offset in `remaining`, pattern tag, match object).
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images all contain "[!", so the four linked-image regexes are
        // only tried when that substring is present.
        if remaining.contains("[!") {
            // Inline image inside an inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // Reference image inside an inline link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // Inline image inside a reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // Reference image inside a reference link.
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        // Inline image.
        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        // Reference image.
        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        // Footnote reference.
        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        // Inline link.
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        // Reference link.
        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        // Shortcut reference.
        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        // Wiki link.
        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        // Display math is checked before inline math, so it wins a tie at the
        // same start position.
        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        // Inline math.
        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        // Emoji shortcode.
        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        // HTML entity.
        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        // Hugo shortcode; checked before HTML tags, so it wins a tie with them.
        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        // HTML tag or autolink: the tag pattern matches both, so the matched
        // text is classified afterwards.
        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // URL autolink: recognized by its scheme prefix.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            // Email autolink: content between the angle brackets matches the
            // email pattern.
            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), "autolink", m));
            } else {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Earliest "special" construct; `remaining.len()` means none found.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        // Inline code starts at the first backtick.
        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Attribute list: only the first `{` is considered, and the pattern
        // must match exactly at that position.
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Emphasis: spans are sorted by start, so the first span that begins
        // inside `remaining` is the nearest one; only that span is considered.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex candidate wins only when it starts strictly before the
        // special construct.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Emit the text in front of the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs its regex with `captures` to extract the groups
            // and then advances past the match. The `else` fallbacks emit the
            // opening character(s) as text if capturing unexpectedly fails.
            match pattern_type {
                "linked_image_ii" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label means the collapsed form `![alt][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label means the collapsed form `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The next four kinds keep the whole matched text verbatim.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                // Unknown tag: no arm above assigns one, so this is a defensive
                // fallback that consumes one byte as text.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the text in front of the special construct and move up to it.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` starts with a backtick; look for the next one.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unclosed backtick: the rest of the input is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Byte length of the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Defensive fallback: consume one byte as text.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // No special construct left: everything remaining is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1282
1283fn reflow_elements_sentence_per_line(
1285 elements: &[Element],
1286 custom_abbreviations: &Option<Vec<String>>,
1287 require_sentence_capital: bool,
1288) -> Vec<String> {
1289 let abbreviations = get_abbreviations(custom_abbreviations);
1290 let mut lines = Vec::new();
1291 let mut current_line = String::new();
1292
1293 for (idx, element) in elements.iter().enumerate() {
1294 let element_str = format!("{element}");
1295
1296 if let Element::Text(text) = element {
1298 let combined = format!("{current_line}{text}");
1300 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1302
1303 if sentences.len() > 1 {
1304 for (i, sentence) in sentences.iter().enumerate() {
1306 if i == 0 {
1307 let trimmed = sentence.trim();
1310
1311 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1312 current_line = sentence.to_string();
1314 } else {
1315 lines.push(sentence.to_string());
1317 current_line.clear();
1318 }
1319 } else if i == sentences.len() - 1 {
1320 let trimmed = sentence.trim();
1322 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1323
1324 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1325 lines.push(sentence.to_string());
1327 current_line.clear();
1328 } else {
1329 current_line = sentence.to_string();
1331 }
1332 } else {
1333 lines.push(sentence.to_string());
1335 }
1336 }
1337 } else {
1338 let trimmed = combined.trim();
1340
1341 if trimmed.is_empty() {
1345 continue;
1346 }
1347
1348 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1349
1350 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1351 lines.push(trimmed.to_string());
1353 current_line.clear();
1354 } else {
1355 current_line = combined;
1357 }
1358 }
1359 } else if let Element::Italic { content, underscore } = element {
1360 let marker = if *underscore { "_" } else { "*" };
1362 handle_emphasis_sentence_split(
1363 content,
1364 marker,
1365 &abbreviations,
1366 require_sentence_capital,
1367 &mut current_line,
1368 &mut lines,
1369 );
1370 } else if let Element::Bold { content, underscore } = element {
1371 let marker = if *underscore { "__" } else { "**" };
1373 handle_emphasis_sentence_split(
1374 content,
1375 marker,
1376 &abbreviations,
1377 require_sentence_capital,
1378 &mut current_line,
1379 &mut lines,
1380 );
1381 } else if let Element::Strikethrough(content) = element {
1382 handle_emphasis_sentence_split(
1384 content,
1385 "~~",
1386 &abbreviations,
1387 require_sentence_capital,
1388 &mut current_line,
1389 &mut lines,
1390 );
1391 } else {
1392 let is_adjacent = if idx > 0 {
1395 match &elements[idx - 1] {
1396 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1397 _ => true,
1398 }
1399 } else {
1400 false
1401 };
1402
1403 if !is_adjacent
1405 && !current_line.is_empty()
1406 && !current_line.ends_with(' ')
1407 && !current_line.ends_with('(')
1408 && !current_line.ends_with('[')
1409 {
1410 current_line.push(' ');
1411 }
1412 current_line.push_str(&element_str);
1413 }
1414 }
1415
1416 if !current_line.is_empty() {
1418 lines.push(current_line.trim().to_string());
1419 }
1420 lines
1421}
1422
1423fn handle_emphasis_sentence_split(
1425 content: &str,
1426 marker: &str,
1427 abbreviations: &HashSet<String>,
1428 require_sentence_capital: bool,
1429 current_line: &mut String,
1430 lines: &mut Vec<String>,
1431) {
1432 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1434
1435 if sentences.len() <= 1 {
1436 if !current_line.is_empty()
1438 && !current_line.ends_with(' ')
1439 && !current_line.ends_with('(')
1440 && !current_line.ends_with('[')
1441 {
1442 current_line.push(' ');
1443 }
1444 current_line.push_str(marker);
1445 current_line.push_str(content);
1446 current_line.push_str(marker);
1447
1448 let trimmed = content.trim();
1450 let ends_with_punct = ends_with_sentence_punct(trimmed);
1451 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1452 lines.push(current_line.clone());
1453 current_line.clear();
1454 }
1455 } else {
1456 for (i, sentence) in sentences.iter().enumerate() {
1458 let trimmed = sentence.trim();
1459 if trimmed.is_empty() {
1460 continue;
1461 }
1462
1463 if i == 0 {
1464 if !current_line.is_empty()
1466 && !current_line.ends_with(' ')
1467 && !current_line.ends_with('(')
1468 && !current_line.ends_with('[')
1469 {
1470 current_line.push(' ');
1471 }
1472 current_line.push_str(marker);
1473 current_line.push_str(trimmed);
1474 current_line.push_str(marker);
1475
1476 let ends_with_punct = ends_with_sentence_punct(trimmed);
1478 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1479 lines.push(current_line.clone());
1480 current_line.clear();
1481 }
1482 } else if i == sentences.len() - 1 {
1483 let ends_with_punct = ends_with_sentence_punct(trimmed);
1485
1486 let mut line = String::new();
1487 line.push_str(marker);
1488 line.push_str(trimmed);
1489 line.push_str(marker);
1490
1491 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1492 lines.push(line);
1493 } else {
1494 *current_line = line;
1496 }
1497 } else {
1498 let mut line = String::new();
1500 line.push_str(marker);
1501 line.push_str(trimmed);
1502 line.push_str(marker);
1503 lines.push(line);
1504 }
1505 }
1506 }
1507}
1508
/// Lowercase words at which `split_at_break_word` may break an over-long line.
///
/// `split_at_break_word` searches a lowercased copy of the text for each entry, only
/// accepts an occurrence that has a space on both sides (or sits at the very start of
/// the text), and splits immediately before the word so that it starts the next line.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1544
/// Returns `true` when `c` is a clause-level punctuation mark: comma, semicolon,
/// colon, or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1549
1550fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1554 let mut spans = Vec::new();
1555 let mut offset = 0;
1556 for element in elements {
1557 let rendered = format!("{element}");
1558 let len = rendered.len();
1559 if !matches!(element, Element::Text(_)) {
1560 spans.push((offset, offset + len));
1561 }
1562 offset += len;
1563 }
1564 spans
1565}
1566
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both span boundaries are excluded: a position equal to a span's start or end is
/// considered outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1571
/// Fraction of the configured line length used as a lower bound on fragment size.
///
/// `split_at_clause_punctuation` and `split_at_break_word` reject a split whose first
/// part is shorter than `line_length * MIN_SPLIT_RATIO`, and `reflow_elements_semantic`
/// tries to merge lines shorter than that bound into the preceding line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1575
1576fn split_at_clause_punctuation(
1580 text: &str,
1581 line_length: usize,
1582 element_spans: &[(usize, usize)],
1583 length_mode: ReflowLengthMode,
1584) -> Option<(String, String)> {
1585 let chars: Vec<char> = text.chars().collect();
1586 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1587
1588 let mut width_acc = 0;
1590 let mut search_end_char = 0;
1591 for (idx, &c) in chars.iter().enumerate() {
1592 let c_width = display_len(&c.to_string(), length_mode);
1593 if width_acc + c_width > line_length {
1594 break;
1595 }
1596 width_acc += c_width;
1597 search_end_char = idx + 1;
1598 }
1599
1600 let mut best_pos = None;
1601 for i in (0..search_end_char).rev() {
1602 if is_clause_punctuation(chars[i]) {
1603 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1605 if !is_inside_element(byte_pos, element_spans) {
1606 best_pos = Some(i);
1607 break;
1608 }
1609 }
1610 }
1611
1612 let pos = best_pos?;
1613
1614 let first: String = chars[..=pos].iter().collect();
1616 let first_display_len = display_len(&first, length_mode);
1617 if first_display_len < min_first_len {
1618 return None;
1619 }
1620
1621 let rest: String = chars[pos + 1..].iter().collect();
1623 let rest = rest.trim_start().to_string();
1624
1625 if rest.is_empty() {
1626 return None;
1627 }
1628
1629 Some((first, rest))
1630}
1631
1632fn split_at_break_word(
1636 text: &str,
1637 line_length: usize,
1638 element_spans: &[(usize, usize)],
1639 length_mode: ReflowLengthMode,
1640) -> Option<(String, String)> {
1641 let lower = text.to_lowercase();
1642 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1643 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1646 let mut search_start = 0;
1647 while let Some(pos) = lower[search_start..].find(word) {
1648 let abs_pos = search_start + pos;
1649
1650 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1652 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1653
1654 if preceded_by_space && followed_by_space {
1655 let first_part = text[..abs_pos].trim_end();
1657 let first_part_len = display_len(first_part, length_mode);
1658
1659 if first_part_len >= min_first_len
1660 && first_part_len <= line_length
1661 && !is_inside_element(abs_pos, element_spans)
1662 {
1663 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1665 best_split = Some((abs_pos, word.len()));
1666 }
1667 }
1668 }
1669
1670 search_start = abs_pos + word.len();
1671 }
1672 }
1673
1674 let (byte_start, _word_len) = best_split?;
1675
1676 let first = text[..byte_start].trim_end().to_string();
1677 let rest = text[byte_start..].to_string();
1678
1679 if first.is_empty() || rest.trim().is_empty() {
1680 return None;
1681 }
1682
1683 Some((first, rest))
1684}
1685
1686fn cascade_split_line(
1689 text: &str,
1690 line_length: usize,
1691 abbreviations: &Option<Vec<String>>,
1692 length_mode: ReflowLengthMode,
1693 attr_lists: bool,
1694) -> Vec<String> {
1695 if line_length == 0 || display_len(text, length_mode) <= line_length {
1696 return vec![text.to_string()];
1697 }
1698
1699 let elements = parse_markdown_elements_inner(text, attr_lists);
1700 let element_spans = compute_element_spans(&elements);
1701
1702 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1704 let mut result = vec![first];
1705 result.extend(cascade_split_line(
1706 &rest,
1707 line_length,
1708 abbreviations,
1709 length_mode,
1710 attr_lists,
1711 ));
1712 return result;
1713 }
1714
1715 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1717 let mut result = vec![first];
1718 result.extend(cascade_split_line(
1719 &rest,
1720 line_length,
1721 abbreviations,
1722 length_mode,
1723 attr_lists,
1724 ));
1725 return result;
1726 }
1727
1728 let options = ReflowOptions {
1730 line_length,
1731 break_on_sentences: false,
1732 preserve_breaks: false,
1733 sentence_per_line: false,
1734 semantic_line_breaks: false,
1735 abbreviations: abbreviations.clone(),
1736 length_mode,
1737 attr_lists,
1738 require_sentence_capital: true,
1739 };
1740 reflow_elements(&elements, &options)
1741}
1742
1743fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1747 let sentence_lines =
1749 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
1750
1751 if options.line_length == 0 {
1754 return sentence_lines;
1755 }
1756
1757 let length_mode = options.length_mode;
1758 let mut result = Vec::new();
1759 for line in sentence_lines {
1760 if display_len(&line, length_mode) <= options.line_length {
1761 result.push(line);
1762 } else {
1763 result.extend(cascade_split_line(
1764 &line,
1765 options.line_length,
1766 &options.abbreviations,
1767 length_mode,
1768 options.attr_lists,
1769 ));
1770 }
1771 }
1772
1773 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1776 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1777 for line in result {
1778 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1779 let prev_ends_at_sentence = {
1781 let trimmed = merged.last().unwrap().trim_end();
1782 trimmed
1783 .chars()
1784 .rev()
1785 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1786 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1787 };
1788
1789 if !prev_ends_at_sentence {
1790 let prev = merged.last_mut().unwrap();
1791 let combined = format!("{prev} {line}");
1792 if display_len(&combined, length_mode) <= options.line_length {
1794 *prev = combined;
1795 continue;
1796 }
1797 }
1798 }
1799 merged.push(line);
1800 }
1801 merged
1802}
1803
/// Finds the byte offset of the last space in `line` that is not strictly inside any
/// of `element_spans`.
///
/// Returns `None` when the line has no space, or when every space lies inside a span.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let inside_element = |pos: usize| element_spans.iter().any(|&(start, end)| start < pos && pos < end);

    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| !inside_element(pos))
}
1817
1818fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1820 let mut lines = Vec::new();
1821 let mut current_line = String::new();
1822 let mut current_length = 0;
1823 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1825 let length_mode = options.length_mode;
1826
1827 for (idx, element) in elements.iter().enumerate() {
1828 let element_str = format!("{element}");
1829 let element_len = element.display_width(length_mode);
1830
1831 let is_adjacent_to_prev = if idx > 0 {
1837 match (&elements[idx - 1], element) {
1838 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1839 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1840 _ => true,
1841 }
1842 } else {
1843 false
1844 };
1845
1846 if let Element::Text(text) = element {
1848 let has_leading_space = text.starts_with(char::is_whitespace);
1850 let words: Vec<&str> = text.split_whitespace().collect();
1852
1853 for (i, word) in words.iter().enumerate() {
1854 let word_len = display_len(word, length_mode);
1855 let is_trailing_punct = word
1857 .chars()
1858 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1859
1860 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1863
1864 if is_first_adjacent {
1865 if current_length + word_len > options.line_length && current_length > 0 {
1867 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1870 let before = current_line[..last_space].trim_end().to_string();
1871 let after = current_line[last_space + 1..].to_string();
1872 lines.push(before);
1873 current_line = format!("{after}{word}");
1874 current_length = display_len(¤t_line, length_mode);
1875 current_line_element_spans.clear();
1876 } else {
1877 current_line.push_str(word);
1878 current_length += word_len;
1879 }
1880 } else {
1881 current_line.push_str(word);
1882 current_length += word_len;
1883 }
1884 } else if current_length > 0
1885 && current_length + 1 + word_len > options.line_length
1886 && !is_trailing_punct
1887 {
1888 lines.push(current_line.trim().to_string());
1890 current_line = word.to_string();
1891 current_length = word_len;
1892 current_line_element_spans.clear();
1893 } else {
1894 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1898 current_line.push(' ');
1899 current_length += 1;
1900 }
1901 current_line.push_str(word);
1902 current_length += word_len;
1903 }
1904 }
1905 } else if matches!(
1906 element,
1907 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1908 ) && element_len > options.line_length
1909 {
1910 let (content, marker): (&str, &str) = match element {
1914 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1915 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1916 Element::Strikethrough(content) => (content.as_str(), "~~"),
1917 _ => unreachable!(),
1918 };
1919
1920 let words: Vec<&str> = content.split_whitespace().collect();
1921 let n = words.len();
1922
1923 if n == 0 {
1924 let full = format!("{marker}{marker}");
1926 let full_len = display_len(&full, length_mode);
1927 if !is_adjacent_to_prev && current_length > 0 {
1928 current_line.push(' ');
1929 current_length += 1;
1930 }
1931 current_line.push_str(&full);
1932 current_length += full_len;
1933 } else {
1934 for (i, word) in words.iter().enumerate() {
1935 let is_first = i == 0;
1936 let is_last = i == n - 1;
1937 let word_str: String = match (is_first, is_last) {
1938 (true, true) => format!("{marker}{word}{marker}"),
1939 (true, false) => format!("{marker}{word}"),
1940 (false, true) => format!("{word}{marker}"),
1941 (false, false) => word.to_string(),
1942 };
1943 let word_len = display_len(&word_str, length_mode);
1944
1945 let needs_space = if is_first {
1946 !is_adjacent_to_prev && current_length > 0
1947 } else {
1948 current_length > 0
1949 };
1950
1951 if needs_space && current_length + 1 + word_len > options.line_length {
1952 lines.push(current_line.trim_end().to_string());
1953 current_line = word_str;
1954 current_length = word_len;
1955 current_line_element_spans.clear();
1956 } else {
1957 if needs_space {
1958 current_line.push(' ');
1959 current_length += 1;
1960 }
1961 current_line.push_str(&word_str);
1962 current_length += word_len;
1963 }
1964 }
1965 }
1966 } else {
1967 if is_adjacent_to_prev {
1971 if current_length + element_len > options.line_length {
1973 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1976 let before = current_line[..last_space].trim_end().to_string();
1977 let after = current_line[last_space + 1..].to_string();
1978 lines.push(before);
1979 current_line = format!("{after}{element_str}");
1980 current_length = display_len(¤t_line, length_mode);
1981 current_line_element_spans.clear();
1982 let start = after.len();
1984 current_line_element_spans.push((start, start + element_str.len()));
1985 } else {
1986 let start = current_line.len();
1988 current_line.push_str(&element_str);
1989 current_length += element_len;
1990 current_line_element_spans.push((start, current_line.len()));
1991 }
1992 } else {
1993 let start = current_line.len();
1994 current_line.push_str(&element_str);
1995 current_length += element_len;
1996 current_line_element_spans.push((start, current_line.len()));
1997 }
1998 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1999 lines.push(current_line.trim().to_string());
2001 current_line = element_str.clone();
2002 current_length = element_len;
2003 current_line_element_spans.clear();
2004 current_line_element_spans.push((0, element_str.len()));
2005 } else {
2006 let ends_with_opener =
2008 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2009 if current_length > 0 && !ends_with_opener {
2010 current_line.push(' ');
2011 current_length += 1;
2012 }
2013 let start = current_line.len();
2014 current_line.push_str(&element_str);
2015 current_length += element_len;
2016 current_line_element_spans.push((start, current_line.len()));
2017 }
2018 }
2019 }
2020
2021 if !current_line.is_empty() {
2023 lines.push(current_line.trim_end().to_string());
2024 }
2025
2026 lines
2027}
2028
2029pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2031 let lines: Vec<&str> = content.lines().collect();
2032 let mut result = Vec::new();
2033 let mut i = 0;
2034
2035 while i < lines.len() {
2036 let line = lines[i];
2037 let trimmed = line.trim();
2038
2039 if trimmed.is_empty() {
2041 result.push(String::new());
2042 i += 1;
2043 continue;
2044 }
2045
2046 if trimmed.starts_with('#') {
2048 result.push(line.to_string());
2049 i += 1;
2050 continue;
2051 }
2052
2053 if trimmed.starts_with(":::") {
2055 result.push(line.to_string());
2056 i += 1;
2057 continue;
2058 }
2059
2060 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2062 result.push(line.to_string());
2063 i += 1;
2064 while i < lines.len() {
2066 result.push(lines[i].to_string());
2067 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2068 i += 1;
2069 break;
2070 }
2071 i += 1;
2072 }
2073 continue;
2074 }
2075
2076 if ElementCache::calculate_indentation_width_default(line) >= 4 {
2078 result.push(line.to_string());
2080 i += 1;
2081 while i < lines.len() {
2082 let next_line = lines[i];
2083 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2085 result.push(next_line.to_string());
2086 i += 1;
2087 } else {
2088 break;
2089 }
2090 }
2091 continue;
2092 }
2093
2094 if trimmed.starts_with('>') {
2096 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2099 let quote_prefix = line[0..gt_pos + 1].to_string();
2100 let quote_content = &line[quote_prefix.len()..].trim_start();
2101
2102 let reflowed = reflow_line(quote_content, options);
2103 for reflowed_line in reflowed.iter() {
2104 result.push(format!("{quote_prefix} {reflowed_line}"));
2105 }
2106 i += 1;
2107 continue;
2108 }
2109
2110 if is_horizontal_rule(trimmed) {
2112 result.push(line.to_string());
2113 i += 1;
2114 continue;
2115 }
2116
2117 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2119 let indent = line.len() - line.trim_start().len();
2121 let indent_str = " ".repeat(indent);
2122
2123 let mut marker_end = indent;
2126 let mut content_start = indent;
2127
2128 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2129 if let Some(period_pos) = line[indent..].find('.') {
2131 marker_end = indent + period_pos + 1; content_start = marker_end;
2133 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2137 content_start += 1;
2138 }
2139 }
2140 } else {
2141 marker_end = indent + 1; content_start = marker_end;
2144 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2148 content_start += 1;
2149 }
2150 }
2151
2152 let marker = &line[indent..marker_end];
2153
2154 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2157 i += 1;
2158
2159 while i < lines.len() {
2161 let next_line = lines[i];
2162 let next_trimmed = next_line.trim();
2163
2164 if is_block_boundary(next_trimmed) {
2166 break;
2167 }
2168
2169 let next_indent = next_line.len() - next_line.trim_start().len();
2171 if next_indent >= content_start {
2172 let trimmed_start = next_line.trim_start();
2175 list_content.push(trim_preserving_hard_break(trimmed_start));
2176 i += 1;
2177 } else {
2178 break;
2180 }
2181 }
2182
2183 let combined_content = if options.preserve_breaks {
2186 list_content[0].clone()
2187 } else {
2188 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2190 if has_hard_breaks {
2191 list_content.join("\n")
2193 } else {
2194 list_content.join(" ")
2196 }
2197 };
2198
2199 let trimmed_marker = marker;
2201 let continuation_spaces = content_start;
2202
2203 let prefix_length = indent + trimmed_marker.len() + 1;
2205
2206 let adjusted_options = ReflowOptions {
2208 line_length: options.line_length.saturating_sub(prefix_length),
2209 ..options.clone()
2210 };
2211
2212 let reflowed = reflow_line(&combined_content, &adjusted_options);
2213 for (j, reflowed_line) in reflowed.iter().enumerate() {
2214 if j == 0 {
2215 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2216 } else {
2217 let continuation_indent = " ".repeat(continuation_spaces);
2219 result.push(format!("{continuation_indent}{reflowed_line}"));
2220 }
2221 }
2222 continue;
2223 }
2224
2225 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2227 result.push(line.to_string());
2228 i += 1;
2229 continue;
2230 }
2231
2232 if trimmed.starts_with('[') && line.contains("]:") {
2234 result.push(line.to_string());
2235 i += 1;
2236 continue;
2237 }
2238
2239 if is_definition_list_item(trimmed) {
2241 result.push(line.to_string());
2242 i += 1;
2243 continue;
2244 }
2245
2246 let mut is_single_line_paragraph = true;
2248 if i + 1 < lines.len() {
2249 let next_trimmed = lines[i + 1].trim();
2250 if !is_block_boundary(next_trimmed) {
2252 is_single_line_paragraph = false;
2253 }
2254 }
2255
2256 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2258 result.push(line.to_string());
2259 i += 1;
2260 continue;
2261 }
2262
2263 let mut paragraph_parts = Vec::new();
2265 let mut current_part = vec![line];
2266 i += 1;
2267
2268 if options.preserve_breaks {
2270 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2272 Some("\\")
2273 } else if line.ends_with(" ") {
2274 Some(" ")
2275 } else {
2276 None
2277 };
2278 let reflowed = reflow_line(line, options);
2279
2280 if let Some(break_marker) = hard_break_type {
2282 if !reflowed.is_empty() {
2283 let mut reflowed_with_break = reflowed;
2284 let last_idx = reflowed_with_break.len() - 1;
2285 if !has_hard_break(&reflowed_with_break[last_idx]) {
2286 reflowed_with_break[last_idx].push_str(break_marker);
2287 }
2288 result.extend(reflowed_with_break);
2289 }
2290 } else {
2291 result.extend(reflowed);
2292 }
2293 } else {
2294 while i < lines.len() {
2296 let prev_line = if !current_part.is_empty() {
2297 current_part.last().unwrap()
2298 } else {
2299 ""
2300 };
2301 let next_line = lines[i];
2302 let next_trimmed = next_line.trim();
2303
2304 if is_block_boundary(next_trimmed) {
2306 break;
2307 }
2308
2309 let prev_trimmed = prev_line.trim();
2312 let abbreviations = get_abbreviations(&options.abbreviations);
2313 let ends_with_sentence = (prev_trimmed.ends_with('.')
2314 || prev_trimmed.ends_with('!')
2315 || prev_trimmed.ends_with('?')
2316 || prev_trimmed.ends_with(".*")
2317 || prev_trimmed.ends_with("!*")
2318 || prev_trimmed.ends_with("?*")
2319 || prev_trimmed.ends_with("._")
2320 || prev_trimmed.ends_with("!_")
2321 || prev_trimmed.ends_with("?_")
2322 || prev_trimmed.ends_with(".\"")
2324 || prev_trimmed.ends_with("!\"")
2325 || prev_trimmed.ends_with("?\"")
2326 || prev_trimmed.ends_with(".'")
2327 || prev_trimmed.ends_with("!'")
2328 || prev_trimmed.ends_with("?'")
2329 || prev_trimmed.ends_with(".\u{201D}")
2330 || prev_trimmed.ends_with("!\u{201D}")
2331 || prev_trimmed.ends_with("?\u{201D}")
2332 || prev_trimmed.ends_with(".\u{2019}")
2333 || prev_trimmed.ends_with("!\u{2019}")
2334 || prev_trimmed.ends_with("?\u{2019}"))
2335 && !text_ends_with_abbreviation(
2336 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2337 &abbreviations,
2338 );
2339
2340 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2341 paragraph_parts.push(current_part.join(" "));
2343 current_part = vec![next_line];
2344 } else {
2345 current_part.push(next_line);
2346 }
2347 i += 1;
2348 }
2349
2350 if !current_part.is_empty() {
2352 if current_part.len() == 1 {
2353 paragraph_parts.push(current_part[0].to_string());
2355 } else {
2356 paragraph_parts.push(current_part.join(" "));
2357 }
2358 }
2359
2360 for (j, part) in paragraph_parts.iter().enumerate() {
2362 let reflowed = reflow_line(part, options);
2363 result.extend(reflowed);
2364
2365 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2369 let last_idx = result.len() - 1;
2370 if !has_hard_break(&result[last_idx]) {
2371 result[last_idx].push_str(" ");
2372 }
2373 }
2374 }
2375 }
2376 }
2377
2378 let result_text = result.join("\n");
2380 if content.ends_with('\n') && !result_text.ends_with('\n') {
2381 format!("{result_text}\n")
2382 } else {
2383 result_text
2384 }
2385}
2386
/// Result of reflowing a single paragraph.
///
/// NOTE(review): the field names suggest a byte range in the original document plus
/// the text that should replace it - confirm the exact range convention (inclusive or
/// exclusive end, line-ending handling) against the code that constructs this value.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Start of the affected range, in bytes.
    pub start_byte: usize,
    /// End of the affected range, in bytes.
    pub end_byte: usize,
    /// The reflowed replacement text.
    pub reflowed_text: String,
}
2397
/// One line of blockquote content together with how it was marked in the source.
///
/// A line built with [`BlockquoteLineData::explicit`] records the prefix it was written
/// with; a line built with [`BlockquoteLineData::lazy`] carries no prefix.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// The line's text content.
    pub(crate) content: String,
    /// `true` for lines created through `explicit`, `false` for lines created through `lazy`.
    pub(crate) is_explicit: bool,
    /// The prefix supplied to `explicit`; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Creates the data for a line that carries its own blockquote `prefix`.
    pub fn explicit(content: String, prefix: String) -> Self {
        let prefix = Some(prefix);
        Self {
            content,
            is_explicit: true,
            prefix,
        }
    }

    /// Creates the data for a continuation line that has no prefix of its own.
    pub fn lazy(content: String) -> Self {
        Self {
            prefix: None,
            is_explicit: false,
            content,
        }
    }
}
2432
/// How continuation lines of a reflowed blockquote paragraph are written.
///
/// `reflow_blockquote_content` always prefixes the first output line; this style
/// decides what happens to the lines after it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every output line receives the explicit blockquote prefix.
    Explicit,
    /// Continuation lines are written without a prefix, except lines for which
    /// `should_force_explicit_blockquote_line` returns `true`.
    Lazy,
}
2439
2440pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2448 let mut explicit_count = 0usize;
2449 let mut lazy_count = 0usize;
2450
2451 for line in lines.iter().skip(1) {
2452 if line.is_explicit {
2453 explicit_count += 1;
2454 } else {
2455 lazy_count += 1;
2456 }
2457 }
2458
2459 if explicit_count > 0 && lazy_count == 0 {
2460 BlockquoteContinuationStyle::Explicit
2461 } else if lazy_count > 0 && explicit_count == 0 {
2462 BlockquoteContinuationStyle::Lazy
2463 } else if explicit_count >= lazy_count {
2464 BlockquoteContinuationStyle::Explicit
2465 } else {
2466 BlockquoteContinuationStyle::Lazy
2467 }
2468}
2469
2470pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2475 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2476
2477 for (idx, line) in lines.iter().enumerate() {
2478 let Some(prefix) = line.prefix.as_ref() else {
2479 continue;
2480 };
2481 counts
2482 .entry(prefix.clone())
2483 .and_modify(|entry| entry.0 += 1)
2484 .or_insert((1, idx));
2485 }
2486
2487 counts
2488 .into_iter()
2489 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2490 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2491 })
2492 .map(|(prefix, _)| prefix)
2493 .unwrap_or_else(|| fallback.to_string())
2494}
2495
2496pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2501 let trimmed = content_line.trim_start();
2502 trimmed.starts_with('>')
2503 || trimmed.starts_with('#')
2504 || trimmed.starts_with("```")
2505 || trimmed.starts_with("~~~")
2506 || is_unordered_list_marker(trimmed)
2507 || is_numbered_list_item(trimmed)
2508 || is_horizontal_rule(trimmed)
2509 || is_definition_list_item(trimmed)
2510 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2511 || trimmed.starts_with(":::")
2512 || (trimmed.starts_with('<')
2513 && !trimmed.starts_with("<http")
2514 && !trimmed.starts_with("<https")
2515 && !trimmed.starts_with("<mailto:"))
2516}
2517
2518pub fn reflow_blockquote_content(
2527 lines: &[BlockquoteLineData],
2528 explicit_prefix: &str,
2529 continuation_style: BlockquoteContinuationStyle,
2530 options: &ReflowOptions,
2531) -> Vec<String> {
2532 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2533 let segments = split_into_segments_strs(&content_strs);
2534 let mut reflowed_content_lines: Vec<String> = Vec::new();
2535
2536 for segment in segments {
2537 let hard_break_type = segment.last().and_then(|&line| {
2538 let line = line.strip_suffix('\r').unwrap_or(line);
2539 if line.ends_with('\\') {
2540 Some("\\")
2541 } else if line.ends_with(" ") {
2542 Some(" ")
2543 } else {
2544 None
2545 }
2546 });
2547
2548 let pieces: Vec<&str> = segment
2549 .iter()
2550 .map(|&line| {
2551 if let Some(l) = line.strip_suffix('\\') {
2552 l.trim_end()
2553 } else if let Some(l) = line.strip_suffix(" ") {
2554 l.trim_end()
2555 } else {
2556 line.trim_end()
2557 }
2558 })
2559 .collect();
2560
2561 let segment_text = pieces.join(" ");
2562 let segment_text = segment_text.trim();
2563 if segment_text.is_empty() {
2564 continue;
2565 }
2566
2567 let mut reflowed = reflow_line(segment_text, options);
2568 if let Some(break_marker) = hard_break_type
2569 && !reflowed.is_empty()
2570 {
2571 let last_idx = reflowed.len() - 1;
2572 if !has_hard_break(&reflowed[last_idx]) {
2573 reflowed[last_idx].push_str(break_marker);
2574 }
2575 }
2576 reflowed_content_lines.extend(reflowed);
2577 }
2578
2579 let mut styled_lines: Vec<String> = Vec::new();
2580 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2581 let force_explicit = idx == 0
2582 || continuation_style == BlockquoteContinuationStyle::Explicit
2583 || should_force_explicit_blockquote_line(line);
2584 if force_explicit {
2585 styled_lines.push(format!("{explicit_prefix}{line}"));
2586 } else {
2587 styled_lines.push(line.clone());
2588 }
2589 }
2590
2591 styled_lines
2592}
2593
2594fn is_blockquote_content_boundary(content: &str) -> bool {
2595 let trimmed = content.trim();
2596 trimmed.is_empty()
2597 || is_block_boundary(trimmed)
2598 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2599 || trimmed.starts_with(":::")
2600 || crate::utils::is_template_directive_only(content)
2601 || is_standalone_attr_list(content)
2602 || is_snippet_block_delimiter(content)
2603}
2604
2605fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2606 let mut segments = Vec::new();
2607 let mut current = Vec::new();
2608
2609 for &line in lines {
2610 current.push(line);
2611 if has_hard_break(line) {
2612 segments.push(current);
2613 current = Vec::new();
2614 }
2615 }
2616
2617 if !current.is_empty() {
2618 segments.push(current);
2619 }
2620
2621 segments
2622}
2623
2624fn reflow_blockquote_paragraph_at_line(
2625 content: &str,
2626 lines: &[&str],
2627 target_idx: usize,
2628 options: &ReflowOptions,
2629) -> Option<ParagraphReflow> {
2630 let mut anchor_idx = target_idx;
2631 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2632 parsed.nesting_level
2633 } else {
2634 let mut found = None;
2635 let mut idx = target_idx;
2636 loop {
2637 if lines[idx].trim().is_empty() {
2638 break;
2639 }
2640 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2641 found = Some((idx, parsed.nesting_level));
2642 break;
2643 }
2644 if idx == 0 {
2645 break;
2646 }
2647 idx -= 1;
2648 }
2649 let (idx, level) = found?;
2650 anchor_idx = idx;
2651 level
2652 };
2653
2654 let mut para_start = anchor_idx;
2656 while para_start > 0 {
2657 let prev_idx = para_start - 1;
2658 let prev_line = lines[prev_idx];
2659
2660 if prev_line.trim().is_empty() {
2661 break;
2662 }
2663
2664 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2665 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2666 break;
2667 }
2668 para_start = prev_idx;
2669 continue;
2670 }
2671
2672 let prev_lazy = prev_line.trim_start();
2673 if is_blockquote_content_boundary(prev_lazy) {
2674 break;
2675 }
2676 para_start = prev_idx;
2677 }
2678
2679 while para_start < lines.len() {
2681 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2682 para_start += 1;
2683 continue;
2684 };
2685 target_level = parsed.nesting_level;
2686 break;
2687 }
2688
2689 if para_start >= lines.len() || para_start > target_idx {
2690 return None;
2691 }
2692
2693 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2696 let mut idx = para_start;
2697 while idx < lines.len() {
2698 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2699 break;
2700 }
2701
2702 let line = lines[idx];
2703 if line.trim().is_empty() {
2704 break;
2705 }
2706
2707 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2708 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2709 break;
2710 }
2711 collected.push((
2712 idx,
2713 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2714 ));
2715 idx += 1;
2716 continue;
2717 }
2718
2719 let lazy_content = line.trim_start();
2720 if is_blockquote_content_boundary(lazy_content) {
2721 break;
2722 }
2723
2724 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2725 idx += 1;
2726 }
2727
2728 if collected.is_empty() {
2729 return None;
2730 }
2731
2732 let para_end = collected[collected.len() - 1].0;
2733 if target_idx < para_start || target_idx > para_end {
2734 return None;
2735 }
2736
2737 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2738
2739 let fallback_prefix = line_data
2740 .iter()
2741 .find_map(|d| d.prefix.clone())
2742 .unwrap_or_else(|| "> ".to_string());
2743 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2744 let continuation_style = blockquote_continuation_style(&line_data);
2745
2746 let adjusted_line_length = options
2747 .line_length
2748 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2749 .max(1);
2750
2751 let adjusted_options = ReflowOptions {
2752 line_length: adjusted_line_length,
2753 ..options.clone()
2754 };
2755
2756 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2757
2758 if styled_lines.is_empty() {
2759 return None;
2760 }
2761
2762 let mut start_byte = 0;
2764 for line in lines.iter().take(para_start) {
2765 start_byte += line.len() + 1;
2766 }
2767
2768 let mut end_byte = start_byte;
2769 for line in lines.iter().take(para_end + 1).skip(para_start) {
2770 end_byte += line.len() + 1;
2771 }
2772
2773 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2774 if !includes_trailing_newline {
2775 end_byte -= 1;
2776 }
2777
2778 let reflowed_joined = styled_lines.join("\n");
2779 let reflowed_text = if includes_trailing_newline {
2780 if reflowed_joined.ends_with('\n') {
2781 reflowed_joined
2782 } else {
2783 format!("{reflowed_joined}\n")
2784 }
2785 } else if reflowed_joined.ends_with('\n') {
2786 reflowed_joined.trim_end_matches('\n').to_string()
2787 } else {
2788 reflowed_joined
2789 };
2790
2791 Some(ParagraphReflow {
2792 start_byte,
2793 end_byte,
2794 reflowed_text,
2795 })
2796}
2797
2798pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2816 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2817}
2818
2819pub fn reflow_paragraph_at_line_with_mode(
2821 content: &str,
2822 line_number: usize,
2823 line_length: usize,
2824 length_mode: ReflowLengthMode,
2825) -> Option<ParagraphReflow> {
2826 let options = ReflowOptions {
2827 line_length,
2828 length_mode,
2829 ..Default::default()
2830 };
2831 reflow_paragraph_at_line_with_options(content, line_number, &options)
2832}
2833
2834pub fn reflow_paragraph_at_line_with_options(
2845 content: &str,
2846 line_number: usize,
2847 options: &ReflowOptions,
2848) -> Option<ParagraphReflow> {
2849 if line_number == 0 {
2850 return None;
2851 }
2852
2853 let lines: Vec<&str> = content.lines().collect();
2854
2855 if line_number > lines.len() {
2857 return None;
2858 }
2859
2860 let target_idx = line_number - 1; let target_line = lines[target_idx];
2862 let trimmed = target_line.trim();
2863
2864 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2867 return Some(blockquote_reflow);
2868 }
2869
2870 if is_paragraph_boundary(trimmed, target_line) {
2872 return None;
2873 }
2874
2875 let mut para_start = target_idx;
2877 while para_start > 0 {
2878 let prev_idx = para_start - 1;
2879 let prev_line = lines[prev_idx];
2880 let prev_trimmed = prev_line.trim();
2881
2882 if is_paragraph_boundary(prev_trimmed, prev_line) {
2884 break;
2885 }
2886
2887 para_start = prev_idx;
2888 }
2889
2890 let mut para_end = target_idx;
2892 while para_end + 1 < lines.len() {
2893 let next_idx = para_end + 1;
2894 let next_line = lines[next_idx];
2895 let next_trimmed = next_line.trim();
2896
2897 if is_paragraph_boundary(next_trimmed, next_line) {
2899 break;
2900 }
2901
2902 para_end = next_idx;
2903 }
2904
2905 let paragraph_lines = &lines[para_start..=para_end];
2907
2908 let mut start_byte = 0;
2910 for line in lines.iter().take(para_start) {
2911 start_byte += line.len() + 1; }
2913
2914 let mut end_byte = start_byte;
2915 for line in paragraph_lines.iter() {
2916 end_byte += line.len() + 1; }
2918
2919 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2922
2923 if !includes_trailing_newline {
2925 end_byte -= 1;
2926 }
2927
2928 let paragraph_text = paragraph_lines.join("\n");
2930
2931 let reflowed = reflow_markdown(¶graph_text, options);
2933
2934 let reflowed_text = if includes_trailing_newline {
2938 if reflowed.ends_with('\n') {
2940 reflowed
2941 } else {
2942 format!("{reflowed}\n")
2943 }
2944 } else {
2945 if reflowed.ends_with('\n') {
2947 reflowed.trim_end_matches('\n').to_string()
2948 } else {
2949 reflowed
2950 }
2951 };
2952
2953 Some(ParagraphReflow {
2954 start_byte,
2955 end_byte,
2956 reflowed_text,
2957 })
2958}
2959
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks abbreviation detection against the default abbreviation set.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Recognised abbreviations, alone or at the end of longer text.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Ordinary words that merely end with a period.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // Other punctuation, no punctuation, and empty input.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Markers followed by content, and bare markers.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Horizontal rules, emphasis, and text that only resembles a marker.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    #[test]
    fn test_is_block_boundary() {
        // Lines that start or delimit a block.
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Plain paragraph text.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Everything that is a block boundary is also a paragraph boundary.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code blocks: four leading spaces or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without a leading pipe.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Ordinary text, including text with a small indent.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening div marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}