1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit in which line length is measured while reflowing.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Count display columns as reported by `unicode_width`; this is the default mode.
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
45#[derive(Clone)]
47pub struct ReflowOptions {
48 pub line_length: usize,
50 pub break_on_sentences: bool,
52 pub preserve_breaks: bool,
54 pub sentence_per_line: bool,
56 pub semantic_line_breaks: bool,
58 pub abbreviations: Option<Vec<String>>,
62 pub length_mode: ReflowLengthMode,
64 pub attr_lists: bool,
67 pub require_sentence_capital: bool,
72 pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79 fn default() -> Self {
80 Self {
81 line_length: 80,
82 break_on_sentences: true,
83 preserve_breaks: false,
84 sentence_per_line: false,
85 semantic_line_breaks: false,
86 abbreviations: None,
87 length_mode: ReflowLengthMode::default(),
88 attr_lists: false,
89 require_sentence_capital: true,
90 max_list_continuation_indent: None,
91 }
92 }
93}
94
/// Marks which characters of `text` belong to an inline code span.
///
/// The returned vector has one entry per `char` (not per byte). An entry is `true`
/// when that character is part of a code span, including the opening and closing
/// backtick runs. A span is an opening run of N backticks closed by the next run of
/// exactly N backticks; an opening run with no matching close is left unmarked and
/// scanning resumes right after it.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run that begins at `from`.
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if chars[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let open_start = cursor;
        let open_len = run_len(open_start);

        // Look for a closing run of the same length; shorter or longer runs are
        // skipped over as ordinary content.
        let mut scan = open_start + open_len;
        let mut span_end = None;
        while scan < total {
            if chars[scan] != '`' {
                scan += 1;
                continue;
            }
            let close_len = run_len(scan);
            scan += close_len;
            if close_len == open_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening run, content and closing run are contiguous.
                mask[open_start..end].fill(true);
                cursor = end;
            }
            // Unclosed: treat the backticks as literal text and continue after them.
            None => cursor = open_start + open_len,
        }
    }

    mask
}
155
156fn is_sentence_boundary(
160 text: &str,
161 pos: usize,
162 abbreviations: &HashSet<String>,
163 require_sentence_capital: bool,
164) -> bool {
165 let chars: Vec<char> = text.chars().collect();
166
167 if pos + 1 >= chars.len() {
168 return false;
169 }
170
171 let c = chars[pos];
172 let next_char = chars[pos + 1];
173
174 if is_cjk_sentence_ending(c) {
177 let mut after_punct_pos = pos + 1;
179 while after_punct_pos < chars.len()
180 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181 {
182 after_punct_pos += 1;
183 }
184
185 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187 after_punct_pos += 1;
188 }
189
190 if after_punct_pos >= chars.len() {
192 return false;
193 }
194
195 while after_punct_pos < chars.len()
197 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198 {
199 after_punct_pos += 1;
200 }
201
202 if after_punct_pos >= chars.len() {
203 return false;
204 }
205
206 return true;
209 }
210
211 if c != '.' && c != '!' && c != '?' {
213 return false;
214 }
215
216 let (_space_pos, after_space_pos) = if next_char == ' ' {
218 (pos + 1, pos + 2)
220 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221 if chars[pos + 2] == ' ' {
223 (pos + 2, pos + 3)
225 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226 (pos + 3, pos + 4)
228 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229 && pos + 4 < chars.len()
230 && chars[pos + 3] == chars[pos + 2]
231 && chars[pos + 4] == ' '
232 {
233 (pos + 4, pos + 5)
235 } else {
236 return false;
237 }
238 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239 (pos + 2, pos + 3)
241 } else if (next_char == '*' || next_char == '_')
242 && pos + 3 < chars.len()
243 && chars[pos + 2] == next_char
244 && chars[pos + 3] == ' '
245 {
246 (pos + 3, pos + 4)
248 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249 (pos + 3, pos + 4)
251 } else {
252 return false;
253 };
254
255 let mut next_char_pos = after_space_pos;
257 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258 next_char_pos += 1;
259 }
260
261 if next_char_pos >= chars.len() {
263 return false;
264 }
265
266 let mut first_letter_pos = next_char_pos;
268 while first_letter_pos < chars.len()
269 && (chars[first_letter_pos] == '*'
270 || chars[first_letter_pos] == '_'
271 || chars[first_letter_pos] == '~'
272 || is_opening_quote(chars[first_letter_pos]))
273 {
274 first_letter_pos += 1;
275 }
276
277 if first_letter_pos >= chars.len() {
279 return false;
280 }
281
282 let first_char = chars[first_letter_pos];
283
284 if c == '!' || c == '?' {
286 return true;
287 }
288
289 if pos > 0 {
293 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296 return false;
297 }
298
299 if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301 return false;
302 }
303
304 if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308 return false;
309 }
310 }
311
312 if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315 return false;
316 }
317
318 true
319}
320
321pub fn split_into_sentences(text: &str) -> Vec<String> {
323 split_into_sentences_custom(text, &None)
324}
325
326pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328 let abbreviations = get_abbreviations(custom_abbreviations);
329 split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332fn split_into_sentences_with_set(
335 text: &str,
336 abbreviations: &HashSet<String>,
337 require_sentence_capital: bool,
338) -> Vec<String> {
339 let in_code = compute_inline_code_mask(text);
341
342 let mut sentences = Vec::new();
343 let mut current_sentence = String::new();
344 let mut chars = text.chars().peekable();
345 let mut pos = 0;
346
347 while let Some(c) = chars.next() {
348 current_sentence.push(c);
349
350 if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351 while let Some(&next) = chars.peek() {
353 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354 current_sentence.push(chars.next().unwrap());
355 pos += 1;
356 } else {
357 break;
358 }
359 }
360
361 if chars.peek() == Some(&' ') {
363 chars.next();
364 pos += 1;
365 }
366
367 sentences.push(current_sentence.trim().to_string());
368 current_sentence.clear();
369 }
370
371 pos += 1;
372 }
373
374 if !current_sentence.trim().is_empty() {
376 sentences.push(current_sentence.trim().to_string());
377 }
378 sentences
379}
380
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_` or `*`,
/// contains only that marker and spaces, and has at least three markers.
///
/// The line must already be stripped of leading whitespace, because the first
/// character has to be the marker itself.
fn is_horizontal_rule(line: &str) -> bool {
    // Three markers need at least three bytes.
    if line.len() < 3 {
        return false;
    }

    let Some(marker) = line.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '_' | '*') {
        return false;
    }

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Returns `true` when `line` starts with one or more numeric characters followed
/// immediately by `". "` (a period and a space), e.g. `"12. item"`.
///
/// "Numeric" follows `char::is_numeric`, so non-ASCII digits are accepted as well.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte index where the leading run of numeric characters stops.
    let number_end = line.find(|ch: char| !ch.is_numeric()).unwrap_or(line.len());

    // No leading number at all (also covers the empty string).
    if number_end == 0 {
        return false;
    }

    line[number_end..].starts_with(". ")
}
433
434fn is_unordered_list_marker(s: &str) -> bool {
436 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437 && !is_horizontal_rule(s)
438 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441fn is_block_boundary_core(trimmed: &str) -> bool {
444 trimmed.is_empty()
445 || trimmed.starts_with('#')
446 || trimmed.starts_with("```")
447 || trimmed.starts_with("~~~")
448 || trimmed.starts_with('>')
449 || (trimmed.starts_with('[') && trimmed.contains("]:"))
450 || is_horizontal_rule(trimmed)
451 || is_unordered_list_marker(trimmed)
452 || is_numbered_list_item(trimmed)
453 || is_definition_list_item(trimmed)
454 || trimmed.starts_with(":::")
455}
456
457fn is_block_boundary(trimmed: &str) -> bool {
460 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467 is_block_boundary_core(trimmed)
468 || calculate_indentation_width_default(line) >= 4
469 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either two or more trailing spaces or a trailing backslash.
/// A single trailing space is only stray whitespace and must not count. A trailing
/// `\r` (from CRLF line endings) is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
481
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// * A trailing `\r` is removed first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalised to exactly two trailing
///   spaces — the shortest form that is still a hard break. A line made only of
///   whitespace becomes the empty string.
/// * Any other trailing whitespace, including a single space, is removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: nothing to trim.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") {
        if content.is_empty() {
            return String::new();
        }
        // Collapse any longer run of trailing spaces to the canonical two.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
517
518fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520 if options.attr_lists {
521 parse_markdown_elements_with_attr_lists(text)
522 } else {
523 parse_markdown_elements(text)
524 }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528 if options.sentence_per_line {
530 let elements = parse_elements(line, options);
531 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532 }
533
534 if options.semantic_line_breaks {
536 let elements = parse_elements(line, options);
537 return reflow_elements_semantic(&elements, options);
538 }
539
540 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543 return vec![line.to_string()];
544 }
545
546 let elements = parse_elements(line, options);
548
549 reflow_elements(&elements, options)
551}
552
/// Source of the image inside a linked image such as `[![alt](src)](target)`.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline URL: `![alt](url)`.
    Inline(String),
    /// Reference label: `![alt][ref]`.
    Reference(String),
}

/// Target of the link that wraps a linked image.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline URL: `[...](url)`.
    Inline(String),
    /// Reference label: `[...][ref]`.
    Reference(String),
}

/// One inline Markdown construct recognised by the parser.
///
/// Each variant stores the pieces needed to render itself back to Markdown through
/// its `Display` implementation; the syntax shown on each variant is exactly what
/// `Display` writes.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, written verbatim.
    Text(String),
    /// `[text](url)`
    Link { text: String, url: String },
    /// `[text][reference]`
    ReferenceLink { text: String, reference: String },
    /// `[text][]`
    EmptyReferenceLink { text: String },
    /// `[reference]`
    ShortcutReference { reference: String },
    /// `![alt](url)`
    InlineImage { alt: String, url: String },
    /// `![alt][reference]`
    ReferenceImage { alt: String, reference: String },
    /// `![alt][]`
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link; image and link may each be inline or reference
    /// style, e.g. `[![alt](img)](url)` or `[![alt][img-ref]][link-ref]`.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// `[^note]`
    FootnoteReference { note: String },
    /// `~~text~~`
    Strikethrough(String),
    /// `[[content]]`
    WikiLink(String),
    /// `$math$`
    InlineMath(String),
    /// `$$math$$`
    DisplayMath(String),
    /// `:name:`
    EmojiShortcode(String),
    /// Autolink, stored and written with its full original text.
    Autolink(String),
    /// HTML tag, stored and written with its full original text.
    HtmlTag(String),
    /// HTML entity, stored and written with its full original text.
    HtmlEntity(String),
    /// Hugo shortcode, stored and written with its full original text.
    HugoShortcode(String),
    /// Attribute list, stored and written with its full original text.
    AttrList(String),
    /// Inline code, written between single backticks.
    Code(String),
    /// Strong emphasis: `__content__` when `underscore` is set, else `**content**`.
    Bold { content: String, underscore: bool },
    /// Emphasis: `_content_` when `underscore` is set, else `*content*`.
    Italic { content: String, underscore: bool },
}

impl std::fmt::Display for Element {
    /// Writes the element back out as Markdown source.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must render the full image syntax: an empty string here would silently
            // drop the image from the reflowed text and report its width as zero.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            // These variants keep their complete original text, delimiters included.
            Element::Autolink(s)
            | Element::HtmlTag(s)
            | Element::HtmlEntity(s)
            | Element::HugoShortcode(s)
            | Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                let delim = if *underscore { "__" } else { "**" };
                write!(f, "{delim}{content}{delim}")
            }
            Element::Italic { content, underscore } => {
                let delim = if *underscore { "_" } else { "*" };
                write!(f, "{delim}{content}{delim}")
            }
        }
    }
}
694
695impl Element {
696 fn display_width(&self, mode: ReflowLengthMode) -> usize {
700 let formatted = format!("{self}");
701 display_len(&formatted, mode)
702 }
703}
704
/// One emphasis, strong or strikethrough span located in a piece of text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset where the span starts, opening delimiter included.
    start: usize,
    /// Byte offset just past the span, closing delimiter included.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (`**` / `__`).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the span is delimited with underscores rather than asterisks.
    uses_underscore: bool,
}
721
722fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732 let mut spans = Vec::new();
733 let mut options = Options::empty();
734 options.insert(Options::ENABLE_STRIKETHROUGH);
735
736 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739 let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741 let parser = Parser::new_ext(text, options).into_offset_iter();
742
743 for (event, range) in parser {
744 match event {
745 Event::Start(Tag::Emphasis) => {
746 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748 emphasis_stack.push((range.start, uses_underscore));
749 }
750 Event::End(TagEnd::Emphasis) => {
751 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752 let content_start = start_byte + 1;
754 let content_end = range.end - 1;
755 if content_end > content_start
756 && let Some(content) = text.get(content_start..content_end)
757 {
758 spans.push(EmphasisSpan {
759 start: start_byte,
760 end: range.end,
761 content: content.to_string(),
762 is_strong: false,
763 is_strikethrough: false,
764 uses_underscore,
765 });
766 }
767 }
768 }
769 Event::Start(Tag::Strong) => {
770 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772 strong_stack.push((range.start, uses_underscore));
773 }
774 Event::End(TagEnd::Strong) => {
775 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776 let content_start = start_byte + 2;
778 let content_end = range.end - 2;
779 if content_end > content_start
780 && let Some(content) = text.get(content_start..content_end)
781 {
782 spans.push(EmphasisSpan {
783 start: start_byte,
784 end: range.end,
785 content: content.to_string(),
786 is_strong: true,
787 is_strikethrough: false,
788 uses_underscore,
789 });
790 }
791 }
792 }
793 Event::Start(Tag::Strikethrough) => {
794 strikethrough_stack.push(range.start);
795 }
796 Event::End(TagEnd::Strikethrough) => {
797 if let Some(start_byte) = strikethrough_stack.pop() {
798 let content_start = start_byte + 2;
800 let content_end = range.end - 2;
801 if content_end > content_start
802 && let Some(content) = text.get(content_start..content_end)
803 {
804 spans.push(EmphasisSpan {
805 start: start_byte,
806 end: range.end,
807 content: content.to_string(),
808 is_strong: false,
809 is_strikethrough: true,
810 uses_underscore: false,
811 });
812 }
813 }
814 }
815 _ => {}
816 }
817 }
818
819 spans.sort_by_key(|s| s.start);
821 spans
822}
823
824fn parse_markdown_elements(text: &str) -> Vec<Element> {
835 parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839 parse_markdown_elements_inner(text, true)
840}
841
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843 let mut elements = Vec::new();
844 let mut remaining = text;
845
846 let emphasis_spans = extract_emphasis_spans(text);
848
849 while !remaining.is_empty() {
850 let current_offset = text.len() - remaining.len();
852 let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856 if remaining.contains("[!") {
860 if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863 {
864 earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865 }
866
867 if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870 {
871 earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872 }
873
874 if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877 {
878 earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879 }
880
881 if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884 {
885 earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886 }
887 }
888
889 if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893 {
894 earliest_match = Some((m.start(), m.end(), "inline_image"));
895 }
896
897 if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900 {
901 earliest_match = Some((m.start(), m.end(), "ref_image"));
902 }
903
904 if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907 {
908 earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909 }
910
911 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914 {
915 earliest_match = Some((m.start(), m.end(), "inline_link"));
916 }
917
918 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921 {
922 earliest_match = Some((m.start(), m.end(), "ref_link"));
923 }
924
925 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929 {
930 earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931 }
932
933 if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936 {
937 earliest_match = Some((m.start(), m.end(), "wiki_link"));
938 }
939
940 if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943 {
944 earliest_match = Some((m.start(), m.end(), "display_math"));
945 }
946
947 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950 {
951 earliest_match = Some((m.start(), m.end(), "inline_math"));
952 }
953
954 if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959 {
960 earliest_match = Some((m.start(), m.end(), "emoji"));
961 }
962
963 if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966 {
967 earliest_match = Some((m.start(), m.end(), "html_entity"));
968 }
969
970 if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974 {
975 earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976 }
977
978 if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982 {
983 let matched_text = &remaining[m.start()..m.end()];
985 let is_url_autolink = matched_text.starts_with("<http://")
986 || matched_text.starts_with("<https://")
987 || matched_text.starts_with("<mailto:")
988 || matched_text.starts_with("<ftp://")
989 || matched_text.starts_with("<ftps://");
990
991 let is_email_autolink = {
994 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995 EMAIL_PATTERN.is_match(content)
996 };
997
998 if is_url_autolink || is_email_autolink {
999 earliest_match = Some((m.start(), m.end(), "autolink"));
1000 } else {
1001 earliest_match = Some((m.start(), m.end(), "html_tag"));
1002 }
1003 }
1004
1005 let mut next_special = remaining.len();
1007 let mut special_type = "";
1008 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009 let mut attr_list_len: usize = 0;
1010
1011 if let Some(pos) = remaining.find('`')
1013 && pos < next_special
1014 {
1015 next_special = pos;
1016 special_type = "code";
1017 }
1018
1019 if attr_lists
1021 && let Some(pos) = remaining.find('{')
1022 && pos < next_special
1023 && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024 && m.start() == 0
1025 {
1026 next_special = pos;
1027 special_type = "attr_list";
1028 attr_list_len = m.end();
1029 }
1030
1031 for span in &emphasis_spans {
1034 if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035 let pos_in_remaining = span.start - current_offset;
1036 if pos_in_remaining < next_special {
1037 next_special = pos_in_remaining;
1038 special_type = "pulldown_emphasis";
1039 pulldown_emphasis = Some(span);
1040 }
1041 break; }
1043 }
1044
1045 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047 pos < next_special
1048 } else {
1049 false
1050 };
1051
1052 if should_process_markdown_link {
1053 let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055 if pos > 0 {
1057 elements.push(Element::Text(remaining[..pos].to_string()));
1058 }
1059
1060 match pattern_type {
1062 "linked_image_ii" => {
1064 if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065 let alt = caps.get(1).map_or("", |m| m.as_str());
1066 let img_url = caps.get(2).map_or("", |m| m.as_str());
1067 let link_url = caps.get(3).map_or("", |m| m.as_str());
1068 elements.push(Element::LinkedImage {
1069 alt: alt.to_string(),
1070 img_source: LinkedImageSource::Inline(img_url.to_string()),
1071 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072 });
1073 remaining = &remaining[match_end..];
1074 } else {
1075 elements.push(Element::Text("[".to_string()));
1076 remaining = &remaining[1..];
1077 }
1078 }
1079 "linked_image_ri" => {
1081 if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082 let alt = caps.get(1).map_or("", |m| m.as_str());
1083 let img_ref = caps.get(2).map_or("", |m| m.as_str());
1084 let link_url = caps.get(3).map_or("", |m| m.as_str());
1085 elements.push(Element::LinkedImage {
1086 alt: alt.to_string(),
1087 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089 });
1090 remaining = &remaining[match_end..];
1091 } else {
1092 elements.push(Element::Text("[".to_string()));
1093 remaining = &remaining[1..];
1094 }
1095 }
1096 "linked_image_ir" => {
1098 if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099 let alt = caps.get(1).map_or("", |m| m.as_str());
1100 let img_url = caps.get(2).map_or("", |m| m.as_str());
1101 let link_ref = caps.get(3).map_or("", |m| m.as_str());
1102 elements.push(Element::LinkedImage {
1103 alt: alt.to_string(),
1104 img_source: LinkedImageSource::Inline(img_url.to_string()),
1105 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106 });
1107 remaining = &remaining[match_end..];
1108 } else {
1109 elements.push(Element::Text("[".to_string()));
1110 remaining = &remaining[1..];
1111 }
1112 }
1113 "linked_image_rr" => {
1115 if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116 let alt = caps.get(1).map_or("", |m| m.as_str());
1117 let img_ref = caps.get(2).map_or("", |m| m.as_str());
1118 let link_ref = caps.get(3).map_or("", |m| m.as_str());
1119 elements.push(Element::LinkedImage {
1120 alt: alt.to_string(),
1121 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123 });
1124 remaining = &remaining[match_end..];
1125 } else {
1126 elements.push(Element::Text("[".to_string()));
1127 remaining = &remaining[1..];
1128 }
1129 }
1130 "inline_image" => {
1131 if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132 let alt = caps.get(1).map_or("", |m| m.as_str());
1133 let url = caps.get(2).map_or("", |m| m.as_str());
1134 elements.push(Element::InlineImage {
1135 alt: alt.to_string(),
1136 url: url.to_string(),
1137 });
1138 remaining = &remaining[match_end..];
1139 } else {
1140 elements.push(Element::Text("!".to_string()));
1141 remaining = &remaining[1..];
1142 }
1143 }
1144 "ref_image" => {
1145 if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146 let alt = caps.get(1).map_or("", |m| m.as_str());
1147 let reference = caps.get(2).map_or("", |m| m.as_str());
1148
1149 if reference.is_empty() {
1150 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151 } else {
1152 elements.push(Element::ReferenceImage {
1153 alt: alt.to_string(),
1154 reference: reference.to_string(),
1155 });
1156 }
1157 remaining = &remaining[match_end..];
1158 } else {
1159 elements.push(Element::Text("!".to_string()));
1160 remaining = &remaining[1..];
1161 }
1162 }
1163 "footnote_ref" => {
1164 if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165 let note = caps.get(1).map_or("", |m| m.as_str());
1166 elements.push(Element::FootnoteReference { note: note.to_string() });
1167 remaining = &remaining[match_end..];
1168 } else {
1169 elements.push(Element::Text("[".to_string()));
1170 remaining = &remaining[1..];
1171 }
1172 }
1173 "inline_link" => {
1174 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175 let text = caps.get(1).map_or("", |m| m.as_str());
1176 let url = caps.get(2).map_or("", |m| m.as_str());
1177 elements.push(Element::Link {
1178 text: text.to_string(),
1179 url: url.to_string(),
1180 });
1181 remaining = &remaining[match_end..];
1182 } else {
1183 elements.push(Element::Text("[".to_string()));
1185 remaining = &remaining[1..];
1186 }
1187 }
1188 "ref_link" => {
1189 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190 let text = caps.get(1).map_or("", |m| m.as_str());
1191 let reference = caps.get(2).map_or("", |m| m.as_str());
1192
1193 if reference.is_empty() {
1194 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196 } else {
1197 elements.push(Element::ReferenceLink {
1199 text: text.to_string(),
1200 reference: reference.to_string(),
1201 });
1202 }
1203 remaining = &remaining[match_end..];
1204 } else {
1205 elements.push(Element::Text("[".to_string()));
1207 remaining = &remaining[1..];
1208 }
1209 }
1210 "shortcut_ref" => {
1211 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212 let reference = caps.get(1).map_or("", |m| m.as_str());
1213 elements.push(Element::ShortcutReference {
1214 reference: reference.to_string(),
1215 });
1216 remaining = &remaining[match_end..];
1217 } else {
1218 elements.push(Element::Text("[".to_string()));
1220 remaining = &remaining[1..];
1221 }
1222 }
1223 "wiki_link" => {
1224 if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225 let content = caps.get(1).map_or("", |m| m.as_str());
1226 elements.push(Element::WikiLink(content.to_string()));
1227 remaining = &remaining[match_end..];
1228 } else {
1229 elements.push(Element::Text("[[".to_string()));
1230 remaining = &remaining[2..];
1231 }
1232 }
1233 "display_math" => {
1234 if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235 let math = caps.get(1).map_or("", |m| m.as_str());
1236 elements.push(Element::DisplayMath(math.to_string()));
1237 remaining = &remaining[match_end..];
1238 } else {
1239 elements.push(Element::Text("$$".to_string()));
1240 remaining = &remaining[2..];
1241 }
1242 }
1243 "inline_math" => {
1244 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245 let math = caps.get(1).map_or("", |m| m.as_str());
1246 elements.push(Element::InlineMath(math.to_string()));
1247 remaining = &remaining[match_end..];
1248 } else {
1249 elements.push(Element::Text("$".to_string()));
1250 remaining = &remaining[1..];
1251 }
1252 }
1253 "emoji" => {
1255 if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256 let emoji = caps.get(1).map_or("", |m| m.as_str());
1257 elements.push(Element::EmojiShortcode(emoji.to_string()));
1258 remaining = &remaining[match_end..];
1259 } else {
1260 elements.push(Element::Text(":".to_string()));
1261 remaining = &remaining[1..];
1262 }
1263 }
1264 "html_entity" => {
1265 elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267 remaining = &remaining[match_end..];
1268 }
1269 "hugo_shortcode" => {
1270 elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272 remaining = &remaining[match_end..];
1273 }
1274 "autolink" => {
1275 elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277 remaining = &remaining[match_end..];
1278 }
1279 "html_tag" => {
1280 elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282 remaining = &remaining[match_end..];
1283 }
1284 _ => {
1285 elements.push(Element::Text("[".to_string()));
1287 remaining = &remaining[1..];
1288 }
1289 }
1290 } else {
1291 if next_special > 0 && next_special < remaining.len() {
1295 elements.push(Element::Text(remaining[..next_special].to_string()));
1296 remaining = &remaining[next_special..];
1297 }
1298
1299 match special_type {
1301 "code" => {
1302 if let Some(code_end) = remaining[1..].find('`') {
1304 let code = &remaining[1..=code_end];
1305 elements.push(Element::Code(code.to_string()));
1306 remaining = &remaining[1 + code_end + 1..];
1307 } else {
1308 elements.push(Element::Text(remaining.to_string()));
1310 break;
1311 }
1312 }
1313 "attr_list" => {
1314 elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315 remaining = &remaining[attr_list_len..];
1316 }
1317 "pulldown_emphasis" => {
1318 if let Some(span) = pulldown_emphasis {
1320 let span_len = span.end - span.start;
1321 if span.is_strikethrough {
1322 elements.push(Element::Strikethrough(span.content.clone()));
1323 } else if span.is_strong {
1324 elements.push(Element::Bold {
1325 content: span.content.clone(),
1326 underscore: span.uses_underscore,
1327 });
1328 } else {
1329 elements.push(Element::Italic {
1330 content: span.content.clone(),
1331 underscore: span.uses_underscore,
1332 });
1333 }
1334 remaining = &remaining[span_len..];
1335 } else {
1336 elements.push(Element::Text(remaining[..1].to_string()));
1338 remaining = &remaining[1..];
1339 }
1340 }
1341 _ => {
1342 elements.push(Element::Text(remaining.to_string()));
1344 break;
1345 }
1346 }
1347 }
1348 }
1349
1350 elements
1351}
1352
/// Reflow parsed inline elements so that each detected sentence lands on its own line.
///
/// A single pending line (`current_line`) is carried across elements:
/// * `Element::Text` is appended to the pending line and the combination is passed to
///   `split_into_sentences_with_set`; finished sentences are pushed to the output.
/// * Italic, bold and strikethrough elements are delegated to
///   `handle_emphasis_sentence_split` with their respective marker.
/// * Every other element is appended to the pending line using its `Display` rendering.
///
/// # Arguments
/// * `elements` - inline elements in the order they should be emitted.
/// * `custom_abbreviations` - passed to `get_abbreviations` to build the abbreviation set.
/// * `require_sentence_capital` - forwarded unchanged to the sentence splitter.
///
/// # Returns
/// The output lines. Whatever remains in the pending line at the end is trimmed and pushed
/// as the final line.
fn reflow_elements_sentence_per_line(
    elements: &[Element],
    custom_abbreviations: &Option<Vec<String>>,
    require_sentence_capital: bool,
) -> Vec<String> {
    let abbreviations = get_abbreviations(custom_abbreviations);
    let mut lines = Vec::new();
    let mut current_line = String::new();

    for (idx, element) in elements.iter().enumerate() {
        // Rendered form of the element; only used by the final (non-text, non-emphasis) branch.
        let element_str = format!("{element}");

        if let Element::Text(text) = element {
            // Sentence detection runs over the pending line plus the new text, so a sentence
            // that started in an earlier element can be completed here.
            let combined = format!("{current_line}{text}");
            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);

            if sentences.len() > 1 {
                for (i, sentence) in sentences.iter().enumerate() {
                    if i == 0 {
                        let trimmed = sentence.trim();

                        // A first fragment that ends in an abbreviation is kept pending
                        // instead of being emitted as a finished sentence.
                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
                            current_line.clone_from(sentence);
                        } else {
                            lines.push(sentence.clone());
                            current_line.clear();
                        }
                    } else if i == sentences.len() - 1 {
                        let trimmed = sentence.trim();
                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                        // The last fragment is only emitted if it is itself a complete
                        // sentence; otherwise it becomes the new pending line so following
                        // elements can extend it.
                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                            lines.push(sentence.clone());
                            current_line.clear();
                        } else {
                            current_line.clone_from(sentence);
                        }
                    } else {
                        // Middle fragments are always emitted as-is.
                        lines.push(sentence.clone());
                    }
                }
            } else {
                let trimmed = combined.trim();

                // Whitespace-only text contributes nothing; the pending line is left untouched.
                if trimmed.is_empty() {
                    continue;
                }

                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                    // Single complete sentence: emit it trimmed.
                    lines.push(trimmed.to_string());
                    current_line.clear();
                } else {
                    // Incomplete sentence: keep the untrimmed combination pending.
                    current_line = combined;
                }
            }
        } else if let Element::Italic { content, underscore } = element {
            // Re-create the marker the element was written with.
            let marker = if *underscore { "_" } else { "*" };
            handle_emphasis_sentence_split(
                content,
                marker,
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else if let Element::Bold { content, underscore } = element {
            let marker = if *underscore { "__" } else { "**" };
            handle_emphasis_sentence_split(
                content,
                marker,
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else if let Element::Strikethrough(content) = element {
            handle_emphasis_sentence_split(
                content,
                "~~",
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else {
            // The element counts as adjacent to its predecessor when the previous element is
            // non-empty text that does not end in whitespace, or is any non-text element.
            let is_adjacent = if idx > 0 {
                match &elements[idx - 1] {
                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
                    _ => true,
                }
            } else {
                false
            };

            // Insert a separating space only for non-adjacent elements, and never directly
            // after an existing space or an opening `(` / `[`.
            if !is_adjacent
                && !current_line.is_empty()
                && !current_line.ends_with(' ')
                && !current_line.ends_with('(')
                && !current_line.ends_with('[')
            {
                current_line.push(' ');
            }
            current_line.push_str(&element_str);
        }
    }

    // Flush the pending line, trimmed on both sides.
    if !current_line.is_empty() {
        lines.push(current_line.trim().to_string());
    }
    lines
}
1492
1493fn handle_emphasis_sentence_split(
1495 content: &str,
1496 marker: &str,
1497 abbreviations: &HashSet<String>,
1498 require_sentence_capital: bool,
1499 current_line: &mut String,
1500 lines: &mut Vec<String>,
1501) {
1502 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505 if sentences.len() <= 1 {
1506 if !current_line.is_empty()
1508 && !current_line.ends_with(' ')
1509 && !current_line.ends_with('(')
1510 && !current_line.ends_with('[')
1511 {
1512 current_line.push(' ');
1513 }
1514 current_line.push_str(marker);
1515 current_line.push_str(content);
1516 current_line.push_str(marker);
1517
1518 let trimmed = content.trim();
1520 let ends_with_punct = ends_with_sentence_punct(trimmed);
1521 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522 lines.push(current_line.clone());
1523 current_line.clear();
1524 }
1525 } else {
1526 for (i, sentence) in sentences.iter().enumerate() {
1528 let trimmed = sentence.trim();
1529 if trimmed.is_empty() {
1530 continue;
1531 }
1532
1533 if i == 0 {
1534 if !current_line.is_empty()
1536 && !current_line.ends_with(' ')
1537 && !current_line.ends_with('(')
1538 && !current_line.ends_with('[')
1539 {
1540 current_line.push(' ');
1541 }
1542 current_line.push_str(marker);
1543 current_line.push_str(trimmed);
1544 current_line.push_str(marker);
1545
1546 let ends_with_punct = ends_with_sentence_punct(trimmed);
1548 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549 lines.push(current_line.clone());
1550 current_line.clear();
1551 }
1552 } else if i == sentences.len() - 1 {
1553 let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556 let mut line = String::new();
1557 line.push_str(marker);
1558 line.push_str(trimmed);
1559 line.push_str(marker);
1560
1561 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562 lines.push(line);
1563 } else {
1564 *current_line = line;
1566 }
1567 } else {
1568 let mut line = String::new();
1570 line.push_str(marker);
1571 line.push_str(trimmed);
1572 line.push_str(marker);
1573 lines.push(line);
1574 }
1575 }
1576 }
1577}
1578
/// Lowercase words that `split_at_break_word` accepts as split points.
///
/// A word only qualifies where it has a space on both sides (or sits at the very start of
/// the text and is followed by a space); the split is placed before the word, so the word
/// starts the second part.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1614
/// Returns `true` if `c` is one of the clause separators recognised by the splitter:
/// comma, semicolon, colon or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1634 debug_assert!(slice.starts_with('('));
1635 let mut depth: i32 = 0;
1636 for (local_byte, c) in slice.char_indices() {
1637 let global_byte = offset + local_byte;
1638 if depth > 0 && is_inside_element(global_byte, element_spans) {
1643 continue;
1644 }
1645 match c {
1646 '(' => depth += 1,
1647 ')' => {
1648 depth -= 1;
1649 if depth == 0 {
1650 let end = local_byte + 1;
1651 let inner = &slice[1..local_byte];
1652 return Some((end, inner));
1653 }
1654 }
1655 _ => {}
1656 }
1657 }
1658 None
1659}
1660
1661fn split_at_parenthetical(
1678 text: &str,
1679 line_length: usize,
1680 element_spans: &[(usize, usize)],
1681 length_mode: ReflowLengthMode,
1682) -> Option<(String, String)> {
1683 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1684
1685 if text.starts_with('(')
1687 && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1688 && inner.contains(' ')
1689 {
1690 let tail = &text[end_local..];
1695 let (first_end, rest_start) = match tail.chars().next() {
1696 Some(c) if is_clause_punctuation(c) => (end_local + c.len_utf8(), end_local + c.len_utf8()),
1697 _ => (end_local, end_local),
1698 };
1699 let first = &text[..first_end];
1700 let first_len = display_len(first, length_mode);
1701 if first_len <= line_length {
1704 let rest = text[rest_start..].trim_start();
1705 if !rest.is_empty() {
1706 return Some((first.to_string(), rest.to_string()));
1707 }
1708 }
1709 }
1710
1711 let mut best_open_byte: Option<usize> = None;
1713 let mut pos = 0usize;
1714 while pos < text.len() {
1715 if text.as_bytes()[pos] != b'(' {
1717 let c = text[pos..].chars().next().unwrap();
1718 pos += c.len_utf8();
1719 continue;
1720 }
1721 if is_inside_element(pos, element_spans) {
1723 pos += 1;
1724 continue;
1725 }
1726 if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1727 let first = text[..pos].trim_end();
1728 let first_len = display_len(first, length_mode);
1729 if !first.is_empty()
1730 && first_len >= min_first_len
1731 && first_len <= line_length
1732 && inner.contains(' ')
1733 && best_open_byte.is_none_or(|prev| pos > prev)
1734 {
1735 best_open_byte = Some(pos);
1736 }
1737 pos += end_local;
1738 } else {
1739 pos += 1;
1740 }
1741 }
1742
1743 let open_byte = best_open_byte?;
1744 let first = text[..open_byte].trim_end().to_string();
1745 let rest = text[open_byte..].to_string();
1746 if first.is_empty() || rest.trim().is_empty() {
1747 return None;
1748 }
1749 Some((first, rest))
1750}
1751
1752fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1756 let mut spans = Vec::new();
1757 let mut offset = 0;
1758 for element in elements {
1759 let rendered = format!("{element}");
1760 let len = rendered.len();
1761 if !matches!(element, Element::Text(_)) {
1762 spans.push((offset, offset + len));
1763 }
1764 offset += len;
1765 }
1766 spans
1767}
1768
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both bounds are exclusive: a position equal to a span's start or end is not inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1773
/// Fraction of the configured line length used as a lower bound by the semantic splitters.
///
/// The split helpers reject a split whose first part is shorter than
/// `line_length * MIN_SPLIT_RATIO`, and `reflow_elements_semantic` uses the same threshold to
/// decide which lines are short enough to be considered for merging into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1777
1778fn split_at_clause_punctuation(
1782 text: &str,
1783 line_length: usize,
1784 element_spans: &[(usize, usize)],
1785 length_mode: ReflowLengthMode,
1786) -> Option<(String, String)> {
1787 let chars: Vec<char> = text.chars().collect();
1788 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1789
1790 let mut width_acc = 0;
1792 let mut search_end_char = 0;
1793 for (idx, &c) in chars.iter().enumerate() {
1794 let c_width = display_len(&c.to_string(), length_mode);
1795 if width_acc + c_width > line_length {
1796 break;
1797 }
1798 width_acc += c_width;
1799 search_end_char = idx + 1;
1800 }
1801
1802 let mut paren_depth: i32 = 0;
1809 let mut best_pos = None;
1810 for i in (0..search_end_char).rev() {
1811 let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1813 let byte_after: usize = byte_start + chars[i].len_utf8();
1815
1816 if !is_inside_element(byte_start, element_spans) {
1817 match chars[i] {
1818 ')' => paren_depth += 1,
1819 '(' => paren_depth = paren_depth.saturating_sub(1),
1820 _ => {}
1821 }
1822 }
1823
1824 if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1825 best_pos = Some(i);
1826 break;
1827 }
1828 }
1829
1830 let pos = best_pos?;
1831
1832 let first: String = chars[..=pos].iter().collect();
1834 let first_display_len = display_len(&first, length_mode);
1835 if first_display_len < min_first_len {
1836 return None;
1837 }
1838
1839 let rest: String = chars[pos + 1..].iter().collect();
1841 let rest = rest.trim_start().to_string();
1842
1843 if rest.is_empty() {
1844 return None;
1845 }
1846
1847 Some((first, rest))
1848}
1849
1850fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1857 let mut map = vec![0i32; text.len()];
1858 let mut depth = 0i32;
1859 for (byte, c) in text.char_indices() {
1860 if !is_inside_element(byte, element_spans) {
1861 match c {
1862 '(' => depth += 1,
1863 ')' => depth = depth.saturating_sub(1),
1864 _ => {}
1865 }
1866 }
1867 let end = (byte + c.len_utf8()).min(map.len());
1869 for slot in &mut map[byte..end] {
1870 *slot = depth;
1871 }
1872 }
1873 map
1874}
1875
1876fn is_standalone_parenthetical(line: &str) -> bool {
1885 let trimmed = line.trim();
1886 if !trimmed.starts_with('(') {
1887 return false;
1888 }
1889 let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1891 if !core.ends_with(')') {
1892 return false;
1893 }
1894 let inner = &core[1..core.len() - 1];
1896 if !inner.contains(' ') {
1897 return false;
1898 }
1899 let mut depth = 0i32;
1901 for c in core.chars() {
1902 match c {
1903 '(' => depth += 1,
1904 ')' => depth -= 1,
1905 _ => {}
1906 }
1907 if depth < 0 {
1908 return false;
1909 }
1910 }
1911 depth == 0
1912}
1913
1914fn split_at_break_word(
1918 text: &str,
1919 line_length: usize,
1920 element_spans: &[(usize, usize)],
1921 length_mode: ReflowLengthMode,
1922) -> Option<(String, String)> {
1923 let lower = text.to_lowercase();
1924 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1925 let mut best_split: Option<(usize, usize)> = None; let depth_map = paren_depth_map(text, element_spans);
1930
1931 for &word in BREAK_WORDS {
1932 let mut search_start = 0;
1933 while let Some(pos) = lower[search_start..].find(word) {
1934 let abs_pos = search_start + pos;
1935
1936 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1938 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1939
1940 if preceded_by_space && followed_by_space {
1941 let first_part = text[..abs_pos].trim_end();
1943 let first_part_len = display_len(first_part, length_mode);
1944
1945 let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1947
1948 if first_part_len >= min_first_len
1949 && first_part_len <= line_length
1950 && !is_inside_element(abs_pos, element_spans)
1951 && !inside_paren
1952 {
1953 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1955 best_split = Some((abs_pos, word.len()));
1956 }
1957 }
1958 }
1959
1960 search_start = abs_pos + word.len();
1961 }
1962 }
1963
1964 let (byte_start, _word_len) = best_split?;
1965
1966 let first = text[..byte_start].trim_end().to_string();
1967 let rest = text[byte_start..].to_string();
1968
1969 if first.is_empty() || rest.trim().is_empty() {
1970 return None;
1971 }
1972
1973 Some((first, rest))
1974}
1975
1976fn cascade_split_line(
1979 text: &str,
1980 line_length: usize,
1981 abbreviations: &Option<Vec<String>>,
1982 length_mode: ReflowLengthMode,
1983 attr_lists: bool,
1984) -> Vec<String> {
1985 if line_length == 0 || display_len(text, length_mode) <= line_length {
1986 return vec![text.to_string()];
1987 }
1988
1989 let elements = parse_markdown_elements_inner(text, attr_lists);
1990 let element_spans = compute_element_spans(&elements);
1991
1992 if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1995 let mut result = vec![first];
1996 result.extend(cascade_split_line(
1997 &rest,
1998 line_length,
1999 abbreviations,
2000 length_mode,
2001 attr_lists,
2002 ));
2003 return result;
2004 }
2005
2006 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2008 let mut result = vec![first];
2009 result.extend(cascade_split_line(
2010 &rest,
2011 line_length,
2012 abbreviations,
2013 length_mode,
2014 attr_lists,
2015 ));
2016 return result;
2017 }
2018
2019 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2021 let mut result = vec![first];
2022 result.extend(cascade_split_line(
2023 &rest,
2024 line_length,
2025 abbreviations,
2026 length_mode,
2027 attr_lists,
2028 ));
2029 return result;
2030 }
2031
2032 let options = ReflowOptions {
2034 line_length,
2035 break_on_sentences: false,
2036 preserve_breaks: false,
2037 sentence_per_line: false,
2038 semantic_line_breaks: false,
2039 abbreviations: abbreviations.clone(),
2040 length_mode,
2041 attr_lists,
2042 require_sentence_capital: true,
2043 max_list_continuation_indent: None,
2044 };
2045 reflow_elements(&elements, &options)
2046}
2047
2048fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2052 let sentence_lines =
2054 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2055
2056 if options.line_length == 0 {
2059 return sentence_lines;
2060 }
2061
2062 let length_mode = options.length_mode;
2063 let mut result = Vec::new();
2064 for line in sentence_lines {
2065 if display_len(&line, length_mode) <= options.line_length {
2066 result.push(line);
2067 } else {
2068 result.extend(cascade_split_line(
2069 &line,
2070 options.line_length,
2071 &options.abbreviations,
2072 length_mode,
2073 options.attr_lists,
2074 ));
2075 }
2076 }
2077
2078 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2081 let mut merged: Vec<String> = Vec::with_capacity(result.len());
2082 for line in result {
2083 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2084 if is_standalone_parenthetical(&line) {
2087 merged.push(line);
2088 continue;
2089 }
2090
2091 let prev_ends_at_sentence = {
2093 let trimmed = merged.last().unwrap().trim_end();
2094 trimmed
2095 .chars()
2096 .rev()
2097 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2098 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2099 };
2100
2101 if !prev_ends_at_sentence {
2102 let prev = merged.last_mut().unwrap();
2103 let combined = format!("{prev} {line}");
2104 if display_len(&combined, length_mode) <= options.line_length {
2106 *prev = combined;
2107 continue;
2108 }
2109 }
2110 }
2111 merged.push(line);
2112 }
2113 merged
2114}
2115
/// Find the byte index of the last space in `line` that is not strictly inside any of
/// `element_spans`.
///
/// Span bounds are exclusive, so a space located exactly at a span's start or end counts as
/// safe. Returns `None` when every space is protected or the line has no space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(idx, _)| idx)
        .find(|&idx| element_spans.iter().all(|&(start, end)| idx <= start || idx >= end))
}
2129
2130fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2132 let mut lines = Vec::new();
2133 let mut current_line = String::new();
2134 let mut current_length = 0;
2135 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2137 let length_mode = options.length_mode;
2138
2139 for (idx, element) in elements.iter().enumerate() {
2140 let element_str = format!("{element}");
2141 let element_len = element.display_width(length_mode);
2142
2143 let is_adjacent_to_prev = if idx > 0 {
2149 match (&elements[idx - 1], element) {
2150 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2151 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2152 _ => true,
2153 }
2154 } else {
2155 false
2156 };
2157
2158 if let Element::Text(text) = element {
2160 let has_leading_space = text.starts_with(char::is_whitespace);
2162 let words: Vec<&str> = text.split_whitespace().collect();
2164
2165 for (i, word) in words.iter().enumerate() {
2166 let word_len = display_len(word, length_mode);
2167 let is_trailing_punct = word
2169 .chars()
2170 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2171
2172 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2175
2176 if is_first_adjacent {
2177 if current_length + word_len > options.line_length && current_length > 0 {
2179 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2182 let before = current_line[..last_space].trim_end().to_string();
2183 let after = current_line[last_space + 1..].to_string();
2184 lines.push(before);
2185 current_line = format!("{after}{word}");
2186 current_length = display_len(¤t_line, length_mode);
2187 current_line_element_spans.clear();
2188 } else {
2189 current_line.push_str(word);
2190 current_length += word_len;
2191 }
2192 } else {
2193 current_line.push_str(word);
2194 current_length += word_len;
2195 }
2196 } else if current_length > 0
2197 && current_length + 1 + word_len > options.line_length
2198 && !is_trailing_punct
2199 {
2200 lines.push(current_line.trim().to_string());
2202 current_line = word.to_string();
2203 current_length = word_len;
2204 current_line_element_spans.clear();
2205 } else {
2206 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2210 current_line.push(' ');
2211 current_length += 1;
2212 }
2213 current_line.push_str(word);
2214 current_length += word_len;
2215 }
2216 }
2217 } else if matches!(
2218 element,
2219 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2220 ) && element_len > options.line_length
2221 {
2222 let (content, marker): (&str, &str) = match element {
2226 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2227 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2228 Element::Strikethrough(content) => (content.as_str(), "~~"),
2229 _ => unreachable!(),
2230 };
2231
2232 let words: Vec<&str> = content.split_whitespace().collect();
2233 let n = words.len();
2234
2235 if n == 0 {
2236 let full = format!("{marker}{marker}");
2238 let full_len = display_len(&full, length_mode);
2239 if !is_adjacent_to_prev && current_length > 0 {
2240 current_line.push(' ');
2241 current_length += 1;
2242 }
2243 current_line.push_str(&full);
2244 current_length += full_len;
2245 } else {
2246 for (i, word) in words.iter().enumerate() {
2247 let is_first = i == 0;
2248 let is_last = i == n - 1;
2249 let word_str: String = match (is_first, is_last) {
2250 (true, true) => format!("{marker}{word}{marker}"),
2251 (true, false) => format!("{marker}{word}"),
2252 (false, true) => format!("{word}{marker}"),
2253 (false, false) => word.to_string(),
2254 };
2255 let word_len = display_len(&word_str, length_mode);
2256
2257 let needs_space = if is_first {
2258 !is_adjacent_to_prev && current_length > 0
2259 } else {
2260 current_length > 0
2261 };
2262
2263 if needs_space && current_length + 1 + word_len > options.line_length {
2264 lines.push(current_line.trim_end().to_string());
2265 current_line = word_str;
2266 current_length = word_len;
2267 current_line_element_spans.clear();
2268 } else {
2269 if needs_space {
2270 current_line.push(' ');
2271 current_length += 1;
2272 }
2273 current_line.push_str(&word_str);
2274 current_length += word_len;
2275 }
2276 }
2277 }
2278 } else {
2279 if is_adjacent_to_prev {
2283 if current_length + element_len > options.line_length {
2285 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2288 let before = current_line[..last_space].trim_end().to_string();
2289 let after = current_line[last_space + 1..].to_string();
2290 lines.push(before);
2291 current_line = format!("{after}{element_str}");
2292 current_length = display_len(¤t_line, length_mode);
2293 current_line_element_spans.clear();
2294 let start = after.len();
2296 current_line_element_spans.push((start, start + element_str.len()));
2297 } else {
2298 let start = current_line.len();
2300 current_line.push_str(&element_str);
2301 current_length += element_len;
2302 current_line_element_spans.push((start, current_line.len()));
2303 }
2304 } else {
2305 let start = current_line.len();
2306 current_line.push_str(&element_str);
2307 current_length += element_len;
2308 current_line_element_spans.push((start, current_line.len()));
2309 }
2310 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2311 lines.push(current_line.trim().to_string());
2313 current_line.clone_from(&element_str);
2314 current_length = element_len;
2315 current_line_element_spans.clear();
2316 current_line_element_spans.push((0, element_str.len()));
2317 } else {
2318 let ends_with_opener =
2320 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2321 if current_length > 0 && !ends_with_opener {
2322 current_line.push(' ');
2323 current_length += 1;
2324 }
2325 let start = current_line.len();
2326 current_line.push_str(&element_str);
2327 current_length += element_len;
2328 current_line_element_spans.push((start, current_line.len()));
2329 }
2330 }
2331 }
2332
2333 if !current_line.is_empty() {
2335 lines.push(current_line.trim_end().to_string());
2336 }
2337
2338 lines
2339}
2340
2341pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2343 let lines: Vec<&str> = content.lines().collect();
2344 let mut result = Vec::new();
2345 let mut i = 0;
2346
2347 while i < lines.len() {
2348 let line = lines[i];
2349 let trimmed = line.trim();
2350
2351 if trimmed.is_empty() {
2353 result.push(String::new());
2354 i += 1;
2355 continue;
2356 }
2357
2358 if trimmed.starts_with('#') {
2360 result.push(line.to_string());
2361 i += 1;
2362 continue;
2363 }
2364
2365 if trimmed.starts_with(":::") {
2367 result.push(line.to_string());
2368 i += 1;
2369 continue;
2370 }
2371
2372 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2374 result.push(line.to_string());
2375 i += 1;
2376 while i < lines.len() {
2378 result.push(lines[i].to_string());
2379 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2380 i += 1;
2381 break;
2382 }
2383 i += 1;
2384 }
2385 continue;
2386 }
2387
2388 if calculate_indentation_width_default(line) >= 4 {
2390 result.push(line.to_string());
2392 i += 1;
2393 while i < lines.len() {
2394 let next_line = lines[i];
2395 if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2397 result.push(next_line.to_string());
2398 i += 1;
2399 } else {
2400 break;
2401 }
2402 }
2403 continue;
2404 }
2405
2406 if trimmed.starts_with('>') {
2408 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2411 let quote_prefix = line[0..=gt_pos].to_string();
2412 let quote_content = &line[quote_prefix.len()..].trim_start();
2413
2414 let reflowed = reflow_line(quote_content, options);
2415 for reflowed_line in &reflowed {
2416 result.push(format!("{quote_prefix} {reflowed_line}"));
2417 }
2418 i += 1;
2419 continue;
2420 }
2421
2422 if is_horizontal_rule(trimmed) {
2424 result.push(line.to_string());
2425 i += 1;
2426 continue;
2427 }
2428
2429 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2431 let indent = line.len() - line.trim_start().len();
2433 let indent_str = " ".repeat(indent);
2434
2435 let mut marker_end = indent;
2438 let mut content_start = indent;
2439
2440 if trimmed.chars().next().is_some_and(char::is_numeric) {
2441 if let Some(period_pos) = line[indent..].find('.') {
2443 marker_end = indent + period_pos + 1; content_start = marker_end;
2445 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2449 content_start += 1;
2450 }
2451 }
2452 } else {
2453 marker_end = indent + 1; content_start = marker_end;
2456 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2460 content_start += 1;
2461 }
2462 }
2463
2464 let min_continuation_indent = content_start;
2466
2467 let rest = &line[content_start..];
2470 if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2471 marker_end = content_start + 3; content_start += 4; }
2474
2475 let marker = &line[indent..marker_end];
2476
2477 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2480 i += 1;
2481
2482 while i < lines.len() {
2486 let next_line = lines[i];
2487 let next_trimmed = next_line.trim();
2488
2489 if is_block_boundary(next_trimmed) {
2491 break;
2492 }
2493
2494 let next_indent = next_line.len() - next_line.trim_start().len();
2496 if next_indent >= min_continuation_indent {
2497 let trimmed_start = next_line.trim_start();
2500 list_content.push(trim_preserving_hard_break(trimmed_start));
2501 i += 1;
2502 } else {
2503 break;
2505 }
2506 }
2507
2508 let combined_content = if options.preserve_breaks {
2511 list_content[0].clone()
2512 } else {
2513 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2515 if has_hard_breaks {
2516 list_content.join("\n")
2518 } else {
2519 list_content.join(" ")
2521 }
2522 };
2523
2524 let trimmed_marker = marker;
2526 let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2527 indent + (content_start - indent).min(max_indent)
2530 } else {
2531 content_start
2532 };
2533
2534 let prefix_length = indent + trimmed_marker.len() + 1;
2536
2537 let adjusted_options = ReflowOptions {
2539 line_length: options.line_length.saturating_sub(prefix_length),
2540 ..options.clone()
2541 };
2542
2543 let reflowed = reflow_line(&combined_content, &adjusted_options);
2544 for (j, reflowed_line) in reflowed.iter().enumerate() {
2545 if j == 0 {
2546 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2547 } else {
2548 let continuation_indent = " ".repeat(continuation_spaces);
2550 result.push(format!("{continuation_indent}{reflowed_line}"));
2551 }
2552 }
2553 continue;
2554 }
2555
2556 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2558 result.push(line.to_string());
2559 i += 1;
2560 continue;
2561 }
2562
2563 if trimmed.starts_with('[') && line.contains("]:") {
2565 result.push(line.to_string());
2566 i += 1;
2567 continue;
2568 }
2569
2570 if is_definition_list_item(trimmed) {
2572 result.push(line.to_string());
2573 i += 1;
2574 continue;
2575 }
2576
2577 let mut is_single_line_paragraph = true;
2579 if i + 1 < lines.len() {
2580 let next_trimmed = lines[i + 1].trim();
2581 if !is_block_boundary(next_trimmed) {
2583 is_single_line_paragraph = false;
2584 }
2585 }
2586
2587 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2589 result.push(line.to_string());
2590 i += 1;
2591 continue;
2592 }
2593
2594 let mut paragraph_parts = Vec::new();
2596 let mut current_part = vec![line];
2597 i += 1;
2598
2599 if options.preserve_breaks {
2601 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2603 Some("\\")
2604 } else if line.ends_with(" ") {
2605 Some(" ")
2606 } else {
2607 None
2608 };
2609 let reflowed = reflow_line(line, options);
2610
2611 if let Some(break_marker) = hard_break_type {
2613 if !reflowed.is_empty() {
2614 let mut reflowed_with_break = reflowed;
2615 let last_idx = reflowed_with_break.len() - 1;
2616 if !has_hard_break(&reflowed_with_break[last_idx]) {
2617 reflowed_with_break[last_idx].push_str(break_marker);
2618 }
2619 result.extend(reflowed_with_break);
2620 }
2621 } else {
2622 result.extend(reflowed);
2623 }
2624 } else {
2625 while i < lines.len() {
2627 let prev_line = if !current_part.is_empty() {
2628 current_part.last().unwrap()
2629 } else {
2630 ""
2631 };
2632 let next_line = lines[i];
2633 let next_trimmed = next_line.trim();
2634
2635 if is_block_boundary(next_trimmed) {
2637 break;
2638 }
2639
2640 let prev_trimmed = prev_line.trim();
2643 let abbreviations = get_abbreviations(&options.abbreviations);
2644 let ends_with_sentence = (prev_trimmed.ends_with('.')
2645 || prev_trimmed.ends_with('!')
2646 || prev_trimmed.ends_with('?')
2647 || prev_trimmed.ends_with(".*")
2648 || prev_trimmed.ends_with("!*")
2649 || prev_trimmed.ends_with("?*")
2650 || prev_trimmed.ends_with("._")
2651 || prev_trimmed.ends_with("!_")
2652 || prev_trimmed.ends_with("?_")
2653 || prev_trimmed.ends_with(".\"")
2655 || prev_trimmed.ends_with("!\"")
2656 || prev_trimmed.ends_with("?\"")
2657 || prev_trimmed.ends_with(".'")
2658 || prev_trimmed.ends_with("!'")
2659 || prev_trimmed.ends_with("?'")
2660 || prev_trimmed.ends_with(".\u{201D}")
2661 || prev_trimmed.ends_with("!\u{201D}")
2662 || prev_trimmed.ends_with("?\u{201D}")
2663 || prev_trimmed.ends_with(".\u{2019}")
2664 || prev_trimmed.ends_with("!\u{2019}")
2665 || prev_trimmed.ends_with("?\u{2019}"))
2666 && !text_ends_with_abbreviation(
2667 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2668 &abbreviations,
2669 );
2670
2671 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2672 paragraph_parts.push(current_part.join(" "));
2674 current_part = vec![next_line];
2675 } else {
2676 current_part.push(next_line);
2677 }
2678 i += 1;
2679 }
2680
2681 if !current_part.is_empty() {
2683 if current_part.len() == 1 {
2684 paragraph_parts.push(current_part[0].to_string());
2686 } else {
2687 paragraph_parts.push(current_part.join(" "));
2688 }
2689 }
2690
2691 for (j, part) in paragraph_parts.iter().enumerate() {
2693 let reflowed = reflow_line(part, options);
2694 result.extend(reflowed);
2695
2696 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2700 let last_idx = result.len() - 1;
2701 if !has_hard_break(&result[last_idx]) {
2702 result[last_idx].push_str(" ");
2703 }
2704 }
2705 }
2706 }
2707 }
2708
2709 let result_text = result.join("\n");
2711 if content.ends_with('\n') && !result_text.ends_with('\n') {
2712 format!("{result_text}\n")
2713 } else {
2714 result_text
2715 }
2716}
2717
/// Outcome of reflowing one paragraph: the byte range it occupied in the
/// original content and the text that should replace that range.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly (for example
/// with `assert_eq!` in tests), like the other small value types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset of the first byte of the paragraph in the original content.
    pub start_byte: usize,
    /// Byte offset one past the end of the paragraph. The paragraph's trailing
    /// line terminator is included in the range when the paragraph has one.
    pub end_byte: usize,
    /// Replacement text for the `start_byte..end_byte` range.
    pub reflowed_text: String,
}
2728
/// One line of a blockquote paragraph, split into its quote prefix and content.
///
/// Derives `PartialEq`/`Eq` so collected lines can be compared directly,
/// consistent with [`BlockquoteContinuationStyle`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockquoteLineData {
    /// Text of the line with the blockquote prefix (if any) removed.
    pub(crate) content: String,
    /// `true` when the line carried its own blockquote marker, `false` for a
    /// lazy continuation line that had none.
    pub(crate) is_explicit: bool,
    /// The blockquote prefix exactly as written on the line; `None` for lazy
    /// continuation lines.
    pub(crate) prefix: Option<String>,
}
2743
2744impl BlockquoteLineData {
2745 pub fn explicit(content: String, prefix: String) -> Self {
2747 Self {
2748 content,
2749 is_explicit: true,
2750 prefix: Some(prefix),
2751 }
2752 }
2753
2754 pub fn lazy(content: String) -> Self {
2756 Self {
2757 content,
2758 is_explicit: false,
2759 prefix: None,
2760 }
2761 }
2762}
2763
/// How the continuation lines (every line after the first) of a blockquote
/// paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines repeat the blockquote marker.
    Explicit,
    /// Continuation lines omit the marker (lazy continuation).
    Lazy,
}
2770
2771pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2779 let mut explicit_count = 0usize;
2780 let mut lazy_count = 0usize;
2781
2782 for line in lines.iter().skip(1) {
2783 if line.is_explicit {
2784 explicit_count += 1;
2785 } else {
2786 lazy_count += 1;
2787 }
2788 }
2789
2790 if explicit_count > 0 && lazy_count == 0 {
2791 BlockquoteContinuationStyle::Explicit
2792 } else if lazy_count > 0 && explicit_count == 0 {
2793 BlockquoteContinuationStyle::Lazy
2794 } else if explicit_count >= lazy_count {
2795 BlockquoteContinuationStyle::Explicit
2796 } else {
2797 BlockquoteContinuationStyle::Lazy
2798 }
2799}
2800
2801pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2806 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2807
2808 for (idx, line) in lines.iter().enumerate() {
2809 let Some(prefix) = line.prefix.as_ref() else {
2810 continue;
2811 };
2812 counts
2813 .entry(prefix.clone())
2814 .and_modify(|entry| entry.0 += 1)
2815 .or_insert((1, idx));
2816 }
2817
2818 counts
2819 .into_iter()
2820 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2821 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2822 })
2823 .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix)
2824}
2825
2826pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2831 let trimmed = content_line.trim_start();
2832 trimmed.starts_with('>')
2833 || trimmed.starts_with('#')
2834 || trimmed.starts_with("```")
2835 || trimmed.starts_with("~~~")
2836 || is_unordered_list_marker(trimmed)
2837 || is_numbered_list_item(trimmed)
2838 || is_horizontal_rule(trimmed)
2839 || is_definition_list_item(trimmed)
2840 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2841 || trimmed.starts_with(":::")
2842 || (trimmed.starts_with('<')
2843 && !trimmed.starts_with("<http")
2844 && !trimmed.starts_with("<https")
2845 && !trimmed.starts_with("<mailto:"))
2846}
2847
2848pub fn reflow_blockquote_content(
2857 lines: &[BlockquoteLineData],
2858 explicit_prefix: &str,
2859 continuation_style: BlockquoteContinuationStyle,
2860 options: &ReflowOptions,
2861) -> Vec<String> {
2862 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2863 let segments = split_into_segments_strs(&content_strs);
2864 let mut reflowed_content_lines: Vec<String> = Vec::new();
2865
2866 for segment in segments {
2867 let hard_break_type = segment.last().and_then(|&line| {
2868 let line = line.strip_suffix('\r').unwrap_or(line);
2869 if line.ends_with('\\') {
2870 Some("\\")
2871 } else if line.ends_with(" ") {
2872 Some(" ")
2873 } else {
2874 None
2875 }
2876 });
2877
2878 let pieces: Vec<&str> = segment
2879 .iter()
2880 .map(|&line| {
2881 if let Some(l) = line.strip_suffix('\\') {
2882 l.trim_end()
2883 } else if let Some(l) = line.strip_suffix(" ") {
2884 l.trim_end()
2885 } else {
2886 line.trim_end()
2887 }
2888 })
2889 .collect();
2890
2891 let segment_text = pieces.join(" ");
2892 let segment_text = segment_text.trim();
2893 if segment_text.is_empty() {
2894 continue;
2895 }
2896
2897 let mut reflowed = reflow_line(segment_text, options);
2898 if let Some(break_marker) = hard_break_type
2899 && !reflowed.is_empty()
2900 {
2901 let last_idx = reflowed.len() - 1;
2902 if !has_hard_break(&reflowed[last_idx]) {
2903 reflowed[last_idx].push_str(break_marker);
2904 }
2905 }
2906 reflowed_content_lines.extend(reflowed);
2907 }
2908
2909 let mut styled_lines: Vec<String> = Vec::new();
2910 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2911 let force_explicit = idx == 0
2912 || continuation_style == BlockquoteContinuationStyle::Explicit
2913 || should_force_explicit_blockquote_line(line);
2914 if force_explicit {
2915 styled_lines.push(format!("{explicit_prefix}{line}"));
2916 } else {
2917 styled_lines.push(line.clone());
2918 }
2919 }
2920
2921 styled_lines
2922}
2923
2924fn is_blockquote_content_boundary(content: &str) -> bool {
2925 let trimmed = content.trim();
2926 trimmed.is_empty()
2927 || is_block_boundary(trimmed)
2928 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2929 || trimmed.starts_with(":::")
2930 || crate::utils::is_template_directive_only(content)
2931 || is_standalone_attr_list(content)
2932 || is_snippet_block_delimiter(content)
2933}
2934
2935fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2936 let mut segments = Vec::new();
2937 let mut current = Vec::new();
2938
2939 for &line in lines {
2940 current.push(line);
2941 if has_hard_break(line) {
2942 segments.push(current);
2943 current = Vec::new();
2944 }
2945 }
2946
2947 if !current.is_empty() {
2948 segments.push(current);
2949 }
2950
2951 segments
2952}
2953
2954fn reflow_blockquote_paragraph_at_line(
2955 content: &str,
2956 lines: &[&str],
2957 target_idx: usize,
2958 options: &ReflowOptions,
2959) -> Option<ParagraphReflow> {
2960 let mut anchor_idx = target_idx;
2961 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2962 parsed.nesting_level
2963 } else {
2964 let mut found = None;
2965 let mut idx = target_idx;
2966 loop {
2967 if lines[idx].trim().is_empty() {
2968 break;
2969 }
2970 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2971 found = Some((idx, parsed.nesting_level));
2972 break;
2973 }
2974 if idx == 0 {
2975 break;
2976 }
2977 idx -= 1;
2978 }
2979 let (idx, level) = found?;
2980 anchor_idx = idx;
2981 level
2982 };
2983
2984 let mut para_start = anchor_idx;
2986 while para_start > 0 {
2987 let prev_idx = para_start - 1;
2988 let prev_line = lines[prev_idx];
2989
2990 if prev_line.trim().is_empty() {
2991 break;
2992 }
2993
2994 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2995 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2996 break;
2997 }
2998 para_start = prev_idx;
2999 continue;
3000 }
3001
3002 let prev_lazy = prev_line.trim_start();
3003 if is_blockquote_content_boundary(prev_lazy) {
3004 break;
3005 }
3006 para_start = prev_idx;
3007 }
3008
3009 while para_start < lines.len() {
3011 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3012 para_start += 1;
3013 continue;
3014 };
3015 target_level = parsed.nesting_level;
3016 break;
3017 }
3018
3019 if para_start >= lines.len() || para_start > target_idx {
3020 return None;
3021 }
3022
3023 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3026 let mut idx = para_start;
3027 while idx < lines.len() {
3028 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3029 break;
3030 }
3031
3032 let line = lines[idx];
3033 if line.trim().is_empty() {
3034 break;
3035 }
3036
3037 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3038 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3039 break;
3040 }
3041 collected.push((
3042 idx,
3043 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3044 ));
3045 idx += 1;
3046 continue;
3047 }
3048
3049 let lazy_content = line.trim_start();
3050 if is_blockquote_content_boundary(lazy_content) {
3051 break;
3052 }
3053
3054 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3055 idx += 1;
3056 }
3057
3058 if collected.is_empty() {
3059 return None;
3060 }
3061
3062 let para_end = collected[collected.len() - 1].0;
3063 if target_idx < para_start || target_idx > para_end {
3064 return None;
3065 }
3066
3067 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3068
3069 let fallback_prefix = line_data
3070 .iter()
3071 .find_map(|d| d.prefix.clone())
3072 .unwrap_or_else(|| "> ".to_string());
3073 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3074 let continuation_style = blockquote_continuation_style(&line_data);
3075
3076 let adjusted_line_length = options
3077 .line_length
3078 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3079 .max(1);
3080
3081 let adjusted_options = ReflowOptions {
3082 line_length: adjusted_line_length,
3083 ..options.clone()
3084 };
3085
3086 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3087
3088 if styled_lines.is_empty() {
3089 return None;
3090 }
3091
3092 let mut start_byte = 0;
3094 for line in lines.iter().take(para_start) {
3095 start_byte += line.len() + 1;
3096 }
3097
3098 let mut end_byte = start_byte;
3099 for line in lines.iter().take(para_end + 1).skip(para_start) {
3100 end_byte += line.len() + 1;
3101 }
3102
3103 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3104 if !includes_trailing_newline {
3105 end_byte -= 1;
3106 }
3107
3108 let reflowed_joined = styled_lines.join("\n");
3109 let reflowed_text = if includes_trailing_newline {
3110 if reflowed_joined.ends_with('\n') {
3111 reflowed_joined
3112 } else {
3113 format!("{reflowed_joined}\n")
3114 }
3115 } else if reflowed_joined.ends_with('\n') {
3116 reflowed_joined.trim_end_matches('\n').to_string()
3117 } else {
3118 reflowed_joined
3119 };
3120
3121 Some(ParagraphReflow {
3122 start_byte,
3123 end_byte,
3124 reflowed_text,
3125 })
3126}
3127
3128pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3146 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3147}
3148
3149pub fn reflow_paragraph_at_line_with_mode(
3151 content: &str,
3152 line_number: usize,
3153 line_length: usize,
3154 length_mode: ReflowLengthMode,
3155) -> Option<ParagraphReflow> {
3156 let options = ReflowOptions {
3157 line_length,
3158 length_mode,
3159 ..Default::default()
3160 };
3161 reflow_paragraph_at_line_with_options(content, line_number, &options)
3162}
3163
3164pub fn reflow_paragraph_at_line_with_options(
3175 content: &str,
3176 line_number: usize,
3177 options: &ReflowOptions,
3178) -> Option<ParagraphReflow> {
3179 if line_number == 0 {
3180 return None;
3181 }
3182
3183 let lines: Vec<&str> = content.lines().collect();
3184
3185 if line_number > lines.len() {
3187 return None;
3188 }
3189
3190 let target_idx = line_number - 1; let target_line = lines[target_idx];
3192 let trimmed = target_line.trim();
3193
3194 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3197 return Some(blockquote_reflow);
3198 }
3199
3200 if is_paragraph_boundary(trimmed, target_line) {
3202 return None;
3203 }
3204
3205 let mut para_start = target_idx;
3207 while para_start > 0 {
3208 let prev_idx = para_start - 1;
3209 let prev_line = lines[prev_idx];
3210 let prev_trimmed = prev_line.trim();
3211
3212 if is_paragraph_boundary(prev_trimmed, prev_line) {
3214 break;
3215 }
3216
3217 para_start = prev_idx;
3218 }
3219
3220 let mut para_end = target_idx;
3222 while para_end + 1 < lines.len() {
3223 let next_idx = para_end + 1;
3224 let next_line = lines[next_idx];
3225 let next_trimmed = next_line.trim();
3226
3227 if is_paragraph_boundary(next_trimmed, next_line) {
3229 break;
3230 }
3231
3232 para_end = next_idx;
3233 }
3234
3235 let paragraph_lines = &lines[para_start..=para_end];
3237
3238 let mut start_byte = 0;
3240 for line in lines.iter().take(para_start) {
3241 start_byte += line.len() + 1; }
3243
3244 let mut end_byte = start_byte;
3245 for line in paragraph_lines {
3246 end_byte += line.len() + 1; }
3248
3249 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3252
3253 if !includes_trailing_newline {
3255 end_byte -= 1;
3256 }
3257
3258 let paragraph_text = paragraph_lines.join("\n");
3260
3261 let reflowed = reflow_markdown(¶graph_text, options);
3263
3264 let reflowed_text = if includes_trailing_newline {
3268 if reflowed.ends_with('\n') {
3270 reflowed
3271 } else {
3272 format!("{reflowed}\n")
3273 }
3274 } else {
3275 if reflowed.ends_with('\n') {
3277 reflowed.trim_end_matches('\n').to_string()
3278 } else {
3279 reflowed
3280 }
3281 };
3282
3283 Some(ParagraphReflow {
3284 start_byte,
3285 end_byte,
3286 reflowed_text,
3287 })
3288}
3289
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` with the default abbreviation list.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Inputs that end in a default abbreviation followed by a period.
        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should end with an abbreviation"
            );
        }

        // Ordinary words, other punctuation, and degenerate inputs are not
        // abbreviations.
        for text in [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ] {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should not end with an abbreviation"
            );
        }
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // A marker followed by content, or a bare marker.
        for text in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(text), "{text:?} should be a list marker");
        }

        // Horizontal rules, emphasis, marker without a space, and plain text.
        for text in [
            "---",
            "***",
            "- - -",
            "* * *",
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ] {
            assert!(!is_unordered_list_marker(text), "{text:?} should not be a list marker");
        }
    }

    #[test]
    fn test_is_block_boundary() {
        for text in [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ] {
            assert!(is_block_boundary(text), "{text:?} should be a block boundary");
        }

        for text in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(text), "{text:?} should not be a block boundary");
        }
    }

    /// A definition-list item following a single-line paragraph must stay on
    /// its own line instead of being merged into the paragraph.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Block-level syntax.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four or more leading spaces, or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text indented by fewer than four spaces.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    /// Line 3 is the opening `:::` div marker, which is a boundary and must
    /// not be treated as a paragraph.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}