1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit used to measure the length of a line when reflowing.
///
/// The default is [`ReflowLengthMode::Visual`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Measure display width via `UnicodeWidthStr::width` (the default).
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
45#[derive(Clone)]
47pub struct ReflowOptions {
48 pub line_length: usize,
50 pub break_on_sentences: bool,
52 pub preserve_breaks: bool,
54 pub sentence_per_line: bool,
56 pub semantic_line_breaks: bool,
58 pub abbreviations: Option<Vec<String>>,
62 pub length_mode: ReflowLengthMode,
64 pub attr_lists: bool,
67 pub require_sentence_capital: bool,
72 pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79 fn default() -> Self {
80 Self {
81 line_length: 80,
82 break_on_sentences: true,
83 preserve_breaks: false,
84 sentence_per_line: false,
85 semantic_line_breaks: false,
86 abbreviations: None,
87 length_mode: ReflowLengthMode::default(),
88 attr_lists: false,
89 require_sentence_capital: true,
90 max_list_continuation_indent: None,
91 }
92 }
93}
94
/// Returns one flag per character of `text`; a flag is `true` when that
/// character belongs to an inline code span, delimiters included.
///
/// A span opens with a run of N backticks and closes at the next run of
/// exactly N backticks. An opening run with no matching close is left
/// unmasked and scanning resumes right after it.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run that begins at `from`.
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if chars[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let open_start = cursor;
        let open_len = run_len(open_start);

        // Look for a closing run of the same length.
        let mut scan = open_start + open_len;
        let mut span_end = None;
        while scan < total {
            if chars[scan] != '`' {
                scan += 1;
                continue;
            }
            let close_len = run_len(scan);
            scan += close_len;
            if close_len == open_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening run, content and closing run are contiguous.
                mask[open_start..end].fill(true);
                cursor = end;
            }
            // Unmatched opener: treat the backticks as plain text.
            None => cursor = open_start + open_len,
        }
    }

    mask
}
155
156fn is_sentence_boundary(
160 text: &str,
161 pos: usize,
162 abbreviations: &HashSet<String>,
163 require_sentence_capital: bool,
164) -> bool {
165 let chars: Vec<char> = text.chars().collect();
166
167 if pos + 1 >= chars.len() {
168 return false;
169 }
170
171 let c = chars[pos];
172 let next_char = chars[pos + 1];
173
174 if is_cjk_sentence_ending(c) {
177 let mut after_punct_pos = pos + 1;
179 while after_punct_pos < chars.len()
180 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181 {
182 after_punct_pos += 1;
183 }
184
185 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187 after_punct_pos += 1;
188 }
189
190 if after_punct_pos >= chars.len() {
192 return false;
193 }
194
195 while after_punct_pos < chars.len()
197 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198 {
199 after_punct_pos += 1;
200 }
201
202 if after_punct_pos >= chars.len() {
203 return false;
204 }
205
206 return true;
209 }
210
211 if c != '.' && c != '!' && c != '?' {
213 return false;
214 }
215
216 let (_space_pos, after_space_pos) = if next_char == ' ' {
218 (pos + 1, pos + 2)
220 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221 if chars[pos + 2] == ' ' {
223 (pos + 2, pos + 3)
225 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226 (pos + 3, pos + 4)
228 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229 && pos + 4 < chars.len()
230 && chars[pos + 3] == chars[pos + 2]
231 && chars[pos + 4] == ' '
232 {
233 (pos + 4, pos + 5)
235 } else {
236 return false;
237 }
238 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239 (pos + 2, pos + 3)
241 } else if (next_char == '*' || next_char == '_')
242 && pos + 3 < chars.len()
243 && chars[pos + 2] == next_char
244 && chars[pos + 3] == ' '
245 {
246 (pos + 3, pos + 4)
248 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249 (pos + 3, pos + 4)
251 } else {
252 return false;
253 };
254
255 let mut next_char_pos = after_space_pos;
257 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258 next_char_pos += 1;
259 }
260
261 if next_char_pos >= chars.len() {
263 return false;
264 }
265
266 let mut first_letter_pos = next_char_pos;
268 while first_letter_pos < chars.len()
269 && (chars[first_letter_pos] == '*'
270 || chars[first_letter_pos] == '_'
271 || chars[first_letter_pos] == '~'
272 || is_opening_quote(chars[first_letter_pos]))
273 {
274 first_letter_pos += 1;
275 }
276
277 if first_letter_pos >= chars.len() {
279 return false;
280 }
281
282 let first_char = chars[first_letter_pos];
283
284 if c == '!' || c == '?' {
286 return true;
287 }
288
289 if pos > 0 {
293 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296 return false;
297 }
298
299 if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301 return false;
302 }
303
304 if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308 return false;
309 }
310 }
311
312 if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315 return false;
316 }
317
318 true
319}
320
321pub fn split_into_sentences(text: &str) -> Vec<String> {
323 split_into_sentences_custom(text, &None)
324}
325
326pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328 let abbreviations = get_abbreviations(custom_abbreviations);
329 split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332fn split_into_sentences_with_set(
335 text: &str,
336 abbreviations: &HashSet<String>,
337 require_sentence_capital: bool,
338) -> Vec<String> {
339 let in_code = compute_inline_code_mask(text);
341
342 let mut sentences = Vec::new();
343 let mut current_sentence = String::new();
344 let mut chars = text.chars().peekable();
345 let mut pos = 0;
346
347 while let Some(c) = chars.next() {
348 current_sentence.push(c);
349
350 if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351 while let Some(&next) = chars.peek() {
353 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354 current_sentence.push(chars.next().unwrap());
355 pos += 1;
356 } else {
357 break;
358 }
359 }
360
361 if chars.peek() == Some(&' ') {
363 chars.next();
364 pos += 1;
365 }
366
367 sentences.push(current_sentence.trim().to_string());
368 current_sentence.clear();
369 }
370
371 pos += 1;
372 }
373
374 if !current_sentence.trim().is_empty() {
376 sentences.push(current_sentence.trim().to_string());
377 }
378 sentences
379}
380
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_`
/// or `*`, contains only that marker and spaces, and has at least three
/// markers.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Returns `true` when `line` starts with an ordered-list marker of the form
/// `<digits>. ` (one or more ASCII digits, a period, then a space).
///
/// Only ASCII digits count. CommonMark defines ordered-list markers with the
/// digits `0-9`, so other Unicode numerics such as `½`, `²` or `Ⅳ` must not
/// turn a paragraph line into a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // Byte length of the leading run of ASCII digits.
    let digits_end = line
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(line.len());

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
433
434fn is_unordered_list_marker(s: &str) -> bool {
436 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437 && !is_horizontal_rule(s)
438 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441fn is_block_boundary_core(trimmed: &str) -> bool {
444 trimmed.is_empty()
445 || trimmed.starts_with('#')
446 || trimmed.starts_with("```")
447 || trimmed.starts_with("~~~")
448 || trimmed.starts_with('>')
449 || (trimmed.starts_with('[') && trimmed.contains("]:"))
450 || is_horizontal_rule(trimmed)
451 || is_unordered_list_marker(trimmed)
452 || is_numbered_list_item(trimmed)
453 || is_definition_list_item(trimmed)
454 || trimmed.starts_with(":::")
455}
456
457fn is_block_boundary(trimmed: &str) -> bool {
460 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467 is_block_boundary_core(trimmed)
468 || calculate_indentation_width_default(line) >= 4
469 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash. A trailing `\r` (from CRLF
/// input) is ignored.
///
/// A single trailing space is NOT a hard break.
fn has_hard_break(line: &str) -> bool {
    // Exactly two spaces. Written with escapes so that tools which collapse
    // runs of whitespace cannot silently shrink the literal to one space.
    const TWO_SPACES: &str = "\x20\x20";

    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with(TWO_SPACES) || line.ends_with('\\')
}
481
/// Returns `true` when the final character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalised to its content followed
///   by exactly two spaces; a whitespace-only line becomes empty.
/// * Anything else, including a single trailing space (which is not a hard
///   break), is trimmed completely.
fn trim_preserving_hard_break(s: &str) -> String {
    // Exactly two spaces. Written with escapes so that tools which collapse
    // runs of whitespace cannot silently shrink the literal to one space.
    const TWO_SPACES: &str = "\x20\x20";

    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: nothing to trim.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if !s.ends_with(TWO_SPACES) {
        return content.to_string();
    }

    // Whitespace-only line: there is no content to attach the break to.
    if content.is_empty() {
        return String::new();
    }

    let mut kept = String::with_capacity(content.len() + TWO_SPACES.len());
    kept.push_str(content);
    kept.push_str(TWO_SPACES);
    kept
}
517
518fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520 if options.attr_lists {
521 parse_markdown_elements_with_attr_lists(text)
522 } else {
523 parse_markdown_elements(text)
524 }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528 if options.sentence_per_line {
530 let elements = parse_elements(line, options);
531 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532 }
533
534 if options.semantic_line_breaks {
536 let elements = parse_elements(line, options);
537 return reflow_elements_semantic(&elements, options);
538 }
539
540 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543 return vec![line.to_string()];
544 }
545
546 let elements = parse_elements(line, options);
548
549 reflow_elements(&elements, options)
551}
552
/// Where the image inside a linked image gets its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline URL: `![alt](url)`.
    Inline(String),
    /// Reference label: `![alt][ref]`.
    Reference(String),
}

/// Where the link wrapping a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline URL: `[...](url)`.
    Inline(String),
    /// Reference label: `[...][ref]`.
    Reference(String),
}

/// One inline Markdown construct produced by the element parser.
///
/// The `Display` implementation renders each variant back to its Markdown
/// source form.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Full reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Image wrapped in a link, e.g. `[![alt](img)](target)`, in any
    /// inline/reference combination.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough content, rendered as `~~text~~`.
    Strikethrough(String),
    /// Wiki link content, rendered as `[[content]]`.
    WikiLink(String),
    /// Inline math content, rendered as `$math$`.
    InlineMath(String),
    /// Display math content, rendered as `$$math$$`.
    DisplayMath(String),
    /// Emoji shortcode name, rendered as `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim (angle brackets included).
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Attribute list (`{...}`), stored and rendered verbatim.
    AttrList(String),
    /// Inline code content, rendered between single backticks.
    Code(String),
    /// Strong emphasis; `underscore` selects `__` over `**`.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis; `underscore` selects `_` over `*`.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Writes the element back out as Markdown source.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax; an empty format string here
            // would silently drop the image from reflowed output.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
694
695impl Element {
696 fn display_width(&self, mode: ReflowLengthMode) -> usize {
700 let formatted = format!("{self}");
701 display_len(&formatted, mode)
702 }
703}
704
/// An emphasis, strong-emphasis or strikethrough span found in a piece of text
/// by `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the source text where the span (opening delimiter) starts.
    start: usize,
    /// Byte offset just past the span (end of the parser-reported range).
    end: usize,
    /// Source text between the opening and closing delimiters.
    content: String,
    /// `true` for strong emphasis; `false` for regular emphasis and strikethrough.
    is_strong: bool,
    /// `true` for strikethrough spans.
    is_strikethrough: bool,
    /// `true` when the span opens with `_` / `__` rather than `*` / `**`.
    /// Always `false` for strikethrough.
    uses_underscore: bool,
}
721
722fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732 let mut spans = Vec::new();
733 let mut options = Options::empty();
734 options.insert(Options::ENABLE_STRIKETHROUGH);
735
736 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739 let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741 let parser = Parser::new_ext(text, options).into_offset_iter();
742
743 for (event, range) in parser {
744 match event {
745 Event::Start(Tag::Emphasis) => {
746 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748 emphasis_stack.push((range.start, uses_underscore));
749 }
750 Event::End(TagEnd::Emphasis) => {
751 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752 let content_start = start_byte + 1;
754 let content_end = range.end - 1;
755 if content_end > content_start
756 && let Some(content) = text.get(content_start..content_end)
757 {
758 spans.push(EmphasisSpan {
759 start: start_byte,
760 end: range.end,
761 content: content.to_string(),
762 is_strong: false,
763 is_strikethrough: false,
764 uses_underscore,
765 });
766 }
767 }
768 }
769 Event::Start(Tag::Strong) => {
770 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772 strong_stack.push((range.start, uses_underscore));
773 }
774 Event::End(TagEnd::Strong) => {
775 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776 let content_start = start_byte + 2;
778 let content_end = range.end - 2;
779 if content_end > content_start
780 && let Some(content) = text.get(content_start..content_end)
781 {
782 spans.push(EmphasisSpan {
783 start: start_byte,
784 end: range.end,
785 content: content.to_string(),
786 is_strong: true,
787 is_strikethrough: false,
788 uses_underscore,
789 });
790 }
791 }
792 }
793 Event::Start(Tag::Strikethrough) => {
794 strikethrough_stack.push(range.start);
795 }
796 Event::End(TagEnd::Strikethrough) => {
797 if let Some(start_byte) = strikethrough_stack.pop() {
798 let content_start = start_byte + 2;
800 let content_end = range.end - 2;
801 if content_end > content_start
802 && let Some(content) = text.get(content_start..content_end)
803 {
804 spans.push(EmphasisSpan {
805 start: start_byte,
806 end: range.end,
807 content: content.to_string(),
808 is_strong: false,
809 is_strikethrough: true,
810 uses_underscore: false,
811 });
812 }
813 }
814 }
815 _ => {}
816 }
817 }
818
819 spans.sort_by_key(|s| s.start);
821 spans
822}
823
824fn parse_markdown_elements(text: &str) -> Vec<Element> {
835 parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839 parse_markdown_elements_inner(text, true)
840}
841
/// Tokenizes `text` into a flat sequence of [`Element`]s.
///
/// The scanner works on the unparsed tail (`remaining`) and, on every
/// iteration, determines two candidates:
///
/// 1. the earliest regex-recognised construct (linked images, images,
///    footnotes, links, wiki links, math, emoji, HTML entities, Hugo
///    shortcodes, HTML tags / autolinks), and
/// 2. the earliest "special" construct (a backtick, an attribute list when
///    `attr_lists` is true, or an emphasis span reported by pulldown-cmark).
///
/// The regex candidate is used only when it starts strictly before the
/// special one; otherwise the special construct is handled. Text in front of
/// the chosen construct is emitted as `Element::Text`. When nothing is found
/// the rest of the input is emitted as a single text element.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once for the whole text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start, end, pattern tag) of the earliest regex match in `remaining`.
        // A later candidate replaces it only when it starts strictly earlier,
        // so on ties the pattern checked first wins.
        let mut earliest_match: Option<(usize, usize, &str)> = None;

        // The linked-image regexes are only tried when the tail contains `[!`.
        if remaining.contains("[!") {
            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
            }

            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
            }

            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
            }

            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
            }
        }

        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_image"));
        }

        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_image"));
        }

        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
        }

        // The link regexes return a `Result`; an error is treated as "no match".
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_link"));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_link"));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
        }

        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "wiki_link"));
        }

        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "display_math"));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_math"));
        }

        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "emoji"));
        }

        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "html_entity"));
        }

        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
        }

        // An HTML-tag match is classified as an autolink when it wraps a URL
        // with a known scheme or an e-mail address; otherwise it is a tag.
        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            let is_email_autolink = {
                // Compare the content between the angle brackets.
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), m.end(), "autolink"));
            } else {
                earliest_match = Some((m.start(), m.end(), "html_tag"));
            }
        }

        // Earliest "special" construct; `remaining.len()` means none found.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Only the first `{` is examined, and the attribute-list pattern must
        // match exactly at that brace.
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Spans are sorted by start, so the first one that begins inside
        // `remaining` is the earliest; only that span is considered.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex match wins only when it starts strictly before the special
        // construct; on a tie the special construct is processed.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, match_end, pattern_type) = earliest_match.unwrap();

            // Emit the plain text that precedes the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs its regex to obtain capture groups, then skips
            // past the match. The `else` fallbacks emit the leading delimiter
            // as text and advance past it.
            match pattern_type {
                "linked_image_ii" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_url = caps.get(2).map_or("", |m| m.as_str());
                        let link_url = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
                        let link_url = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_url = caps.get(2).map_or("", |m| m.as_str());
                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let url = caps.get(2).map_or("", |m| m.as_str());
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let reference = caps.get(2).map_or("", |m| m.as_str());

                        // An empty label means the collapsed form `![alt][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map_or("", |m| m.as_str());
                        let url = caps.get(2).map_or("", |m| m.as_str());
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map_or("", |m| m.as_str());
                        let reference = caps.get(2).map_or("", |m| m.as_str());

                        // An empty label means the collapsed form `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The next four kinds keep the matched source text verbatim.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                // Unknown tag: emit `[` as text and advance one byte.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the text in front of the special construct, if any, so that
            // `remaining` starts exactly at the construct.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // Pairs the opening backtick with the very next backtick;
                    // an unclosed backtick turns the rest of the input into text.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..=code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Length of the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // No special construct left: the remainder is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1352
1353fn reflow_elements_sentence_per_line(
1355 elements: &[Element],
1356 custom_abbreviations: &Option<Vec<String>>,
1357 require_sentence_capital: bool,
1358) -> Vec<String> {
1359 let abbreviations = get_abbreviations(custom_abbreviations);
1360 let mut lines = Vec::new();
1361 let mut current_line = String::new();
1362
1363 for (idx, element) in elements.iter().enumerate() {
1364 let element_str = format!("{element}");
1365
1366 if let Element::Text(text) = element {
1368 let combined = format!("{current_line}{text}");
1370 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373 if sentences.len() > 1 {
1374 for (i, sentence) in sentences.iter().enumerate() {
1376 if i == 0 {
1377 let trimmed = sentence.trim();
1380
1381 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382 current_line.clone_from(sentence);
1384 } else {
1385 lines.push(sentence.clone());
1387 current_line.clear();
1388 }
1389 } else if i == sentences.len() - 1 {
1390 let trimmed = sentence.trim();
1392 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395 lines.push(sentence.clone());
1397 current_line.clear();
1398 } else {
1399 current_line.clone_from(sentence);
1401 }
1402 } else {
1403 lines.push(sentence.clone());
1405 }
1406 }
1407 } else {
1408 let trimmed = combined.trim();
1410
1411 if trimmed.is_empty() {
1415 continue;
1416 }
1417
1418 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421 lines.push(trimmed.to_string());
1423 current_line.clear();
1424 } else {
1425 current_line = combined;
1427 }
1428 }
1429 } else if let Element::Italic { content, underscore } = element {
1430 let marker = if *underscore { "_" } else { "*" };
1432 handle_emphasis_sentence_split(
1433 content,
1434 marker,
1435 &abbreviations,
1436 require_sentence_capital,
1437 &mut current_line,
1438 &mut lines,
1439 );
1440 } else if let Element::Bold { content, underscore } = element {
1441 let marker = if *underscore { "__" } else { "**" };
1443 handle_emphasis_sentence_split(
1444 content,
1445 marker,
1446 &abbreviations,
1447 require_sentence_capital,
1448 &mut current_line,
1449 &mut lines,
1450 );
1451 } else if let Element::Strikethrough(content) = element {
1452 handle_emphasis_sentence_split(
1454 content,
1455 "~~",
1456 &abbreviations,
1457 require_sentence_capital,
1458 &mut current_line,
1459 &mut lines,
1460 );
1461 } else {
1462 let is_adjacent = if idx > 0 {
1465 match &elements[idx - 1] {
1466 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467 _ => true,
1468 }
1469 } else {
1470 false
1471 };
1472
1473 if !is_adjacent
1475 && !current_line.is_empty()
1476 && !current_line.ends_with(' ')
1477 && !current_line.ends_with('(')
1478 && !current_line.ends_with('[')
1479 {
1480 current_line.push(' ');
1481 }
1482 current_line.push_str(&element_str);
1483 }
1484 }
1485
1486 if !current_line.is_empty() {
1488 lines.push(current_line.trim().to_string());
1489 }
1490 lines
1491}
1492
1493fn handle_emphasis_sentence_split(
1495 content: &str,
1496 marker: &str,
1497 abbreviations: &HashSet<String>,
1498 require_sentence_capital: bool,
1499 current_line: &mut String,
1500 lines: &mut Vec<String>,
1501) {
1502 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505 if sentences.len() <= 1 {
1506 if !current_line.is_empty()
1508 && !current_line.ends_with(' ')
1509 && !current_line.ends_with('(')
1510 && !current_line.ends_with('[')
1511 {
1512 current_line.push(' ');
1513 }
1514 current_line.push_str(marker);
1515 current_line.push_str(content);
1516 current_line.push_str(marker);
1517
1518 let trimmed = content.trim();
1520 let ends_with_punct = ends_with_sentence_punct(trimmed);
1521 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522 lines.push(current_line.clone());
1523 current_line.clear();
1524 }
1525 } else {
1526 for (i, sentence) in sentences.iter().enumerate() {
1528 let trimmed = sentence.trim();
1529 if trimmed.is_empty() {
1530 continue;
1531 }
1532
1533 if i == 0 {
1534 if !current_line.is_empty()
1536 && !current_line.ends_with(' ')
1537 && !current_line.ends_with('(')
1538 && !current_line.ends_with('[')
1539 {
1540 current_line.push(' ');
1541 }
1542 current_line.push_str(marker);
1543 current_line.push_str(trimmed);
1544 current_line.push_str(marker);
1545
1546 let ends_with_punct = ends_with_sentence_punct(trimmed);
1548 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549 lines.push(current_line.clone());
1550 current_line.clear();
1551 }
1552 } else if i == sentences.len() - 1 {
1553 let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556 let mut line = String::new();
1557 line.push_str(marker);
1558 line.push_str(trimmed);
1559 line.push_str(marker);
1560
1561 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562 lines.push(line);
1563 } else {
1564 *current_line = line;
1566 }
1567 } else {
1568 let mut line = String::new();
1570 line.push_str(marker);
1571 line.push_str(trimmed);
1572 line.push_str(marker);
1573 lines.push(line);
1574 }
1575 }
1576 }
1577}
1578
/// Lowercase ASCII words before which `split_at_break_word` is allowed to break a line.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions.
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns.
    "which", "that",
    // Subordinating conjunctions.
    "because", "when", "if", "while", "where", "although", "though", "unless",
    "since", "after", "before", "until", "as", "once", "whether",
    // Conjunctive adverbs and contrastive connectors.
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1614
/// Returns `true` for punctuation that ends a clause: comma, semicolon, colon or em dash.
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1634 debug_assert!(slice.starts_with('('));
1635 let mut depth: i32 = 0;
1636 for (local_byte, c) in slice.char_indices() {
1637 let global_byte = offset + local_byte;
1638 if depth > 0 && is_inside_element(global_byte, element_spans) {
1643 continue;
1644 }
1645 match c {
1646 '(' => depth += 1,
1647 ')' => {
1648 depth -= 1;
1649 if depth == 0 {
1650 let end = local_byte + 1;
1651 let inner = &slice[1..local_byte];
1652 return Some((end, inner));
1653 }
1654 }
1655 _ => {}
1656 }
1657 }
1658 None
1659}
1660
1661fn split_at_parenthetical(
1678 text: &str,
1679 line_length: usize,
1680 element_spans: &[(usize, usize)],
1681 length_mode: ReflowLengthMode,
1682) -> Option<(String, String)> {
1683 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1684
1685 if text.starts_with('(')
1687 && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1688 && inner.contains(' ')
1689 {
1690 let tail = &text[end_local..];
1694 let attached_len = tail
1695 .char_indices()
1696 .take_while(|(_, c)| is_closing_quote(*c) || is_clause_punctuation(*c))
1697 .last()
1698 .map_or(0, |(idx, c)| idx + c.len_utf8());
1699 let first_end = end_local + attached_len;
1700 let rest_start = first_end;
1701 let first = &text[..first_end];
1702 let first_len = display_len(first, length_mode);
1703 if first_len <= line_length {
1706 let rest = text[rest_start..].trim_start();
1707 if !rest.is_empty() {
1708 return Some((first.to_string(), rest.to_string()));
1709 }
1710 }
1711 }
1712
1713 let mut best_open_byte: Option<usize> = None;
1715 let mut pos = 0usize;
1716 while pos < text.len() {
1717 if text.as_bytes()[pos] != b'(' {
1719 let c = text[pos..].chars().next().unwrap();
1720 pos += c.len_utf8();
1721 continue;
1722 }
1723 if is_inside_element(pos, element_spans) {
1725 pos += 1;
1726 continue;
1727 }
1728 if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1729 let first = text[..pos].trim_end();
1730 let first_len = display_len(first, length_mode);
1731 if !first.is_empty()
1732 && first_len >= min_first_len
1733 && first_len <= line_length
1734 && inner.contains(' ')
1735 && best_open_byte.is_none_or(|prev| pos > prev)
1736 {
1737 best_open_byte = Some(pos);
1738 }
1739 pos += end_local;
1740 } else {
1741 pos += 1;
1742 }
1743 }
1744
1745 let open_byte = best_open_byte?;
1746 let first = text[..open_byte].trim_end().to_string();
1747 let rest = text[open_byte..].to_string();
1748 if first.is_empty() || rest.trim().is_empty() {
1749 return None;
1750 }
1751 Some((first, rest))
1752}
1753
1754fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1758 let mut spans = Vec::new();
1759 let mut offset = 0;
1760 for element in elements {
1761 let rendered = format!("{element}");
1762 let len = rendered.len();
1763 if !matches!(element, Element::Text(_)) {
1764 spans.push((offset, offset + len));
1765 }
1766 offset += len;
1767 }
1768 spans
1769}
1770
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both the start and the end offset of a span count as outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1775
/// Fraction of the configured line length used as a lower bound when splitting lines.
///
/// The split helpers multiply it by `line_length` to get the minimum width of the first
/// part of a split, and `reflow_elements_semantic` uses the same product as the threshold
/// below which a line is considered short enough to merge into the previous one.
const MIN_SPLIT_RATIO: f64 = 0.3;
1779
1780fn split_at_clause_punctuation(
1784 text: &str,
1785 line_length: usize,
1786 element_spans: &[(usize, usize)],
1787 length_mode: ReflowLengthMode,
1788) -> Option<(String, String)> {
1789 let chars: Vec<char> = text.chars().collect();
1790 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1791
1792 let mut width_acc = 0;
1794 let mut search_end_char = 0;
1795 for (idx, &c) in chars.iter().enumerate() {
1796 let c_width = display_len(&c.to_string(), length_mode);
1797 if width_acc + c_width > line_length {
1798 break;
1799 }
1800 width_acc += c_width;
1801 search_end_char = idx + 1;
1802 }
1803
1804 let mut paren_depth: i32 = 0;
1811 let mut best_pos = None;
1812 for i in (0..search_end_char).rev() {
1813 let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1815 let byte_after: usize = byte_start + chars[i].len_utf8();
1817
1818 if !is_inside_element(byte_start, element_spans) {
1819 match chars[i] {
1820 ')' => paren_depth += 1,
1821 '(' => paren_depth = paren_depth.saturating_sub(1),
1822 _ => {}
1823 }
1824 }
1825
1826 if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1827 best_pos = Some(i);
1828 break;
1829 }
1830 }
1831
1832 let pos = best_pos?;
1833
1834 let first: String = chars[..=pos].iter().collect();
1836 let first_display_len = display_len(&first, length_mode);
1837 if first_display_len < min_first_len {
1838 return None;
1839 }
1840
1841 let rest: String = chars[pos + 1..].iter().collect();
1843 let rest = rest.trim_start().to_string();
1844
1845 if rest.is_empty() {
1846 return None;
1847 }
1848
1849 Some((first, rest))
1850}
1851
1852fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1859 let mut map = vec![0i32; text.len()];
1860 let mut depth = 0i32;
1861 for (byte, c) in text.char_indices() {
1862 if !is_inside_element(byte, element_spans) {
1863 match c {
1864 '(' => depth += 1,
1865 ')' => depth = depth.saturating_sub(1),
1866 _ => {}
1867 }
1868 }
1869 let end = (byte + c.len_utf8()).min(map.len());
1871 for slot in &mut map[byte..end] {
1872 *slot = depth;
1873 }
1874 }
1875 map
1876}
1877
1878fn is_standalone_parenthetical(line: &str) -> bool {
1887 let trimmed = line.trim();
1888 if !trimmed.starts_with('(') {
1889 return false;
1890 }
1891 let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1893 if !core.ends_with(')') {
1894 return false;
1895 }
1896 let inner = &core[1..core.len() - 1];
1898 if !inner.contains(' ') {
1899 return false;
1900 }
1901 let mut depth = 0i32;
1903 for c in core.chars() {
1904 match c {
1905 '(' => depth += 1,
1906 ')' => depth -= 1,
1907 _ => {}
1908 }
1909 if depth < 0 {
1910 return false;
1911 }
1912 }
1913 depth == 0
1914}
1915
1916fn split_at_break_word(
1920 text: &str,
1921 line_length: usize,
1922 element_spans: &[(usize, usize)],
1923 length_mode: ReflowLengthMode,
1924) -> Option<(String, String)> {
1925 let lower = text.to_lowercase();
1926 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1927 let mut best_split: Option<(usize, usize)> = None; let depth_map = paren_depth_map(text, element_spans);
1932
1933 for &word in BREAK_WORDS {
1934 let mut search_start = 0;
1935 while let Some(pos) = lower[search_start..].find(word) {
1936 let abs_pos = search_start + pos;
1937
1938 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1940 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1941
1942 if preceded_by_space && followed_by_space {
1943 let first_part = text[..abs_pos].trim_end();
1945 let first_part_len = display_len(first_part, length_mode);
1946
1947 let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1949
1950 if first_part_len >= min_first_len
1951 && first_part_len <= line_length
1952 && !is_inside_element(abs_pos, element_spans)
1953 && !inside_paren
1954 {
1955 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1957 best_split = Some((abs_pos, word.len()));
1958 }
1959 }
1960 }
1961
1962 search_start = abs_pos + word.len();
1963 }
1964 }
1965
1966 let (byte_start, _word_len) = best_split?;
1967
1968 let first = text[..byte_start].trim_end().to_string();
1969 let rest = text[byte_start..].to_string();
1970
1971 if first.is_empty() || rest.trim().is_empty() {
1972 return None;
1973 }
1974
1975 Some((first, rest))
1976}
1977
1978fn cascade_split_line(
1981 text: &str,
1982 line_length: usize,
1983 abbreviations: &Option<Vec<String>>,
1984 length_mode: ReflowLengthMode,
1985 attr_lists: bool,
1986) -> Vec<String> {
1987 if line_length == 0 || display_len(text, length_mode) <= line_length {
1988 return vec![text.to_string()];
1989 }
1990
1991 let elements = parse_markdown_elements_inner(text, attr_lists);
1992 let element_spans = compute_element_spans(&elements);
1993
1994 if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1997 let mut result = vec![first];
1998 result.extend(cascade_split_line(
1999 &rest,
2000 line_length,
2001 abbreviations,
2002 length_mode,
2003 attr_lists,
2004 ));
2005 return result;
2006 }
2007
2008 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2010 let mut result = vec![first];
2011 result.extend(cascade_split_line(
2012 &rest,
2013 line_length,
2014 abbreviations,
2015 length_mode,
2016 attr_lists,
2017 ));
2018 return result;
2019 }
2020
2021 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2023 let mut result = vec![first];
2024 result.extend(cascade_split_line(
2025 &rest,
2026 line_length,
2027 abbreviations,
2028 length_mode,
2029 attr_lists,
2030 ));
2031 return result;
2032 }
2033
2034 let options = ReflowOptions {
2036 line_length,
2037 break_on_sentences: false,
2038 preserve_breaks: false,
2039 sentence_per_line: false,
2040 semantic_line_breaks: false,
2041 abbreviations: abbreviations.clone(),
2042 length_mode,
2043 attr_lists,
2044 require_sentence_capital: true,
2045 max_list_continuation_indent: None,
2046 };
2047 reflow_elements(&elements, &options)
2048}
2049
2050fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2054 let sentence_lines =
2056 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2057
2058 if options.line_length == 0 {
2061 return sentence_lines;
2062 }
2063
2064 let length_mode = options.length_mode;
2065 let mut result = Vec::new();
2066 for line in sentence_lines {
2067 if display_len(&line, length_mode) <= options.line_length {
2068 result.push(line);
2069 } else {
2070 result.extend(cascade_split_line(
2071 &line,
2072 options.line_length,
2073 &options.abbreviations,
2074 length_mode,
2075 options.attr_lists,
2076 ));
2077 }
2078 }
2079
2080 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2083 let mut merged: Vec<String> = Vec::with_capacity(result.len());
2084 for line in result {
2085 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2086 if is_standalone_parenthetical(&line) {
2089 merged.push(line);
2090 continue;
2091 }
2092
2093 let prev_ends_at_sentence = {
2095 let trimmed = merged.last().unwrap().trim_end();
2096 trimmed
2097 .chars()
2098 .rev()
2099 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2100 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2101 };
2102
2103 if !prev_ends_at_sentence {
2104 let prev = merged.last_mut().unwrap();
2105 let combined = format!("{prev} {line}");
2106 if display_len(&combined, length_mode) <= options.line_length {
2108 *prev = combined;
2109 continue;
2110 }
2111 }
2112 }
2113 merged.push(line);
2114 }
2115 merged
2116}
2117
/// Returns the byte index of the last ASCII space in `line` that is not strictly inside
/// any of `element_spans`, or `None` when no such space exists.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(at, _)| at)
        .find(|at| element_spans.iter().all(|&(start, end)| *at <= start || *at >= end))
}
2131
2132fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2134 let mut lines = Vec::new();
2135 let mut current_line = String::new();
2136 let mut current_length = 0;
2137 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2139 let length_mode = options.length_mode;
2140
2141 for (idx, element) in elements.iter().enumerate() {
2142 let element_str = format!("{element}");
2143 let element_len = element.display_width(length_mode);
2144
2145 let is_adjacent_to_prev = if idx > 0 {
2151 match (&elements[idx - 1], element) {
2152 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2153 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2154 _ => true,
2155 }
2156 } else {
2157 false
2158 };
2159
2160 if let Element::Text(text) = element {
2162 let has_leading_space = text.starts_with(char::is_whitespace);
2164 let words: Vec<&str> = text.split_whitespace().collect();
2166
2167 for (i, word) in words.iter().enumerate() {
2168 let word_len = display_len(word, length_mode);
2169 let is_trailing_punct = word
2171 .chars()
2172 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2173
2174 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2177
2178 if is_first_adjacent {
2179 if current_length + word_len > options.line_length && current_length > 0 {
2181 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2184 let before = current_line[..last_space].trim_end().to_string();
2185 let after = current_line[last_space + 1..].to_string();
2186 lines.push(before);
2187 current_line = format!("{after}{word}");
2188 current_length = display_len(¤t_line, length_mode);
2189 current_line_element_spans.clear();
2190 } else {
2191 current_line.push_str(word);
2192 current_length += word_len;
2193 }
2194 } else {
2195 current_line.push_str(word);
2196 current_length += word_len;
2197 }
2198 } else if current_length > 0
2199 && current_length + 1 + word_len > options.line_length
2200 && !is_trailing_punct
2201 {
2202 lines.push(current_line.trim().to_string());
2204 current_line = word.to_string();
2205 current_length = word_len;
2206 current_line_element_spans.clear();
2207 } else {
2208 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2212 current_line.push(' ');
2213 current_length += 1;
2214 }
2215 current_line.push_str(word);
2216 current_length += word_len;
2217 }
2218 }
2219 } else if matches!(
2220 element,
2221 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2222 ) && element_len > options.line_length
2223 {
2224 let (content, marker): (&str, &str) = match element {
2228 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2229 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2230 Element::Strikethrough(content) => (content.as_str(), "~~"),
2231 _ => unreachable!(),
2232 };
2233
2234 let words: Vec<&str> = content.split_whitespace().collect();
2235 let n = words.len();
2236
2237 if n == 0 {
2238 let full = format!("{marker}{marker}");
2240 let full_len = display_len(&full, length_mode);
2241 if !is_adjacent_to_prev && current_length > 0 {
2242 current_line.push(' ');
2243 current_length += 1;
2244 }
2245 current_line.push_str(&full);
2246 current_length += full_len;
2247 } else {
2248 for (i, word) in words.iter().enumerate() {
2249 let is_first = i == 0;
2250 let is_last = i == n - 1;
2251 let word_str: String = match (is_first, is_last) {
2252 (true, true) => format!("{marker}{word}{marker}"),
2253 (true, false) => format!("{marker}{word}"),
2254 (false, true) => format!("{word}{marker}"),
2255 (false, false) => word.to_string(),
2256 };
2257 let word_len = display_len(&word_str, length_mode);
2258
2259 let needs_space = if is_first {
2260 !is_adjacent_to_prev && current_length > 0
2261 } else {
2262 current_length > 0
2263 };
2264
2265 if needs_space && current_length + 1 + word_len > options.line_length {
2266 lines.push(current_line.trim_end().to_string());
2267 current_line = word_str;
2268 current_length = word_len;
2269 current_line_element_spans.clear();
2270 } else {
2271 if needs_space {
2272 current_line.push(' ');
2273 current_length += 1;
2274 }
2275 current_line.push_str(&word_str);
2276 current_length += word_len;
2277 }
2278 }
2279 }
2280 } else {
2281 if is_adjacent_to_prev {
2285 if current_length + element_len > options.line_length {
2287 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2290 let before = current_line[..last_space].trim_end().to_string();
2291 let after = current_line[last_space + 1..].to_string();
2292 lines.push(before);
2293 current_line = format!("{after}{element_str}");
2294 current_length = display_len(¤t_line, length_mode);
2295 current_line_element_spans.clear();
2296 let start = after.len();
2298 current_line_element_spans.push((start, start + element_str.len()));
2299 } else {
2300 let start = current_line.len();
2302 current_line.push_str(&element_str);
2303 current_length += element_len;
2304 current_line_element_spans.push((start, current_line.len()));
2305 }
2306 } else {
2307 let start = current_line.len();
2308 current_line.push_str(&element_str);
2309 current_length += element_len;
2310 current_line_element_spans.push((start, current_line.len()));
2311 }
2312 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2313 lines.push(current_line.trim().to_string());
2315 current_line.clone_from(&element_str);
2316 current_length = element_len;
2317 current_line_element_spans.clear();
2318 current_line_element_spans.push((0, element_str.len()));
2319 } else {
2320 let ends_with_opener =
2322 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2323 if current_length > 0 && !ends_with_opener {
2324 current_line.push(' ');
2325 current_length += 1;
2326 }
2327 let start = current_line.len();
2328 current_line.push_str(&element_str);
2329 current_length += element_len;
2330 current_line_element_spans.push((start, current_line.len()));
2331 }
2332 }
2333 }
2334
2335 if !current_line.is_empty() {
2337 lines.push(current_line.trim_end().to_string());
2338 }
2339
2340 lines
2341}
2342
2343pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2345 let lines: Vec<&str> = content.lines().collect();
2346 let mut result = Vec::new();
2347 let mut i = 0;
2348
2349 while i < lines.len() {
2350 let line = lines[i];
2351 let trimmed = line.trim();
2352
2353 if trimmed.is_empty() {
2355 result.push(String::new());
2356 i += 1;
2357 continue;
2358 }
2359
2360 if trimmed.starts_with('#') {
2362 result.push(line.to_string());
2363 i += 1;
2364 continue;
2365 }
2366
2367 if trimmed.starts_with(":::") {
2369 result.push(line.to_string());
2370 i += 1;
2371 continue;
2372 }
2373
2374 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2376 result.push(line.to_string());
2377 i += 1;
2378 while i < lines.len() {
2380 result.push(lines[i].to_string());
2381 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2382 i += 1;
2383 break;
2384 }
2385 i += 1;
2386 }
2387 continue;
2388 }
2389
2390 if calculate_indentation_width_default(line) >= 4 {
2392 result.push(line.to_string());
2394 i += 1;
2395 while i < lines.len() {
2396 let next_line = lines[i];
2397 if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2399 result.push(next_line.to_string());
2400 i += 1;
2401 } else {
2402 break;
2403 }
2404 }
2405 continue;
2406 }
2407
2408 if trimmed.starts_with('>') {
2410 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2413 let quote_prefix = line[0..=gt_pos].to_string();
2414 let quote_content = &line[quote_prefix.len()..].trim_start();
2415
2416 let reflowed = reflow_line(quote_content, options);
2417 for reflowed_line in &reflowed {
2418 result.push(format!("{quote_prefix} {reflowed_line}"));
2419 }
2420 i += 1;
2421 continue;
2422 }
2423
2424 if is_horizontal_rule(trimmed) {
2426 result.push(line.to_string());
2427 i += 1;
2428 continue;
2429 }
2430
2431 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2433 let indent = line.len() - line.trim_start().len();
2435 let indent_str = " ".repeat(indent);
2436
2437 let mut marker_end = indent;
2440 let mut content_start = indent;
2441
2442 if trimmed.chars().next().is_some_and(char::is_numeric) {
2443 if let Some(period_pos) = line[indent..].find('.') {
2445 marker_end = indent + period_pos + 1; content_start = marker_end;
2447 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2451 content_start += 1;
2452 }
2453 }
2454 } else {
2455 marker_end = indent + 1; content_start = marker_end;
2458 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2462 content_start += 1;
2463 }
2464 }
2465
2466 let min_continuation_indent = content_start;
2468
2469 let rest = &line[content_start..];
2472 if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2473 marker_end = content_start + 3; content_start += 4; }
2476
2477 let marker = &line[indent..marker_end];
2478
2479 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2482 i += 1;
2483
2484 while i < lines.len() {
2488 let next_line = lines[i];
2489 let next_trimmed = next_line.trim();
2490
2491 if is_block_boundary(next_trimmed) {
2493 break;
2494 }
2495
2496 let next_indent = next_line.len() - next_line.trim_start().len();
2498 if next_indent >= min_continuation_indent {
2499 let trimmed_start = next_line.trim_start();
2502 list_content.push(trim_preserving_hard_break(trimmed_start));
2503 i += 1;
2504 } else {
2505 break;
2507 }
2508 }
2509
2510 let combined_content = if options.preserve_breaks {
2513 list_content[0].clone()
2514 } else {
2515 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2517 if has_hard_breaks {
2518 list_content.join("\n")
2520 } else {
2521 list_content.join(" ")
2523 }
2524 };
2525
2526 let trimmed_marker = marker;
2528 let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2529 indent + (content_start - indent).min(max_indent)
2532 } else {
2533 content_start
2534 };
2535
2536 let prefix_length = indent + trimmed_marker.len() + 1;
2538
2539 let adjusted_options = ReflowOptions {
2541 line_length: options.line_length.saturating_sub(prefix_length),
2542 ..options.clone()
2543 };
2544
2545 let reflowed = reflow_line(&combined_content, &adjusted_options);
2546 for (j, reflowed_line) in reflowed.iter().enumerate() {
2547 if j == 0 {
2548 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2549 } else {
2550 let continuation_indent = " ".repeat(continuation_spaces);
2552 result.push(format!("{continuation_indent}{reflowed_line}"));
2553 }
2554 }
2555 continue;
2556 }
2557
2558 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2560 result.push(line.to_string());
2561 i += 1;
2562 continue;
2563 }
2564
2565 if trimmed.starts_with('[') && line.contains("]:") {
2567 result.push(line.to_string());
2568 i += 1;
2569 continue;
2570 }
2571
2572 if is_definition_list_item(trimmed) {
2574 result.push(line.to_string());
2575 i += 1;
2576 continue;
2577 }
2578
2579 let mut is_single_line_paragraph = true;
2581 if i + 1 < lines.len() {
2582 let next_trimmed = lines[i + 1].trim();
2583 if !is_block_boundary(next_trimmed) {
2585 is_single_line_paragraph = false;
2586 }
2587 }
2588
2589 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2591 result.push(line.to_string());
2592 i += 1;
2593 continue;
2594 }
2595
2596 let mut paragraph_parts = Vec::new();
2598 let mut current_part = vec![line];
2599 i += 1;
2600
2601 if options.preserve_breaks {
2603 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2605 Some("\\")
2606 } else if line.ends_with(" ") {
2607 Some(" ")
2608 } else {
2609 None
2610 };
2611 let reflowed = reflow_line(line, options);
2612
2613 if let Some(break_marker) = hard_break_type {
2615 if !reflowed.is_empty() {
2616 let mut reflowed_with_break = reflowed;
2617 let last_idx = reflowed_with_break.len() - 1;
2618 if !has_hard_break(&reflowed_with_break[last_idx]) {
2619 reflowed_with_break[last_idx].push_str(break_marker);
2620 }
2621 result.extend(reflowed_with_break);
2622 }
2623 } else {
2624 result.extend(reflowed);
2625 }
2626 } else {
2627 while i < lines.len() {
2629 let prev_line = if !current_part.is_empty() {
2630 current_part.last().unwrap()
2631 } else {
2632 ""
2633 };
2634 let next_line = lines[i];
2635 let next_trimmed = next_line.trim();
2636
2637 if is_block_boundary(next_trimmed) {
2639 break;
2640 }
2641
2642 let prev_trimmed = prev_line.trim();
2645 let abbreviations = get_abbreviations(&options.abbreviations);
2646 let ends_with_sentence = (prev_trimmed.ends_with('.')
2647 || prev_trimmed.ends_with('!')
2648 || prev_trimmed.ends_with('?')
2649 || prev_trimmed.ends_with(".*")
2650 || prev_trimmed.ends_with("!*")
2651 || prev_trimmed.ends_with("?*")
2652 || prev_trimmed.ends_with("._")
2653 || prev_trimmed.ends_with("!_")
2654 || prev_trimmed.ends_with("?_")
2655 || prev_trimmed.ends_with(".\"")
2657 || prev_trimmed.ends_with("!\"")
2658 || prev_trimmed.ends_with("?\"")
2659 || prev_trimmed.ends_with(".'")
2660 || prev_trimmed.ends_with("!'")
2661 || prev_trimmed.ends_with("?'")
2662 || prev_trimmed.ends_with(".\u{201D}")
2663 || prev_trimmed.ends_with("!\u{201D}")
2664 || prev_trimmed.ends_with("?\u{201D}")
2665 || prev_trimmed.ends_with(".\u{2019}")
2666 || prev_trimmed.ends_with("!\u{2019}")
2667 || prev_trimmed.ends_with("?\u{2019}"))
2668 && !text_ends_with_abbreviation(
2669 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2670 &abbreviations,
2671 );
2672
2673 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2674 paragraph_parts.push(current_part.join(" "));
2676 current_part = vec![next_line];
2677 } else {
2678 current_part.push(next_line);
2679 }
2680 i += 1;
2681 }
2682
2683 if !current_part.is_empty() {
2685 if current_part.len() == 1 {
2686 paragraph_parts.push(current_part[0].to_string());
2688 } else {
2689 paragraph_parts.push(current_part.join(" "));
2690 }
2691 }
2692
2693 for (j, part) in paragraph_parts.iter().enumerate() {
2695 let reflowed = reflow_line(part, options);
2696 result.extend(reflowed);
2697
2698 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2702 let last_idx = result.len() - 1;
2703 if !has_hard_break(&result[last_idx]) {
2704 result[last_idx].push_str(" ");
2705 }
2706 }
2707 }
2708 }
2709 }
2710
2711 let result_text = result.join("\n");
2713 if content.ends_with('\n') && !result_text.ends_with('\n') {
2714 format!("{result_text}\n")
2715 } else {
2716 result_text
2717 }
2718}
2719
/// Result of reflowing one paragraph in place.
///
/// `start_byte..end_byte` identifies the paragraph inside the content that was
/// passed to the reflow function, and `reflowed_text` is the text meant to
/// replace that range.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset of the first byte of the paragraph in the original content.
    pub start_byte: usize,
    /// Byte offset one past the end of the paragraph (exclusive). The
    /// producers in this file include the paragraph's trailing newline in the
    /// range when the paragraph has one.
    pub end_byte: usize,
    /// Replacement text for `start_byte..end_byte`.
    pub reflowed_text: String,
}
2730
/// One line of a blockquote paragraph, split into its marker and its text.
///
/// A line is either *explicit* (it carried its own blockquote prefix) or
/// *lazy* (a continuation line written without a prefix).
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Text of the line with the blockquote prefix removed.
    pub(crate) content: String,
    /// `true` when the line was built with [`BlockquoteLineData::explicit`].
    pub(crate) is_explicit: bool,
    /// The prefix supplied for an explicit line; `None` for a lazy line.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Builds a line that carried its own blockquote `prefix`.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self::from_parts(content, Some(prefix))
    }

    /// Builds a lazy continuation line, i.e. one without any prefix.
    pub fn lazy(content: String) -> Self {
        Self::from_parts(content, None)
    }

    /// Shared constructor: a line is explicit exactly when it has a prefix.
    fn from_parts(content: String, prefix: Option<String>) -> Self {
        Self {
            is_explicit: prefix.is_some(),
            content,
            prefix,
        }
    }
}
2765
/// How the continuation lines (every line after the first) of a blockquote
/// paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines repeat the blockquote prefix.
    Explicit,
    /// Continuation lines are written without a prefix (lazy continuation).
    Lazy,
}
2772
2773pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2781 let mut explicit_count = 0usize;
2782 let mut lazy_count = 0usize;
2783
2784 for line in lines.iter().skip(1) {
2785 if line.is_explicit {
2786 explicit_count += 1;
2787 } else {
2788 lazy_count += 1;
2789 }
2790 }
2791
2792 if explicit_count > 0 && lazy_count == 0 {
2793 BlockquoteContinuationStyle::Explicit
2794 } else if lazy_count > 0 && explicit_count == 0 {
2795 BlockquoteContinuationStyle::Lazy
2796 } else if explicit_count >= lazy_count {
2797 BlockquoteContinuationStyle::Explicit
2798 } else {
2799 BlockquoteContinuationStyle::Lazy
2800 }
2801}
2802
2803pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2808 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2809
2810 for (idx, line) in lines.iter().enumerate() {
2811 let Some(prefix) = line.prefix.as_ref() else {
2812 continue;
2813 };
2814 counts
2815 .entry(prefix.clone())
2816 .and_modify(|entry| entry.0 += 1)
2817 .or_insert((1, idx));
2818 }
2819
2820 counts
2821 .into_iter()
2822 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2823 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2824 })
2825 .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix)
2826}
2827
2828pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2833 let trimmed = content_line.trim_start();
2834 trimmed.starts_with('>')
2835 || trimmed.starts_with('#')
2836 || trimmed.starts_with("```")
2837 || trimmed.starts_with("~~~")
2838 || is_unordered_list_marker(trimmed)
2839 || is_numbered_list_item(trimmed)
2840 || is_horizontal_rule(trimmed)
2841 || is_definition_list_item(trimmed)
2842 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2843 || trimmed.starts_with(":::")
2844 || (trimmed.starts_with('<')
2845 && !trimmed.starts_with("<http")
2846 && !trimmed.starts_with("<https")
2847 && !trimmed.starts_with("<mailto:"))
2848}
2849
2850pub fn reflow_blockquote_content(
2859 lines: &[BlockquoteLineData],
2860 explicit_prefix: &str,
2861 continuation_style: BlockquoteContinuationStyle,
2862 options: &ReflowOptions,
2863) -> Vec<String> {
2864 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2865 let segments = split_into_segments_strs(&content_strs);
2866 let mut reflowed_content_lines: Vec<String> = Vec::new();
2867
2868 for segment in segments {
2869 let hard_break_type = segment.last().and_then(|&line| {
2870 let line = line.strip_suffix('\r').unwrap_or(line);
2871 if line.ends_with('\\') {
2872 Some("\\")
2873 } else if line.ends_with(" ") {
2874 Some(" ")
2875 } else {
2876 None
2877 }
2878 });
2879
2880 let pieces: Vec<&str> = segment
2881 .iter()
2882 .map(|&line| {
2883 if let Some(l) = line.strip_suffix('\\') {
2884 l.trim_end()
2885 } else if let Some(l) = line.strip_suffix(" ") {
2886 l.trim_end()
2887 } else {
2888 line.trim_end()
2889 }
2890 })
2891 .collect();
2892
2893 let segment_text = pieces.join(" ");
2894 let segment_text = segment_text.trim();
2895 if segment_text.is_empty() {
2896 continue;
2897 }
2898
2899 let mut reflowed = reflow_line(segment_text, options);
2900 if let Some(break_marker) = hard_break_type
2901 && !reflowed.is_empty()
2902 {
2903 let last_idx = reflowed.len() - 1;
2904 if !has_hard_break(&reflowed[last_idx]) {
2905 reflowed[last_idx].push_str(break_marker);
2906 }
2907 }
2908 reflowed_content_lines.extend(reflowed);
2909 }
2910
2911 let mut styled_lines: Vec<String> = Vec::new();
2912 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2913 let force_explicit = idx == 0
2914 || continuation_style == BlockquoteContinuationStyle::Explicit
2915 || should_force_explicit_blockquote_line(line);
2916 if force_explicit {
2917 styled_lines.push(format!("{explicit_prefix}{line}"));
2918 } else {
2919 styled_lines.push(line.clone());
2920 }
2921 }
2922
2923 styled_lines
2924}
2925
2926fn is_blockquote_content_boundary(content: &str) -> bool {
2927 let trimmed = content.trim();
2928 trimmed.is_empty()
2929 || is_block_boundary(trimmed)
2930 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2931 || trimmed.starts_with(":::")
2932 || crate::utils::is_template_directive_only(content)
2933 || is_standalone_attr_list(content)
2934 || is_snippet_block_delimiter(content)
2935}
2936
2937fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2938 let mut segments = Vec::new();
2939 let mut current = Vec::new();
2940
2941 for &line in lines {
2942 current.push(line);
2943 if has_hard_break(line) {
2944 segments.push(current);
2945 current = Vec::new();
2946 }
2947 }
2948
2949 if !current.is_empty() {
2950 segments.push(current);
2951 }
2952
2953 segments
2954}
2955
2956fn reflow_blockquote_paragraph_at_line(
2957 content: &str,
2958 lines: &[&str],
2959 target_idx: usize,
2960 options: &ReflowOptions,
2961) -> Option<ParagraphReflow> {
2962 let mut anchor_idx = target_idx;
2963 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2964 parsed.nesting_level
2965 } else {
2966 let mut found = None;
2967 let mut idx = target_idx;
2968 loop {
2969 if lines[idx].trim().is_empty() {
2970 break;
2971 }
2972 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2973 found = Some((idx, parsed.nesting_level));
2974 break;
2975 }
2976 if idx == 0 {
2977 break;
2978 }
2979 idx -= 1;
2980 }
2981 let (idx, level) = found?;
2982 anchor_idx = idx;
2983 level
2984 };
2985
2986 let mut para_start = anchor_idx;
2988 while para_start > 0 {
2989 let prev_idx = para_start - 1;
2990 let prev_line = lines[prev_idx];
2991
2992 if prev_line.trim().is_empty() {
2993 break;
2994 }
2995
2996 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2997 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2998 break;
2999 }
3000 para_start = prev_idx;
3001 continue;
3002 }
3003
3004 let prev_lazy = prev_line.trim_start();
3005 if is_blockquote_content_boundary(prev_lazy) {
3006 break;
3007 }
3008 para_start = prev_idx;
3009 }
3010
3011 while para_start < lines.len() {
3013 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3014 para_start += 1;
3015 continue;
3016 };
3017 target_level = parsed.nesting_level;
3018 break;
3019 }
3020
3021 if para_start >= lines.len() || para_start > target_idx {
3022 return None;
3023 }
3024
3025 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3028 let mut idx = para_start;
3029 while idx < lines.len() {
3030 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3031 break;
3032 }
3033
3034 let line = lines[idx];
3035 if line.trim().is_empty() {
3036 break;
3037 }
3038
3039 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3040 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3041 break;
3042 }
3043 collected.push((
3044 idx,
3045 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3046 ));
3047 idx += 1;
3048 continue;
3049 }
3050
3051 let lazy_content = line.trim_start();
3052 if is_blockquote_content_boundary(lazy_content) {
3053 break;
3054 }
3055
3056 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3057 idx += 1;
3058 }
3059
3060 if collected.is_empty() {
3061 return None;
3062 }
3063
3064 let para_end = collected[collected.len() - 1].0;
3065 if target_idx < para_start || target_idx > para_end {
3066 return None;
3067 }
3068
3069 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3070
3071 let fallback_prefix = line_data
3072 .iter()
3073 .find_map(|d| d.prefix.clone())
3074 .unwrap_or_else(|| "> ".to_string());
3075 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3076 let continuation_style = blockquote_continuation_style(&line_data);
3077
3078 let adjusted_line_length = options
3079 .line_length
3080 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3081 .max(1);
3082
3083 let adjusted_options = ReflowOptions {
3084 line_length: adjusted_line_length,
3085 ..options.clone()
3086 };
3087
3088 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3089
3090 if styled_lines.is_empty() {
3091 return None;
3092 }
3093
3094 let mut start_byte = 0;
3096 for line in lines.iter().take(para_start) {
3097 start_byte += line.len() + 1;
3098 }
3099
3100 let mut end_byte = start_byte;
3101 for line in lines.iter().take(para_end + 1).skip(para_start) {
3102 end_byte += line.len() + 1;
3103 }
3104
3105 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3106 if !includes_trailing_newline {
3107 end_byte -= 1;
3108 }
3109
3110 let reflowed_joined = styled_lines.join("\n");
3111 let reflowed_text = if includes_trailing_newline {
3112 if reflowed_joined.ends_with('\n') {
3113 reflowed_joined
3114 } else {
3115 format!("{reflowed_joined}\n")
3116 }
3117 } else if reflowed_joined.ends_with('\n') {
3118 reflowed_joined.trim_end_matches('\n').to_string()
3119 } else {
3120 reflowed_joined
3121 };
3122
3123 Some(ParagraphReflow {
3124 start_byte,
3125 end_byte,
3126 reflowed_text,
3127 })
3128}
3129
3130pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3148 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3149}
3150
3151pub fn reflow_paragraph_at_line_with_mode(
3153 content: &str,
3154 line_number: usize,
3155 line_length: usize,
3156 length_mode: ReflowLengthMode,
3157) -> Option<ParagraphReflow> {
3158 let options = ReflowOptions {
3159 line_length,
3160 length_mode,
3161 ..Default::default()
3162 };
3163 reflow_paragraph_at_line_with_options(content, line_number, &options)
3164}
3165
3166pub fn reflow_paragraph_at_line_with_options(
3177 content: &str,
3178 line_number: usize,
3179 options: &ReflowOptions,
3180) -> Option<ParagraphReflow> {
3181 if line_number == 0 {
3182 return None;
3183 }
3184
3185 let lines: Vec<&str> = content.lines().collect();
3186
3187 if line_number > lines.len() {
3189 return None;
3190 }
3191
3192 let target_idx = line_number - 1; let target_line = lines[target_idx];
3194 let trimmed = target_line.trim();
3195
3196 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3199 return Some(blockquote_reflow);
3200 }
3201
3202 if is_paragraph_boundary(trimmed, target_line) {
3204 return None;
3205 }
3206
3207 let mut para_start = target_idx;
3209 while para_start > 0 {
3210 let prev_idx = para_start - 1;
3211 let prev_line = lines[prev_idx];
3212 let prev_trimmed = prev_line.trim();
3213
3214 if is_paragraph_boundary(prev_trimmed, prev_line) {
3216 break;
3217 }
3218
3219 para_start = prev_idx;
3220 }
3221
3222 let mut para_end = target_idx;
3224 while para_end + 1 < lines.len() {
3225 let next_idx = para_end + 1;
3226 let next_line = lines[next_idx];
3227 let next_trimmed = next_line.trim();
3228
3229 if is_paragraph_boundary(next_trimmed, next_line) {
3231 break;
3232 }
3233
3234 para_end = next_idx;
3235 }
3236
3237 let paragraph_lines = &lines[para_start..=para_end];
3239
3240 let mut start_byte = 0;
3242 for line in lines.iter().take(para_start) {
3243 start_byte += line.len() + 1; }
3245
3246 let mut end_byte = start_byte;
3247 for line in paragraph_lines {
3248 end_byte += line.len() + 1; }
3250
3251 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3254
3255 if !includes_trailing_newline {
3257 end_byte -= 1;
3258 }
3259
3260 let paragraph_text = paragraph_lines.join("\n");
3262
3263 let reflowed = reflow_markdown(¶graph_text, options);
3265
3266 let reflowed_text = if includes_trailing_newline {
3270 if reflowed.ends_with('\n') {
3272 reflowed
3273 } else {
3274 format!("{reflowed}\n")
3275 }
3276 } else {
3277 if reflowed.ends_with('\n') {
3279 reflowed.trim_end_matches('\n').to_string()
3280 } else {
3281 reflowed
3282 }
3283 };
3284
3285 Some(ParagraphReflow {
3286 start_byte,
3287 end_byte,
3288 reflowed_text,
3289 })
3290}
3291
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins which trailing tokens `text_ends_with_abbreviation` accepts with
    /// the default abbreviation list.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Recognised abbreviations, alone or at the end of longer text.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Ordinary words followed by a period are not abbreviations.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));

        // Only a period counts; other punctuation, no punctuation and empty
        // input never match.
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Marker followed by content, and a bare marker.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Horizontal rules, emphasis and plain text are not list markers.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    #[test]
    fn test_is_block_boundary() {
        // Everything that starts a new block.
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Plain paragraph text, including inline markup.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list item directly after a short line must not be merged
    /// into that line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Block boundaries are paragraph boundaries too.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four spaces or a tab of indentation.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text indented by fewer than four spaces.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", "  text"));
    }

    /// Line 3 of the input is the `:::` div opener, which is a boundary and
    /// therefore has no paragraph to reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}