1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit used to measure the length of a line when reflowing.
///
/// `display_len` turns a string into a number according to the selected
/// variant; `reflow_line` compares that number against
/// `ReflowOptions::line_length`.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Use the width reported by `UnicodeWidthStr::width`. This is the default.
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
/// Configuration consumed by the reflow routines in this module.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length, measured according to `length_mode`.
    ///
    /// In plain length-based mode `reflow_line` returns its input unchanged
    /// when this is `0` or when the line already fits.
    pub line_length: usize,
    /// NOTE(review): not read by the code visible here; presumably prefers
    /// breaking at sentence ends — confirm against its users.
    pub break_on_sentences: bool,
    /// NOTE(review): not read by the code visible here; presumably keeps
    /// existing line breaks — confirm against its users.
    pub preserve_breaks: bool,
    /// When `true`, `reflow_line` delegates to the sentence-per-line reflow
    /// and ignores `line_length`.
    pub sentence_per_line: bool,
    /// When `true` (and `sentence_per_line` is `false`), `reflow_line`
    /// delegates to the semantic-line-break reflow.
    pub semantic_line_breaks: bool,
    /// Optional custom abbreviations, forwarded to `get_abbreviations` when
    /// building the set used for sentence detection.
    pub abbreviations: Option<Vec<String>>,
    /// Unit in which line lengths are measured (see `ReflowLengthMode`).
    pub length_mode: ReflowLengthMode,
    /// When `true`, elements are parsed with attribute-list recognition
    /// (`parse_markdown_elements_with_attr_lists`).
    pub attr_lists: bool,
    /// Forwarded to sentence-boundary detection: when `true`, a period only
    /// ends a sentence if the following text starts with an uppercase or CJK
    /// character.
    pub require_sentence_capital: bool,
    /// NOTE(review): not read by the code visible here; presumably caps the
    /// indentation of list continuation lines — confirm against its users.
    pub max_list_continuation_indent: Option<usize>,
}
77
/// Default reflow configuration: 80-unit lines measured with the default
/// `ReflowLengthMode`, sentence breaking and the sentence-capital requirement
/// enabled, and every other mode switched off or unset.
impl Default for ReflowOptions {
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
            semantic_line_breaks: false,
            // `None` means no custom abbreviations are supplied.
            abbreviations: None,
            length_mode: ReflowLengthMode::default(),
            attr_lists: false,
            require_sentence_capital: true,
            max_list_continuation_indent: None,
        }
    }
}
94
/// Builds a per-character mask marking which characters of `text` belong to
/// an inline code span (delimiters included).
///
/// The mask has one entry per `char` of `text`. A code span opens with a run
/// of N backticks and closes at the next run of exactly N backticks; runs of
/// a different length inside the span are treated as content. An opening run
/// without a matching closing run is left unmarked and scanning resumes right
/// after it.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run that starts at `from`.
    let backtick_run = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if chars[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let open_start = cursor;
        let open_len = backtick_run(open_start);

        // Look for a closing run with exactly the same number of backticks.
        let mut scan = open_start + open_len;
        let mut span_end = None;
        while scan < total {
            if chars[scan] != '`' {
                scan += 1;
                continue;
            }
            let close_len = backtick_run(scan);
            scan += close_len;
            if close_len == open_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening delimiter, content and closing delimiter are all masked.
                mask[open_start..end].fill(true);
                cursor = end;
            }
            // Unmatched opener: treat the backticks as plain text and move on.
            None => cursor = open_start + open_len,
        }
    }

    mask
}
155
/// Decides whether the character at index `pos` of `text` ends a sentence.
///
/// `pos` is a character index (an index into `text.chars()`), not a byte
/// offset. `abbreviations` is passed to `text_ends_with_abbreviation` so that
/// a period closing a known abbreviation is not treated as a boundary. When
/// `require_sentence_capital` is `true`, a period only counts if the text
/// that follows starts with an uppercase or CJK character.
///
/// Returns `false` whenever nothing but markers/whitespace follows the
/// punctuation, i.e. the end of the text is never reported as a boundary.
fn is_sentence_boundary(
    text: &str,
    pos: usize,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> bool {
    // NOTE(review): the whole text is re-collected on every call; callers that
    // invoke this once per character pay for that each time.
    let chars: Vec<char> = text.chars().collect();

    // At least one character must follow `pos` (this also bounds-checks `pos`).
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // CJK sentence-ending punctuation: no space is required afterwards. Skip
    // emphasis markers, then whitespace, then emphasis markers again, and
    // report a boundary if any character is left.
    if is_cjk_sentence_ending(c) {
        let mut after_punct_pos = pos + 1;
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        return true;
    }

    // Otherwise only `.`, `!` and `?` can end a sentence.
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // A space must follow the punctuation, optionally preceded by a closing
    // quote and/or one or two identical `*`/`_` markers, or by `~~`.
    // `after_space_pos` is the index right after that space.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        if chars[pos + 2] == ' ' {
            // Punctuation, closing quote, space.
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Punctuation, closing quote, single emphasis marker, space.
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Punctuation, closing quote, doubled emphasis marker, space.
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Punctuation, single emphasis marker, space.
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Punctuation, doubled emphasis marker, space.
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Punctuation, `~~`, space.
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip any further whitespace after the separating space.
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip emphasis markers and opening quotes to find the first character of
    // the following text.
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    if first_letter_pos >= chars.len() {
        return false;
    }

    let first_char = chars[first_letter_pos];

    // `!` and `?` always end a sentence once followed by more text; the
    // remaining checks apply to periods only.
    if c == '!' || c == '?' {
        return true;
    }

    if pos > 0 {
        // Byte length of the text up to and including the period, so the
        // prefix can be sliced out of `text`.
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // A numeric character before the period and an ASCII digit starting
        // the following text: not a boundary.
        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
            return false;
        }

        // A lone ASCII uppercase letter before the period (at the start of the
        // text or preceded by whitespace): not a boundary.
        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
            return false;
        }
    }

    // Optionally require the next sentence to start with an uppercase or CJK
    // character.
    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    true
}
320
321pub fn split_into_sentences(text: &str) -> Vec<String> {
323 split_into_sentences_custom(text, &None)
324}
325
326pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328 let abbreviations = get_abbreviations(custom_abbreviations);
329 split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332fn split_into_sentences_with_set(
335 text: &str,
336 abbreviations: &HashSet<String>,
337 require_sentence_capital: bool,
338) -> Vec<String> {
339 let in_code = compute_inline_code_mask(text);
341
342 let mut sentences = Vec::new();
343 let mut current_sentence = String::new();
344 let mut chars = text.chars().peekable();
345 let mut pos = 0;
346
347 while let Some(c) = chars.next() {
348 current_sentence.push(c);
349
350 if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351 while let Some(&next) = chars.peek() {
353 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354 current_sentence.push(chars.next().unwrap());
355 pos += 1;
356 } else {
357 break;
358 }
359 }
360
361 if chars.peek() == Some(&' ') {
363 chars.next();
364 pos += 1;
365 }
366
367 sentences.push(current_sentence.trim().to_string());
368 current_sentence.clear();
369 }
370
371 pos += 1;
372 }
373
374 if !current_sentence.trim().is_empty() {
376 sentences.push(current_sentence.trim().to_string());
377 }
378 sentences
379}
380
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_`
/// or `*`, consists only of that marker and spaces, and contains the marker
/// at least three times.
///
/// Leading whitespace is not skipped; the first character must be the marker.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Returns `true` when `line` starts with an ordered-list marker: one or more
/// ASCII digits, a `.`, and a space (for example `"1. item"`).
///
/// Only ASCII digits are accepted. Other Unicode numerics such as `²` or
/// Arabic-Indic digits do not form a Markdown list marker, so a paragraph
/// line starting with them must not be treated as a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are single bytes, so the count is also a valid byte offset.
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    digit_count > 0 && line[digit_count..].starts_with(". ")
}
433
434fn is_unordered_list_marker(s: &str) -> bool {
436 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437 && !is_horizontal_rule(s)
438 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441fn is_block_boundary_core(trimmed: &str) -> bool {
444 trimmed.is_empty()
445 || trimmed.starts_with('#')
446 || trimmed.starts_with("```")
447 || trimmed.starts_with("~~~")
448 || trimmed.starts_with('>')
449 || (trimmed.starts_with('[') && trimmed.contains("]:"))
450 || is_horizontal_rule(trimmed)
451 || is_unordered_list_marker(trimmed)
452 || is_numbered_list_item(trimmed)
453 || is_definition_list_item(trimmed)
454 || trimmed.starts_with(":::")
455}
456
457fn is_block_boundary(trimmed: &str) -> bool {
460 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467 is_block_boundary_core(trimmed)
468 || calculate_indentation_width_default(line) >= 4
469 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Returns `true` when `line` ends with a Markdown hard line break: two or
/// more trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF line endings) is ignored before checking. A
/// single trailing space is ordinary whitespace and is NOT a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
481
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trims trailing whitespace from `s` while keeping a hard line break marker.
///
/// * A trailing `\r` is removed first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalised to its content
///   followed by exactly two spaces (the Markdown hard-break marker).
/// * Any other line has its trailing whitespace removed; a single trailing
///   space is not a hard break and is dropped.
/// * A whitespace-only line becomes the empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if content.is_empty() {
        return String::new();
    }

    if s.ends_with("  ") {
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
517
518fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520 if options.attr_lists {
521 parse_markdown_elements_with_attr_lists(text)
522 } else {
523 parse_markdown_elements(text)
524 }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528 if options.sentence_per_line {
530 let elements = parse_elements(line, options);
531 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532 }
533
534 if options.semantic_line_breaks {
536 let elements = parse_elements(line, options);
537 return reflow_elements_semantic(&elements, options);
538 }
539
540 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543 return vec![line.to_string()];
544 }
545
546 let elements = parse_elements(line, options);
548
549 reflow_elements(&elements, options)
551}
552
/// Source of the image nested inside a linked image (`[![alt](…)](…)`).
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL, rendered as `![alt](url)`.
    Inline(String),
    /// Reference label, rendered as `![alt][ref]`.
    Reference(String),
}

/// Link destination wrapped around the image of a linked image.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL, rendered as `[…](url)`.
    Inline(String),
    /// Reference label, rendered as `[…][ref]`.
    Reference(String),
}

/// An inline Markdown element produced by the element parser.
///
/// Each variant stores the element's content without its delimiters, except
/// where noted. The `Display` implementation renders it back as Markdown.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// Image wrapped in a link, in any inline/reference combination.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`.
    Strikethrough(String),
    /// Wiki link `[[target]]`.
    WikiLink(String),
    /// Inline math `$…$`.
    InlineMath(String),
    /// Display math `$$…$$`.
    DisplayMath(String),
    /// Emoji shortcode `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored verbatim including its angle brackets.
    Autolink(String),
    /// HTML tag, stored verbatim.
    HtmlTag(String),
    /// HTML entity, stored verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored verbatim.
    HugoShortcode(String),
    /// Attribute list, stored verbatim.
    AttrList(String),
    /// Inline code; rendered between single backticks.
    Code(String),
    /// Strong emphasis; `underscore` selects `__…__` over `**…**`.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis; `underscore` selects `_…_` over `*…*`.
    Italic {
        content: String,
        underscore: bool,
    },
}

/// Renders an element back to its Markdown source form.
///
/// Width calculations (`display_width`) measure this rendering, so every
/// variant must render its full markup. Inline images and the inline image
/// part of linked images are written as `![alt](url)`.
impl std::fmt::Display for Element {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variants stored verbatim are written as-is.
            Element::Text(s)
            | Element::Autolink(s)
            | Element::HtmlTag(s)
            | Element::HtmlEntity(s)
            | Element::HugoShortcode(s)
            | Element::AttrList(s) => f.write_str(s),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
694
695impl Element {
696 fn display_width(&self, mode: ReflowLengthMode) -> usize {
700 let formatted = format!("{self}");
701 display_len(&formatted, mode)
702 }
703}
704
/// An emphasis, strong or strikethrough span located by
/// `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the parsed text where the opening delimiter starts.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// Text between the delimiters (delimiters excluded).
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the span is delimited with underscores rather than
    /// asterisks; always `false` for strikethrough.
    uses_underscore: bool,
}
721
722fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732 let mut spans = Vec::new();
733 let mut options = Options::empty();
734 options.insert(Options::ENABLE_STRIKETHROUGH);
735
736 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739 let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741 let parser = Parser::new_ext(text, options).into_offset_iter();
742
743 for (event, range) in parser {
744 match event {
745 Event::Start(Tag::Emphasis) => {
746 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748 emphasis_stack.push((range.start, uses_underscore));
749 }
750 Event::End(TagEnd::Emphasis) => {
751 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752 let content_start = start_byte + 1;
754 let content_end = range.end - 1;
755 if content_end > content_start
756 && let Some(content) = text.get(content_start..content_end)
757 {
758 spans.push(EmphasisSpan {
759 start: start_byte,
760 end: range.end,
761 content: content.to_string(),
762 is_strong: false,
763 is_strikethrough: false,
764 uses_underscore,
765 });
766 }
767 }
768 }
769 Event::Start(Tag::Strong) => {
770 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772 strong_stack.push((range.start, uses_underscore));
773 }
774 Event::End(TagEnd::Strong) => {
775 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776 let content_start = start_byte + 2;
778 let content_end = range.end - 2;
779 if content_end > content_start
780 && let Some(content) = text.get(content_start..content_end)
781 {
782 spans.push(EmphasisSpan {
783 start: start_byte,
784 end: range.end,
785 content: content.to_string(),
786 is_strong: true,
787 is_strikethrough: false,
788 uses_underscore,
789 });
790 }
791 }
792 }
793 Event::Start(Tag::Strikethrough) => {
794 strikethrough_stack.push(range.start);
795 }
796 Event::End(TagEnd::Strikethrough) => {
797 if let Some(start_byte) = strikethrough_stack.pop() {
798 let content_start = start_byte + 2;
800 let content_end = range.end - 2;
801 if content_end > content_start
802 && let Some(content) = text.get(content_start..content_end)
803 {
804 spans.push(EmphasisSpan {
805 start: start_byte,
806 end: range.end,
807 content: content.to_string(),
808 is_strong: false,
809 is_strikethrough: true,
810 uses_underscore: false,
811 });
812 }
813 }
814 }
815 _ => {}
816 }
817 }
818
819 spans.sort_by_key(|s| s.start);
821 spans
822}
823
824fn parse_markdown_elements(text: &str) -> Vec<Element> {
835 parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839 parse_markdown_elements_inner(text, true)
840}
841
/// Splits `text` into a sequence of inline `Element`s.
///
/// The parser repeatedly looks at the unparsed tail (`remaining`) and finds:
///
/// 1. the earliest regex-matched construct (linked images, images, footnote
///    references, links, wiki links, math, emoji, HTML entities, Hugo
///    shortcodes, HTML tags / autolinks), and
/// 2. the earliest "special" construct (inline code, an attribute list when
///    `attr_lists` is `true`, or an emphasis span from
///    `extract_emphasis_spans`).
///
/// Whichever starts first is emitted, preceded by any plain text before it.
/// When nothing is found, the rest of the input becomes one `Element::Text`.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once over the whole text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start, end, kind) of the earliest regex match, relative to
        // `remaining`. Comparisons use strict `<`, so on equal starts the
        // pattern tested first wins.
        let mut earliest_match: Option<(usize, usize, &str)> = None;

        // Linked-image patterns are only tried when `[!` occurs at all.
        if remaining.contains("[!") {
            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
            }

            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
            }

            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
            }

            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
            }
        }

        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_image"));
        }

        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_image"));
        }

        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
        }

        // Patterns whose `find` returns a `Result`: an `Err` is treated the
        // same as "no match".
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_link"));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_link"));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
        }

        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "wiki_link"));
        }

        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "display_math"));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_math"));
        }

        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "emoji"));
        }

        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "html_entity"));
        }

        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
        }

        // An HTML-tag match is classified as an autolink when it starts with a
        // known URL scheme or when its bracket-stripped content matches the
        // e-mail pattern; otherwise it is a plain HTML tag.
        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), m.end(), "autolink"));
            } else {
                earliest_match = Some((m.start(), m.end(), "html_tag"));
            }
        }

        // Earliest "special" construct (code, attribute list, emphasis).
        // `next_special == remaining.len()` means none was found.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Only the first `{` is examined, and the attribute-list pattern must
        // match exactly at that position.
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Consider only the first emphasis span that starts at or after the
        // current offset (the spans are sorted by start), then stop looking.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex match is processed only if it starts strictly before the
        // special construct; on a tie the special construct wins.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, match_end, pattern_type) = earliest_match.unwrap();

            // Plain text preceding the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs the pattern to obtain capture groups, then
            // advances past the match. The `else` fallbacks (captures failing
            // after `find` succeeded) emit the leading character(s) as text.
            match pattern_type {
                "linked_image_ii" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_url = caps.get(2).map_or("", |m| m.as_str());
                        let link_url = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
                        let link_url = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_url = caps.get(2).map_or("", |m| m.as_str());
                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let url = caps.get(2).map_or("", |m| m.as_str());
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map_or("", |m| m.as_str());
                        let reference = caps.get(2).map_or("", |m| m.as_str());

                        // An empty label is the collapsed form `![alt][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map_or("", |m| m.as_str());
                        let url = caps.get(2).map_or("", |m| m.as_str());
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map_or("", |m| m.as_str());
                        let reference = caps.get(2).map_or("", |m| m.as_str());

                        // An empty label is the collapsed form `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map_or("", |m| m.as_str());
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The following kinds keep the matched text verbatim.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                // Unknown kind: emit `[` as text and advance one byte.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the plain text in front of the special construct, if any.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` starts with a backtick; the code content
                    // runs up to the next backtick. Without a closing
                    // backtick the rest is text and parsing stops.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..=code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Length of the whole span including its delimiters.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Nothing special left: the rest is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1352
/// Decides whether a separating space is needed before appending to `current`.
///
/// Returns `false` when `current` is empty or already ends with a space, `(`,
/// `[`, or `-`; returns `true` for every other final character.
fn should_insert_space_before_join(current: &str) -> bool {
    match current.chars().next_back() {
        // Nothing on the line yet, so there is nothing to separate from.
        None => false,
        Some(last) => !matches!(last, ' ' | '(' | '[' | '-'),
    }
}
1360
1361fn reflow_elements_sentence_per_line(
1363 elements: &[Element],
1364 custom_abbreviations: &Option<Vec<String>>,
1365 require_sentence_capital: bool,
1366) -> Vec<String> {
1367 let abbreviations = get_abbreviations(custom_abbreviations);
1368 let mut lines = Vec::new();
1369 let mut current_line = String::new();
1370
1371 for (idx, element) in elements.iter().enumerate() {
1372 let element_str = format!("{element}");
1373
1374 if let Element::Text(text) = element {
1376 let combined = format!("{current_line}{text}");
1378 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1380
1381 if sentences.len() > 1 {
1382 for (i, sentence) in sentences.iter().enumerate() {
1384 if i == 0 {
1385 let trimmed = sentence.trim();
1388
1389 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1390 current_line.clone_from(sentence);
1392 } else {
1393 lines.push(sentence.clone());
1395 current_line.clear();
1396 }
1397 } else if i == sentences.len() - 1 {
1398 let trimmed = sentence.trim();
1400 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1401
1402 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1403 lines.push(sentence.clone());
1405 current_line.clear();
1406 } else {
1407 current_line.clone_from(sentence);
1409 }
1410 } else {
1411 lines.push(sentence.clone());
1413 }
1414 }
1415 } else {
1416 let trimmed = combined.trim();
1418
1419 if trimmed.is_empty() {
1423 continue;
1424 }
1425
1426 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1427
1428 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1429 lines.push(trimmed.to_string());
1431 current_line.clear();
1432 } else {
1433 current_line = combined;
1435 }
1436 }
1437 } else if let Element::Italic { content, underscore } = element {
1438 let marker = if *underscore { "_" } else { "*" };
1440 handle_emphasis_sentence_split(
1441 content,
1442 marker,
1443 &abbreviations,
1444 require_sentence_capital,
1445 &mut current_line,
1446 &mut lines,
1447 );
1448 } else if let Element::Bold { content, underscore } = element {
1449 let marker = if *underscore { "__" } else { "**" };
1451 handle_emphasis_sentence_split(
1452 content,
1453 marker,
1454 &abbreviations,
1455 require_sentence_capital,
1456 &mut current_line,
1457 &mut lines,
1458 );
1459 } else if let Element::Strikethrough(content) = element {
1460 handle_emphasis_sentence_split(
1462 content,
1463 "~~",
1464 &abbreviations,
1465 require_sentence_capital,
1466 &mut current_line,
1467 &mut lines,
1468 );
1469 } else {
1470 let is_adjacent = if idx > 0 {
1473 match &elements[idx - 1] {
1474 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1475 _ => true,
1476 }
1477 } else {
1478 false
1479 };
1480
1481 if !is_adjacent && should_insert_space_before_join(¤t_line) {
1483 current_line.push(' ');
1484 }
1485 current_line.push_str(&element_str);
1486 }
1487 }
1488
1489 if !current_line.is_empty() {
1491 lines.push(current_line.trim().to_string());
1492 }
1493 lines
1494}
1495
1496fn handle_emphasis_sentence_split(
1498 content: &str,
1499 marker: &str,
1500 abbreviations: &HashSet<String>,
1501 require_sentence_capital: bool,
1502 current_line: &mut String,
1503 lines: &mut Vec<String>,
1504) {
1505 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1507
1508 if sentences.len() <= 1 {
1509 if should_insert_space_before_join(current_line) {
1511 current_line.push(' ');
1512 }
1513 current_line.push_str(marker);
1514 current_line.push_str(content);
1515 current_line.push_str(marker);
1516
1517 let trimmed = content.trim();
1519 let ends_with_punct = ends_with_sentence_punct(trimmed);
1520 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1521 lines.push(current_line.clone());
1522 current_line.clear();
1523 }
1524 } else {
1525 for (i, sentence) in sentences.iter().enumerate() {
1527 let trimmed = sentence.trim();
1528 if trimmed.is_empty() {
1529 continue;
1530 }
1531
1532 if i == 0 {
1533 if should_insert_space_before_join(current_line) {
1535 current_line.push(' ');
1536 }
1537 current_line.push_str(marker);
1538 current_line.push_str(trimmed);
1539 current_line.push_str(marker);
1540
1541 let ends_with_punct = ends_with_sentence_punct(trimmed);
1543 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1544 lines.push(current_line.clone());
1545 current_line.clear();
1546 }
1547 } else if i == sentences.len() - 1 {
1548 let ends_with_punct = ends_with_sentence_punct(trimmed);
1550
1551 let mut line = String::new();
1552 line.push_str(marker);
1553 line.push_str(trimmed);
1554 line.push_str(marker);
1555
1556 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1557 lines.push(line);
1558 } else {
1559 *current_line = line;
1561 }
1562 } else {
1563 let mut line = String::new();
1565 line.push_str(marker);
1566 line.push_str(trimmed);
1567 line.push_str(marker);
1568 lines.push(line);
1569 }
1570 }
1571 }
1572}
1573
/// Lowercase words before which `split_at_break_word` may start a new line.
///
/// The splitter searches a lowercased copy of the text for each entry and
/// only accepts occurrences that have a space after them and either a space
/// before them or sit at the very start of the text.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions.
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    // Relative pronouns and subordinating conjunctions.
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    // Conjunctive adverbs.
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1609
/// Returns `true` for the characters treated as clause-level punctuation:
/// comma, semicolon, colon, and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1614
1615fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1629 debug_assert!(slice.starts_with('('));
1630 let mut depth: i32 = 0;
1631 for (local_byte, c) in slice.char_indices() {
1632 let global_byte = offset + local_byte;
1633 if depth > 0 && is_inside_element(global_byte, element_spans) {
1638 continue;
1639 }
1640 match c {
1641 '(' => depth += 1,
1642 ')' => {
1643 depth -= 1;
1644 if depth == 0 {
1645 let end = local_byte + 1;
1646 let inner = &slice[1..local_byte];
1647 return Some((end, inner));
1648 }
1649 }
1650 _ => {}
1651 }
1652 }
1653 None
1654}
1655
1656fn split_at_parenthetical(
1673 text: &str,
1674 line_length: usize,
1675 element_spans: &[(usize, usize)],
1676 length_mode: ReflowLengthMode,
1677) -> Option<(String, String)> {
1678 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1679
1680 if text.starts_with('(')
1682 && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1683 && inner.contains(' ')
1684 {
1685 let tail = &text[end_local..];
1689 let attached_len = tail
1690 .char_indices()
1691 .take_while(|(_, c)| is_closing_quote(*c) || is_clause_punctuation(*c))
1692 .last()
1693 .map_or(0, |(idx, c)| idx + c.len_utf8());
1694 let first_end = end_local + attached_len;
1695 let rest_start = first_end;
1696 let first = &text[..first_end];
1697 let first_len = display_len(first, length_mode);
1698 if first_len <= line_length {
1701 let rest = text[rest_start..].trim_start();
1702 if !rest.is_empty() {
1703 return Some((first.to_string(), rest.to_string()));
1704 }
1705 }
1706 }
1707
1708 let mut best_open_byte: Option<usize> = None;
1710 let mut pos = 0usize;
1711 while pos < text.len() {
1712 if text.as_bytes()[pos] != b'(' {
1714 let c = text[pos..].chars().next().unwrap();
1715 pos += c.len_utf8();
1716 continue;
1717 }
1718 if is_inside_element(pos, element_spans) {
1720 pos += 1;
1721 continue;
1722 }
1723 if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1724 let first = text[..pos].trim_end();
1725 let first_len = display_len(first, length_mode);
1726 if !first.is_empty()
1727 && first_len >= min_first_len
1728 && first_len <= line_length
1729 && inner.contains(' ')
1730 && best_open_byte.is_none_or(|prev| pos > prev)
1731 {
1732 best_open_byte = Some(pos);
1733 }
1734 pos += end_local;
1735 } else {
1736 pos += 1;
1737 }
1738 }
1739
1740 let open_byte = best_open_byte?;
1741 let first = text[..open_byte].trim_end().to_string();
1742 let rest = text[open_byte..].to_string();
1743 if first.is_empty() || rest.trim().is_empty() {
1744 return None;
1745 }
1746 Some((first, rest))
1747}
1748
1749fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1753 let mut spans = Vec::new();
1754 let mut offset = 0;
1755 for element in elements {
1756 let rendered = format!("{element}");
1757 let len = rendered.len();
1758 if !matches!(element, Element::Text(_)) {
1759 spans.push((offset, offset + len));
1760 }
1761 offset += len;
1762 }
1763 spans
1764}
1765
/// Reports whether `pos` lies strictly inside any `(start, end)` span.
///
/// Both boundaries are excluded: a position equal to a span's `start` or
/// `end` is not considered inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1770
/// Fraction of the line length used as a lower bound on line width.
///
/// The split helpers require the first part of a split to measure at least
/// `line_length * MIN_SPLIT_RATIO`, and `reflow_elements_semantic` tries to
/// merge lines shorter than that into the preceding line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1774
1775fn split_at_clause_punctuation(
1779 text: &str,
1780 line_length: usize,
1781 element_spans: &[(usize, usize)],
1782 length_mode: ReflowLengthMode,
1783) -> Option<(String, String)> {
1784 let chars: Vec<char> = text.chars().collect();
1785 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1786
1787 let mut width_acc = 0;
1789 let mut search_end_char = 0;
1790 for (idx, &c) in chars.iter().enumerate() {
1791 let c_width = display_len(&c.to_string(), length_mode);
1792 if width_acc + c_width > line_length {
1793 break;
1794 }
1795 width_acc += c_width;
1796 search_end_char = idx + 1;
1797 }
1798
1799 let mut paren_depth: i32 = 0;
1806 let mut best_pos = None;
1807 for i in (0..search_end_char).rev() {
1808 let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1810 let byte_after: usize = byte_start + chars[i].len_utf8();
1812
1813 if !is_inside_element(byte_start, element_spans) {
1814 match chars[i] {
1815 ')' => paren_depth += 1,
1816 '(' => paren_depth = paren_depth.saturating_sub(1),
1817 _ => {}
1818 }
1819 }
1820
1821 if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1822 best_pos = Some(i);
1823 break;
1824 }
1825 }
1826
1827 let pos = best_pos?;
1828
1829 let first: String = chars[..=pos].iter().collect();
1831 let first_display_len = display_len(&first, length_mode);
1832 if first_display_len < min_first_len {
1833 return None;
1834 }
1835
1836 let rest: String = chars[pos + 1..].iter().collect();
1838 let rest = rest.trim_start().to_string();
1839
1840 if rest.is_empty() {
1841 return None;
1842 }
1843
1844 Some((first, rest))
1845}
1846
1847fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1854 let mut map = vec![0i32; text.len()];
1855 let mut depth = 0i32;
1856 for (byte, c) in text.char_indices() {
1857 if !is_inside_element(byte, element_spans) {
1858 match c {
1859 '(' => depth += 1,
1860 ')' => depth = depth.saturating_sub(1),
1861 _ => {}
1862 }
1863 }
1864 let end = (byte + c.len_utf8()).min(map.len());
1866 for slot in &mut map[byte..end] {
1867 *slot = depth;
1868 }
1869 }
1870 map
1871}
1872
1873fn is_standalone_parenthetical(line: &str) -> bool {
1882 let trimmed = line.trim();
1883 if !trimmed.starts_with('(') {
1884 return false;
1885 }
1886 let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1888 if !core.ends_with(')') {
1889 return false;
1890 }
1891 let inner = &core[1..core.len() - 1];
1893 if !inner.contains(' ') {
1894 return false;
1895 }
1896 let mut depth = 0i32;
1898 for c in core.chars() {
1899 match c {
1900 '(' => depth += 1,
1901 ')' => depth -= 1,
1902 _ => {}
1903 }
1904 if depth < 0 {
1905 return false;
1906 }
1907 }
1908 depth == 0
1909}
1910
1911fn split_at_break_word(
1915 text: &str,
1916 line_length: usize,
1917 element_spans: &[(usize, usize)],
1918 length_mode: ReflowLengthMode,
1919) -> Option<(String, String)> {
1920 let lower = text.to_lowercase();
1921 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1922 let mut best_split: Option<(usize, usize)> = None; let depth_map = paren_depth_map(text, element_spans);
1927
1928 for &word in BREAK_WORDS {
1929 let mut search_start = 0;
1930 while let Some(pos) = lower[search_start..].find(word) {
1931 let abs_pos = search_start + pos;
1932
1933 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1935 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1936
1937 if preceded_by_space && followed_by_space {
1938 let first_part = text[..abs_pos].trim_end();
1940 let first_part_len = display_len(first_part, length_mode);
1941
1942 let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1944
1945 if first_part_len >= min_first_len
1946 && first_part_len <= line_length
1947 && !is_inside_element(abs_pos, element_spans)
1948 && !inside_paren
1949 {
1950 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1952 best_split = Some((abs_pos, word.len()));
1953 }
1954 }
1955 }
1956
1957 search_start = abs_pos + word.len();
1958 }
1959 }
1960
1961 let (byte_start, _word_len) = best_split?;
1962
1963 let first = text[..byte_start].trim_end().to_string();
1964 let rest = text[byte_start..].to_string();
1965
1966 if first.is_empty() || rest.trim().is_empty() {
1967 return None;
1968 }
1969
1970 Some((first, rest))
1971}
1972
1973fn cascade_split_line(
1976 text: &str,
1977 line_length: usize,
1978 abbreviations: &Option<Vec<String>>,
1979 length_mode: ReflowLengthMode,
1980 attr_lists: bool,
1981) -> Vec<String> {
1982 if line_length == 0 || display_len(text, length_mode) <= line_length {
1983 return vec![text.to_string()];
1984 }
1985
1986 let elements = parse_markdown_elements_inner(text, attr_lists);
1987 let element_spans = compute_element_spans(&elements);
1988
1989 if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1992 let mut result = vec![first];
1993 result.extend(cascade_split_line(
1994 &rest,
1995 line_length,
1996 abbreviations,
1997 length_mode,
1998 attr_lists,
1999 ));
2000 return result;
2001 }
2002
2003 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2005 let mut result = vec![first];
2006 result.extend(cascade_split_line(
2007 &rest,
2008 line_length,
2009 abbreviations,
2010 length_mode,
2011 attr_lists,
2012 ));
2013 return result;
2014 }
2015
2016 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2018 let mut result = vec![first];
2019 result.extend(cascade_split_line(
2020 &rest,
2021 line_length,
2022 abbreviations,
2023 length_mode,
2024 attr_lists,
2025 ));
2026 return result;
2027 }
2028
2029 let options = ReflowOptions {
2031 line_length,
2032 break_on_sentences: false,
2033 preserve_breaks: false,
2034 sentence_per_line: false,
2035 semantic_line_breaks: false,
2036 abbreviations: abbreviations.clone(),
2037 length_mode,
2038 attr_lists,
2039 require_sentence_capital: true,
2040 max_list_continuation_indent: None,
2041 };
2042 reflow_elements(&elements, &options)
2043}
2044
2045fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2049 let sentence_lines =
2051 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2052
2053 if options.line_length == 0 {
2056 return sentence_lines;
2057 }
2058
2059 let length_mode = options.length_mode;
2060 let mut result = Vec::new();
2061 for line in sentence_lines {
2062 if display_len(&line, length_mode) <= options.line_length {
2063 result.push(line);
2064 } else {
2065 result.extend(cascade_split_line(
2066 &line,
2067 options.line_length,
2068 &options.abbreviations,
2069 length_mode,
2070 options.attr_lists,
2071 ));
2072 }
2073 }
2074
2075 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2078 let mut merged: Vec<String> = Vec::with_capacity(result.len());
2079 for line in result {
2080 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2081 if is_standalone_parenthetical(&line) {
2084 merged.push(line);
2085 continue;
2086 }
2087
2088 let prev_ends_at_sentence = {
2090 let trimmed = merged.last().unwrap().trim_end();
2091 trimmed
2092 .chars()
2093 .rev()
2094 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2095 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2096 };
2097
2098 if !prev_ends_at_sentence {
2099 let prev = merged.last_mut().unwrap();
2100 let combined = format!("{prev} {line}");
2101 if display_len(&combined, length_mode) <= options.line_length {
2103 *prev = combined;
2104 continue;
2105 }
2106 }
2107 }
2108 merged.push(line);
2109 }
2110 merged
2111}
2112
/// Finds the byte index of the last space in `line` at which it is safe to break.
///
/// A space is safe when it does not lie strictly inside any `(start, end)`
/// pair of `element_spans`. Returns `None` when `line` has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(space_at, _)| space_at)
        .find(|&space_at| {
            !element_spans
                .iter()
                .any(|&(start, end)| start < space_at && space_at < end)
        })
}
2126
2127fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2129 let mut lines = Vec::new();
2130 let mut current_line = String::new();
2131 let mut current_length = 0;
2132 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2134 let length_mode = options.length_mode;
2135
2136 for (idx, element) in elements.iter().enumerate() {
2137 let element_str = format!("{element}");
2138 let element_len = element.display_width(length_mode);
2139
2140 let is_adjacent_to_prev = if idx > 0 {
2146 match (&elements[idx - 1], element) {
2147 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2148 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2149 _ => true,
2150 }
2151 } else {
2152 false
2153 };
2154
2155 if let Element::Text(text) = element {
2157 let has_leading_space = text.starts_with(char::is_whitespace);
2159 let words: Vec<&str> = text.split_whitespace().collect();
2161
2162 for (i, word) in words.iter().enumerate() {
2163 let word_len = display_len(word, length_mode);
2164 let is_trailing_punct = word
2166 .chars()
2167 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2168
2169 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2172
2173 if is_first_adjacent {
2174 if current_length + word_len > options.line_length && current_length > 0 {
2176 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2179 let before = current_line[..last_space].trim_end().to_string();
2180 let after = current_line[last_space + 1..].to_string();
2181 lines.push(before);
2182 current_line = format!("{after}{word}");
2183 current_length = display_len(¤t_line, length_mode);
2184 current_line_element_spans.clear();
2185 } else {
2186 current_line.push_str(word);
2187 current_length += word_len;
2188 }
2189 } else {
2190 current_line.push_str(word);
2191 current_length += word_len;
2192 }
2193 } else if current_length > 0
2194 && current_length + 1 + word_len > options.line_length
2195 && !is_trailing_punct
2196 {
2197 lines.push(current_line.trim().to_string());
2199 current_line = word.to_string();
2200 current_length = word_len;
2201 current_line_element_spans.clear();
2202 } else {
2203 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2207 current_line.push(' ');
2208 current_length += 1;
2209 }
2210 current_line.push_str(word);
2211 current_length += word_len;
2212 }
2213 }
2214 } else if matches!(
2215 element,
2216 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2217 ) && element_len > options.line_length
2218 {
2219 let (content, marker): (&str, &str) = match element {
2223 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2224 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2225 Element::Strikethrough(content) => (content.as_str(), "~~"),
2226 _ => unreachable!(),
2227 };
2228
2229 let words: Vec<&str> = content.split_whitespace().collect();
2230 let n = words.len();
2231
2232 if n == 0 {
2233 let full = format!("{marker}{marker}");
2235 let full_len = display_len(&full, length_mode);
2236 if !is_adjacent_to_prev && current_length > 0 {
2237 current_line.push(' ');
2238 current_length += 1;
2239 }
2240 current_line.push_str(&full);
2241 current_length += full_len;
2242 } else {
2243 for (i, word) in words.iter().enumerate() {
2244 let is_first = i == 0;
2245 let is_last = i == n - 1;
2246 let word_str: String = match (is_first, is_last) {
2247 (true, true) => format!("{marker}{word}{marker}"),
2248 (true, false) => format!("{marker}{word}"),
2249 (false, true) => format!("{word}{marker}"),
2250 (false, false) => word.to_string(),
2251 };
2252 let word_len = display_len(&word_str, length_mode);
2253
2254 let needs_space = if is_first {
2255 !is_adjacent_to_prev && current_length > 0
2256 } else {
2257 current_length > 0
2258 };
2259
2260 if needs_space && current_length + 1 + word_len > options.line_length {
2261 lines.push(current_line.trim_end().to_string());
2262 current_line = word_str;
2263 current_length = word_len;
2264 current_line_element_spans.clear();
2265 } else {
2266 if needs_space {
2267 current_line.push(' ');
2268 current_length += 1;
2269 }
2270 current_line.push_str(&word_str);
2271 current_length += word_len;
2272 }
2273 }
2274 }
2275 } else {
2276 if is_adjacent_to_prev {
2280 if current_length + element_len > options.line_length {
2282 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2285 let before = current_line[..last_space].trim_end().to_string();
2286 let after = current_line[last_space + 1..].to_string();
2287 lines.push(before);
2288 current_line = format!("{after}{element_str}");
2289 current_length = display_len(¤t_line, length_mode);
2290 current_line_element_spans.clear();
2291 let start = after.len();
2293 current_line_element_spans.push((start, start + element_str.len()));
2294 } else {
2295 let start = current_line.len();
2297 current_line.push_str(&element_str);
2298 current_length += element_len;
2299 current_line_element_spans.push((start, current_line.len()));
2300 }
2301 } else {
2302 let start = current_line.len();
2303 current_line.push_str(&element_str);
2304 current_length += element_len;
2305 current_line_element_spans.push((start, current_line.len()));
2306 }
2307 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2308 lines.push(current_line.trim().to_string());
2310 current_line.clone_from(&element_str);
2311 current_length = element_len;
2312 current_line_element_spans.clear();
2313 current_line_element_spans.push((0, element_str.len()));
2314 } else {
2315 let ends_with_opener =
2317 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2318 if current_length > 0 && !ends_with_opener {
2319 current_line.push(' ');
2320 current_length += 1;
2321 }
2322 let start = current_line.len();
2323 current_line.push_str(&element_str);
2324 current_length += element_len;
2325 current_line_element_spans.push((start, current_line.len()));
2326 }
2327 }
2328 }
2329
2330 if !current_line.is_empty() {
2332 lines.push(current_line.trim_end().to_string());
2333 }
2334
2335 lines
2336}
2337
/// Reflows a whole Markdown document according to `options`.
///
/// The document is processed line by line. Blank lines, headings, `:::`
/// lines, fenced and indented code blocks, horizontal rules, table rows,
/// reference definitions and definition-list items are copied through
/// unchanged. Blockquote lines, list items and paragraphs have their text
/// passed to `reflow_line` and are re-emitted with their original prefix
/// (quote marker, list marker, continuation indent).
///
/// Returns the reflowed document. A trailing newline is appended when
/// `content` ended with one.
pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let mut result = Vec::new();
    let mut i = 0;

    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim();

        // Blank lines are kept, normalized to empty strings.
        if trimmed.is_empty() {
            result.push(String::new());
            i += 1;
            continue;
        }

        // Lines starting with `#` (headings) are never reflowed.
        if trimmed.starts_with('#') {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Lines starting with `:::` are copied verbatim.
        if trimmed.starts_with(":::") {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Fenced code block: copy everything up to and including the closing fence.
        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
            result.push(line.to_string());
            i += 1;
            while i < lines.len() {
                result.push(lines[i].to_string());
                // NOTE(review): either fence style closes the block, whichever
                // style opened it - confirm this is intended.
                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
                    i += 1;
                    break;
                }
                i += 1;
            }
            continue;
        }

        // Indented code block: copy this line and every following line that
        // is indented by at least 4 columns or is blank.
        if calculate_indentation_width_default(line) >= 4 {
            result.push(line.to_string());
            i += 1;
            while i < lines.len() {
                let next_line = lines[i];
                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
                    result.push(next_line.to_string());
                    i += 1;
                } else {
                    break;
                }
            }
            continue;
        }

        // Blockquote: reflow the text after the first `>` and re-attach the
        // prefix (indentation plus `>`) to every output line.
        if trimmed.starts_with('>') {
            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
            let quote_prefix = line[0..=gt_pos].to_string();
            let quote_content = &line[quote_prefix.len()..].trim_start();

            let reflowed = reflow_line(quote_content, options);
            for reflowed_line in &reflowed {
                result.push(format!("{quote_prefix} {reflowed_line}"));
            }
            i += 1;
            continue;
        }

        // Horizontal rules are copied verbatim.
        if is_horizontal_rule(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // List item: reflow the item text together with its continuation lines.
        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
            let indent = line.len() - line.trim_start().len();
            // The leading indentation is re-emitted as spaces.
            let indent_str = " ".repeat(indent);

            // `marker_end` is the byte just past the list marker;
            // `content_start` is the first byte of the item text.
            let mut marker_end = indent;
            let mut content_start = indent;

            if trimmed.chars().next().is_some_and(char::is_numeric) {
                // Numbered item: the marker runs up to and including the first `.`.
                if let Some(period_pos) = line[indent..].find('.') {
                    marker_end = indent + period_pos + 1;
                    content_start = marker_end;
                    // Skip the spaces between marker and text.
                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                        content_start += 1;
                    }
                }
            } else {
                // Bullet item: the marker is a single character.
                marker_end = indent + 1;
                content_start = marker_end;
                // Skip the spaces between marker and text.
                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                    content_start += 1;
                }
            }

            // Continuation lines must be indented at least as far as the item text.
            let min_continuation_indent = content_start;

            // Task-list checkbox: treat `[ ]` / `[x]` / `[X]` as part of the marker.
            let rest = &line[content_start..];
            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
                marker_end = content_start + 3;
                content_start += 4;
            }

            let marker = &line[indent..marker_end];

            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
            i += 1;

            // Gather continuation lines until a block boundary or a line that
            // is not indented far enough.
            while i < lines.len() {
                let next_line = lines[i];
                let next_trimmed = next_line.trim();

                if is_block_boundary(next_trimmed) {
                    break;
                }

                let next_indent = next_line.len() - next_line.trim_start().len();
                if next_indent >= min_continuation_indent {
                    let trimmed_start = next_line.trim_start();
                    list_content.push(trim_preserving_hard_break(trimmed_start));
                    i += 1;
                } else {
                    break;
                }
            }

            let combined_content = if options.preserve_breaks {
                // NOTE(review): the continuation lines gathered above have
                // already been consumed, but only the first entry is reflowed
                // here - confirm they are not meant to be emitted as well.
                list_content[0].clone()
            } else {
                // Keep line structure when any line carries a hard break;
                // otherwise join everything into one logical line.
                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
                if has_hard_breaks {
                    list_content.join("\n")
                } else {
                    list_content.join(" ")
                }
            };

            let trimmed_marker = marker;
            // Continuation lines align with the item text, optionally capped
            // at `max_list_continuation_indent` columns past the item indent.
            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
                indent + (content_start - indent).min(max_indent)
            } else {
                content_start
            };

            // Width taken by indentation, marker and the space after it.
            let prefix_length = indent + trimmed_marker.len() + 1;

            // Reflow to the width that remains after the list prefix.
            let adjusted_options = ReflowOptions {
                line_length: options.line_length.saturating_sub(prefix_length),
                ..options.clone()
            };

            let reflowed = reflow_line(&combined_content, &adjusted_options);
            for (j, reflowed_line) in reflowed.iter().enumerate() {
                if j == 0 {
                    // First line carries the marker.
                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
                } else {
                    // Later lines are indented to the continuation column.
                    let continuation_indent = " ".repeat(continuation_spaces);
                    result.push(format!("{continuation_indent}{reflowed_line}"));
                }
            }
            continue;
        }

        // Potential table rows are copied verbatim.
        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Lines that start with `[` and contain `]:` (reference definitions)
        // are copied verbatim.
        if trimmed.starts_with('[') && line.contains("]:") {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Definition-list items are copied verbatim.
        if is_definition_list_item(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // From here on the line starts a paragraph. It is a single-line
        // paragraph when it is the last line or the next line is a block boundary.
        let mut is_single_line_paragraph = true;
        if i + 1 < lines.len() {
            let next_trimmed = lines[i + 1].trim();
            if !is_block_boundary(next_trimmed) {
                is_single_line_paragraph = false;
            }
        }

        // A single-line paragraph that already fits is left untouched.
        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // `paragraph_parts` holds groups of source lines that are reflowed
        // independently; `current_part` is the group being collected.
        let mut paragraph_parts = Vec::new();
        let mut current_part = vec![line];
        i += 1;

        if options.preserve_breaks {
            // Only this one line is reflowed; following lines are handled by
            // later iterations of the outer loop. Remember which hard-break
            // marker the line ended with so it can be restored afterwards.
            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
                Some("\\")
            // NOTE(review): Markdown's space-style hard break is two trailing
            // spaces; confirm the whitespace in this literal and the marker
            // below is what is intended.
            } else if line.ends_with(" ") {
                Some(" ")
            } else {
                None
            };
            let reflowed = reflow_line(line, options);

            if let Some(break_marker) = hard_break_type {
                if !reflowed.is_empty() {
                    // Re-attach the marker to the last output line unless the
                    // reflow already kept one.
                    let mut reflowed_with_break = reflowed;
                    let last_idx = reflowed_with_break.len() - 1;
                    if !has_hard_break(&reflowed_with_break[last_idx]) {
                        reflowed_with_break[last_idx].push_str(break_marker);
                    }
                    result.extend(reflowed_with_break);
                }
            } else {
                result.extend(reflowed);
            }
        } else {
            // Collect the paragraph's remaining lines, starting a new part
            // after every hard break (and, in sentence-per-line mode, after
            // every line that ends a sentence).
            while i < lines.len() {
                let prev_line = if !current_part.is_empty() {
                    current_part.last().unwrap()
                } else {
                    ""
                };
                let next_line = lines[i];
                let next_trimmed = next_line.trim();

                if is_block_boundary(next_trimmed) {
                    break;
                }

                let prev_trimmed = prev_line.trim();
                // NOTE(review): the abbreviation set does not depend on the
                // loop variable and is rebuilt for every line; it looks
                // hoistable - confirm `get_abbreviations` has no side effects.
                let abbreviations = get_abbreviations(&options.abbreviations);
                // The previous line ends a sentence when it ends in `.`, `!`
                // or `?` - optionally followed by one emphasis marker or
                // closing quote - and that ending is not a known abbreviation.
                let ends_with_sentence = (prev_trimmed.ends_with('.')
                    || prev_trimmed.ends_with('!')
                    || prev_trimmed.ends_with('?')
                    || prev_trimmed.ends_with(".*")
                    || prev_trimmed.ends_with("!*")
                    || prev_trimmed.ends_with("?*")
                    || prev_trimmed.ends_with("._")
                    || prev_trimmed.ends_with("!_")
                    || prev_trimmed.ends_with("?_")
                    || prev_trimmed.ends_with(".\"")
                    || prev_trimmed.ends_with("!\"")
                    || prev_trimmed.ends_with("?\"")
                    || prev_trimmed.ends_with(".'")
                    || prev_trimmed.ends_with("!'")
                    || prev_trimmed.ends_with("?'")
                    || prev_trimmed.ends_with(".\u{201D}")
                    || prev_trimmed.ends_with("!\u{201D}")
                    || prev_trimmed.ends_with("?\u{201D}")
                    || prev_trimmed.ends_with(".\u{2019}")
                    || prev_trimmed.ends_with("!\u{2019}")
                    || prev_trimmed.ends_with("?\u{2019}"))
                    && !text_ends_with_abbreviation(
                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
                        &abbreviations,
                    );

                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
                    // Close the current part and start a new one with this line.
                    paragraph_parts.push(current_part.join(" "));
                    current_part = vec![next_line];
                } else {
                    current_part.push(next_line);
                }
                i += 1;
            }

            // Flush the part still being collected.
            if !current_part.is_empty() {
                if current_part.len() == 1 {
                    paragraph_parts.push(current_part[0].to_string());
                } else {
                    paragraph_parts.push(current_part.join(" "));
                }
            }

            for (j, part) in paragraph_parts.iter().enumerate() {
                let reflowed = reflow_line(part, options);
                result.extend(reflowed);

                // Between parts (outside sentence-per-line mode) make sure the
                // last emitted line still ends with a hard-break marker.
                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
                    let last_idx = result.len() - 1;
                    if !has_hard_break(&result[last_idx]) {
                        // NOTE(review): see the note on hard-break whitespace above.
                        result[last_idx].push_str(" ");
                    }
                }
            }
        }
    }

    // `lines()` drops the final newline; restore it if the input had one.
    let result_text = result.join("\n");
    if content.ends_with('\n') && !result_text.ends_with('\n') {
        format!("{result_text}\n")
    } else {
        result_text
    }
}
2714
/// Outcome of reflowing the single paragraph that contains a requested line.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset into the original content at which the paragraph begins.
    pub start_byte: usize,
    /// Byte offset just past the paragraph (exclusive end of the range).
    pub end_byte: usize,
    /// Reflowed version of the text found in `start_byte..end_byte`.
    pub reflowed_text: String,
}
2725
/// One line of a blockquote paragraph, separated into its text and the
/// quote prefix it was written with (if any).
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Line text with the blockquote prefix removed.
    pub(crate) content: String,
    /// `true` when the line carried its own blockquote prefix, `false` for a
    /// lazy continuation line.
    pub(crate) is_explicit: bool,
    /// The prefix exactly as written; `None` for lazy continuation lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Builds the record for a line that was written with a blockquote prefix.
    pub fn explicit(content: String, prefix: String) -> Self {
        BlockquoteLineData {
            prefix: Some(prefix),
            is_explicit: true,
            content,
        }
    }

    /// Builds the record for a lazy continuation line (no prefix of its own).
    pub fn lazy(content: String) -> Self {
        BlockquoteLineData {
            prefix: None,
            is_explicit: false,
            content,
        }
    }
}
2760
/// How the lines after the first one of a blockquote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every continuation line repeats the blockquote prefix.
    Explicit,
    /// Continuation lines are written without the prefix (lazy continuation).
    Lazy,
}
2767
2768pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2776 let mut explicit_count = 0usize;
2777 let mut lazy_count = 0usize;
2778
2779 for line in lines.iter().skip(1) {
2780 if line.is_explicit {
2781 explicit_count += 1;
2782 } else {
2783 lazy_count += 1;
2784 }
2785 }
2786
2787 if explicit_count > 0 && lazy_count == 0 {
2788 BlockquoteContinuationStyle::Explicit
2789 } else if lazy_count > 0 && explicit_count == 0 {
2790 BlockquoteContinuationStyle::Lazy
2791 } else if explicit_count >= lazy_count {
2792 BlockquoteContinuationStyle::Explicit
2793 } else {
2794 BlockquoteContinuationStyle::Lazy
2795 }
2796}
2797
2798pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2803 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2804
2805 for (idx, line) in lines.iter().enumerate() {
2806 let Some(prefix) = line.prefix.as_ref() else {
2807 continue;
2808 };
2809 counts
2810 .entry(prefix.clone())
2811 .and_modify(|entry| entry.0 += 1)
2812 .or_insert((1, idx));
2813 }
2814
2815 counts
2816 .into_iter()
2817 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2818 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2819 })
2820 .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix)
2821}
2822
2823pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2828 let trimmed = content_line.trim_start();
2829 trimmed.starts_with('>')
2830 || trimmed.starts_with('#')
2831 || trimmed.starts_with("```")
2832 || trimmed.starts_with("~~~")
2833 || is_unordered_list_marker(trimmed)
2834 || is_numbered_list_item(trimmed)
2835 || is_horizontal_rule(trimmed)
2836 || is_definition_list_item(trimmed)
2837 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2838 || trimmed.starts_with(":::")
2839 || (trimmed.starts_with('<')
2840 && !trimmed.starts_with("<http")
2841 && !trimmed.starts_with("<https")
2842 && !trimmed.starts_with("<mailto:"))
2843}
2844
2845pub fn reflow_blockquote_content(
2854 lines: &[BlockquoteLineData],
2855 explicit_prefix: &str,
2856 continuation_style: BlockquoteContinuationStyle,
2857 options: &ReflowOptions,
2858) -> Vec<String> {
2859 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2860 let segments = split_into_segments_strs(&content_strs);
2861 let mut reflowed_content_lines: Vec<String> = Vec::new();
2862
2863 for segment in segments {
2864 let hard_break_type = segment.last().and_then(|&line| {
2865 let line = line.strip_suffix('\r').unwrap_or(line);
2866 if line.ends_with('\\') {
2867 Some("\\")
2868 } else if line.ends_with(" ") {
2869 Some(" ")
2870 } else {
2871 None
2872 }
2873 });
2874
2875 let pieces: Vec<&str> = segment
2876 .iter()
2877 .map(|&line| {
2878 if let Some(l) = line.strip_suffix('\\') {
2879 l.trim_end()
2880 } else if let Some(l) = line.strip_suffix(" ") {
2881 l.trim_end()
2882 } else {
2883 line.trim_end()
2884 }
2885 })
2886 .collect();
2887
2888 let segment_text = pieces.join(" ");
2889 let segment_text = segment_text.trim();
2890 if segment_text.is_empty() {
2891 continue;
2892 }
2893
2894 let mut reflowed = reflow_line(segment_text, options);
2895 if let Some(break_marker) = hard_break_type
2896 && !reflowed.is_empty()
2897 {
2898 let last_idx = reflowed.len() - 1;
2899 if !has_hard_break(&reflowed[last_idx]) {
2900 reflowed[last_idx].push_str(break_marker);
2901 }
2902 }
2903 reflowed_content_lines.extend(reflowed);
2904 }
2905
2906 let mut styled_lines: Vec<String> = Vec::new();
2907 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2908 let force_explicit = idx == 0
2909 || continuation_style == BlockquoteContinuationStyle::Explicit
2910 || should_force_explicit_blockquote_line(line);
2911 if force_explicit {
2912 styled_lines.push(format!("{explicit_prefix}{line}"));
2913 } else {
2914 styled_lines.push(line.clone());
2915 }
2916 }
2917
2918 styled_lines
2919}
2920
2921fn is_blockquote_content_boundary(content: &str) -> bool {
2922 let trimmed = content.trim();
2923 trimmed.is_empty()
2924 || is_block_boundary(trimmed)
2925 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2926 || trimmed.starts_with(":::")
2927 || crate::utils::is_template_directive_only(content)
2928 || is_standalone_attr_list(content)
2929 || is_snippet_block_delimiter(content)
2930}
2931
2932fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2933 let mut segments = Vec::new();
2934 let mut current = Vec::new();
2935
2936 for &line in lines {
2937 current.push(line);
2938 if has_hard_break(line) {
2939 segments.push(current);
2940 current = Vec::new();
2941 }
2942 }
2943
2944 if !current.is_empty() {
2945 segments.push(current);
2946 }
2947
2948 segments
2949}
2950
2951fn reflow_blockquote_paragraph_at_line(
2952 content: &str,
2953 lines: &[&str],
2954 target_idx: usize,
2955 options: &ReflowOptions,
2956) -> Option<ParagraphReflow> {
2957 let mut anchor_idx = target_idx;
2958 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2959 parsed.nesting_level
2960 } else {
2961 let mut found = None;
2962 let mut idx = target_idx;
2963 loop {
2964 if lines[idx].trim().is_empty() {
2965 break;
2966 }
2967 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2968 found = Some((idx, parsed.nesting_level));
2969 break;
2970 }
2971 if idx == 0 {
2972 break;
2973 }
2974 idx -= 1;
2975 }
2976 let (idx, level) = found?;
2977 anchor_idx = idx;
2978 level
2979 };
2980
2981 let mut para_start = anchor_idx;
2983 while para_start > 0 {
2984 let prev_idx = para_start - 1;
2985 let prev_line = lines[prev_idx];
2986
2987 if prev_line.trim().is_empty() {
2988 break;
2989 }
2990
2991 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2992 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2993 break;
2994 }
2995 para_start = prev_idx;
2996 continue;
2997 }
2998
2999 let prev_lazy = prev_line.trim_start();
3000 if is_blockquote_content_boundary(prev_lazy) {
3001 break;
3002 }
3003 para_start = prev_idx;
3004 }
3005
3006 while para_start < lines.len() {
3008 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3009 para_start += 1;
3010 continue;
3011 };
3012 target_level = parsed.nesting_level;
3013 break;
3014 }
3015
3016 if para_start >= lines.len() || para_start > target_idx {
3017 return None;
3018 }
3019
3020 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3023 let mut idx = para_start;
3024 while idx < lines.len() {
3025 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3026 break;
3027 }
3028
3029 let line = lines[idx];
3030 if line.trim().is_empty() {
3031 break;
3032 }
3033
3034 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3035 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3036 break;
3037 }
3038 collected.push((
3039 idx,
3040 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3041 ));
3042 idx += 1;
3043 continue;
3044 }
3045
3046 let lazy_content = line.trim_start();
3047 if is_blockquote_content_boundary(lazy_content) {
3048 break;
3049 }
3050
3051 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3052 idx += 1;
3053 }
3054
3055 if collected.is_empty() {
3056 return None;
3057 }
3058
3059 let para_end = collected[collected.len() - 1].0;
3060 if target_idx < para_start || target_idx > para_end {
3061 return None;
3062 }
3063
3064 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3065
3066 let fallback_prefix = line_data
3067 .iter()
3068 .find_map(|d| d.prefix.clone())
3069 .unwrap_or_else(|| "> ".to_string());
3070 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3071 let continuation_style = blockquote_continuation_style(&line_data);
3072
3073 let adjusted_line_length = options
3074 .line_length
3075 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3076 .max(1);
3077
3078 let adjusted_options = ReflowOptions {
3079 line_length: adjusted_line_length,
3080 ..options.clone()
3081 };
3082
3083 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3084
3085 if styled_lines.is_empty() {
3086 return None;
3087 }
3088
3089 let mut start_byte = 0;
3091 for line in lines.iter().take(para_start) {
3092 start_byte += line.len() + 1;
3093 }
3094
3095 let mut end_byte = start_byte;
3096 for line in lines.iter().take(para_end + 1).skip(para_start) {
3097 end_byte += line.len() + 1;
3098 }
3099
3100 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3101 if !includes_trailing_newline {
3102 end_byte -= 1;
3103 }
3104
3105 let reflowed_joined = styled_lines.join("\n");
3106 let reflowed_text = if includes_trailing_newline {
3107 if reflowed_joined.ends_with('\n') {
3108 reflowed_joined
3109 } else {
3110 format!("{reflowed_joined}\n")
3111 }
3112 } else if reflowed_joined.ends_with('\n') {
3113 reflowed_joined.trim_end_matches('\n').to_string()
3114 } else {
3115 reflowed_joined
3116 };
3117
3118 Some(ParagraphReflow {
3119 start_byte,
3120 end_byte,
3121 reflowed_text,
3122 })
3123}
3124
/// Reflows the paragraph containing `line_number` (1-based) to `line_length`,
/// measuring line width with the default [`ReflowLengthMode`].
///
/// Thin wrapper around [`reflow_paragraph_at_line_with_mode`]; see
/// [`reflow_paragraph_at_line_with_options`] for when `None` is returned.
pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
}
3145
/// Reflows the paragraph containing `line_number` (1-based) to `line_length`,
/// measuring line width according to `length_mode`.
///
/// Thin wrapper around [`reflow_paragraph_at_line_with_options`].
pub fn reflow_paragraph_at_line_with_mode(
    content: &str,
    line_number: usize,
    line_length: usize,
    length_mode: ReflowLengthMode,
) -> Option<ParagraphReflow> {
    // Every option other than the two supplied here keeps its `Default` value.
    let options = ReflowOptions {
        line_length,
        length_mode,
        ..Default::default()
    };
    reflow_paragraph_at_line_with_options(content, line_number, &options)
}
3160
3161pub fn reflow_paragraph_at_line_with_options(
3172 content: &str,
3173 line_number: usize,
3174 options: &ReflowOptions,
3175) -> Option<ParagraphReflow> {
3176 if line_number == 0 {
3177 return None;
3178 }
3179
3180 let lines: Vec<&str> = content.lines().collect();
3181
3182 if line_number > lines.len() {
3184 return None;
3185 }
3186
3187 let target_idx = line_number - 1; let target_line = lines[target_idx];
3189 let trimmed = target_line.trim();
3190
3191 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3194 return Some(blockquote_reflow);
3195 }
3196
3197 if is_paragraph_boundary(trimmed, target_line) {
3199 return None;
3200 }
3201
3202 let mut para_start = target_idx;
3204 while para_start > 0 {
3205 let prev_idx = para_start - 1;
3206 let prev_line = lines[prev_idx];
3207 let prev_trimmed = prev_line.trim();
3208
3209 if is_paragraph_boundary(prev_trimmed, prev_line) {
3211 break;
3212 }
3213
3214 para_start = prev_idx;
3215 }
3216
3217 let mut para_end = target_idx;
3219 while para_end + 1 < lines.len() {
3220 let next_idx = para_end + 1;
3221 let next_line = lines[next_idx];
3222 let next_trimmed = next_line.trim();
3223
3224 if is_paragraph_boundary(next_trimmed, next_line) {
3226 break;
3227 }
3228
3229 para_end = next_idx;
3230 }
3231
3232 let paragraph_lines = &lines[para_start..=para_end];
3234
3235 let mut start_byte = 0;
3237 for line in lines.iter().take(para_start) {
3238 start_byte += line.len() + 1; }
3240
3241 let mut end_byte = start_byte;
3242 for line in paragraph_lines {
3243 end_byte += line.len() + 1; }
3245
3246 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3249
3250 if !includes_trailing_newline {
3252 end_byte -= 1;
3253 }
3254
3255 let paragraph_text = paragraph_lines.join("\n");
3257
3258 let reflowed = reflow_markdown(¶graph_text, options);
3260
3261 let reflowed_text = if includes_trailing_newline {
3265 if reflowed.ends_with('\n') {
3267 reflowed
3268 } else {
3269 format!("{reflowed}\n")
3270 }
3271 } else {
3272 if reflowed.ends_with('\n') {
3274 reflowed.trim_end_matches('\n').to_string()
3275 } else {
3276 reflowed
3277 }
3278 };
3279
3280 Some(ParagraphReflow {
3281 start_byte,
3282 end_byte,
3283 reflowed_text,
3284 })
3285}
3286
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` with the default abbreviation list:
    /// listed abbreviations followed by a period match, everything else does not.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Ordinary words, other terminators, plain text and the empty string.
        for text in [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ] {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    /// A list marker is `-`, `*` or `+`, alone or followed by a space.
    #[test]
    fn test_is_unordered_list_marker() {
        for text in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(text), "expected {text:?} to be a list marker");
        }

        // Horizontal rules, emphasis, glued words and unrelated text.
        for text in [
            "---",
            "***",
            "- - -",
            "* * *",
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ] {
            assert!(
                !is_unordered_list_marker(text),
                "expected {text:?} not to be a list marker"
            );
        }
    }

    /// Every block-level opener ends a paragraph; inline content does not.
    #[test]
    fn test_is_block_boundary() {
        for text in [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ] {
            assert!(is_block_boundary(text), "expected {text:?} to be a block boundary");
        }

        for text in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(text), "expected {text:?} not to be a block boundary");
        }
    }

    /// A definition-list item directly after a short term line must stay on
    /// its own line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Block-level openers.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four or more spaces, or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with or without a leading pipe.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Ordinary text, including text indented by less than four spaces.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    /// Asking to reflow a `:::` div marker line yields nothing.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}