1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit in which string lengths are measured when comparing against the
/// configured line length.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Measure display width via `UnicodeWidthStr::width`. This is the default mode.
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
/// Configuration for the reflow routines in this module.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target maximum line length, measured according to `length_mode`.
    /// `reflow_line` treats `0` as "no limit" and returns the line unchanged.
    pub line_length: usize,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// controls whether wrapping prefers sentence boundaries — confirm.
    pub break_on_sentences: bool,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// keeps existing line breaks — confirm.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits one sentence per output line
    /// (checked before every other mode).
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` uses the
    /// semantic-line-break strategy.
    pub semantic_line_breaks: bool,
    /// Optional custom abbreviations, handed to `get_abbreviations` when
    /// splitting sentences.
    pub abbreviations: Option<Vec<String>>,
    /// Unit used to measure line length.
    pub length_mode: ReflowLengthMode,
    /// When set, `{...}` attribute lists are parsed as their own elements.
    pub attr_lists: bool,
    /// When set, a `.` only ends a sentence if the following text starts with
    /// an uppercase letter or a CJK character.
    pub require_sentence_capital: bool,
    /// NOTE(review): not read in the visible part of this file; presumably an
    /// upper bound on list continuation indentation — confirm.
    pub max_list_continuation_indent: Option<usize>,
}
77
78impl Default for ReflowOptions {
79 fn default() -> Self {
80 Self {
81 line_length: 80,
82 break_on_sentences: true,
83 preserve_breaks: false,
84 sentence_per_line: false,
85 semantic_line_breaks: false,
86 abbreviations: None,
87 length_mode: ReflowLengthMode::default(),
88 attr_lists: false,
89 require_sentence_capital: true,
90 max_list_continuation_indent: None,
91 }
92 }
93}
94
/// Builds a per-character mask (indexed by `char` position, not byte offset)
/// that is `true` for every character belonging to an inline code span,
/// including its opening and closing backtick runs.
///
/// A span is opened by a run of N backticks and closed by the next run of
/// exactly N backticks. An opening run with no matching closer is left
/// unmasked and scanning resumes right after it.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run starting at `from` (0 when `from` is not a backtick).
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        let opener_len = run_len(cursor);
        if opener_len == 0 {
            cursor += 1;
            continue;
        }

        let span_start = cursor;
        let mut scan = span_start + opener_len;
        let mut span_end = None;

        // Look for a closing run of exactly the same length.
        while scan < total {
            let closer_len = run_len(scan);
            if closer_len == 0 {
                scan += 1;
                continue;
            }
            scan += closer_len;
            if closer_len == opener_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opener, content and closer are contiguous, so mark them in one go.
                mask[span_start..end].fill(true);
                cursor = end;
            }
            // Unclosed opener: treat it as literal text and rescan what follows.
            None => cursor = span_start + opener_len,
        }
    }

    mask
}
155
/// Decides whether the character at char index `pos` of `text` ends a sentence.
///
/// * `text` - the full text being split.
/// * `pos` - index into `text.chars()` (not a byte offset) of the candidate character.
/// * `abbreviations` - set passed to `text_ends_with_abbreviation` to reject
///   periods that terminate an abbreviation.
/// * `require_sentence_capital` - when `true`, a `.` only counts if the next
///   sentence starts with an uppercase letter or a CJK character.
///
/// Returns `false` for the last character of `text` and whenever nothing but
/// whitespace / emphasis markers / opening quotes follows the punctuation.
fn is_sentence_boundary(
    text: &str,
    pos: usize,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> bool {
    // NOTE(review): the whole text is re-collected on every call.
    let chars: Vec<char> = text.chars().collect();

    // A boundary needs at least one character after the punctuation.
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // CJK sentence-ending punctuation: no following space is required.
    if is_cjk_sentence_ending(c) {
        let mut after_punct_pos = pos + 1;
        // Skip emphasis markers that close right after the punctuation.
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip any whitespace.
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Nothing follows: not a boundary between two sentences.
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip emphasis markers that open the next sentence.
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        return true;
    }

    // Only `.`, `!` and `?` can end a non-CJK sentence.
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Locate the space that must follow the punctuation, allowing a closing
    // quote and/or closing emphasis markers in between. Only the position
    // after the space is used below.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // Punctuation directly followed by a space.
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        if chars[pos + 2] == ' ' {
            // Punctuation, closing quote, space.
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Punctuation, closing quote, single emphasis marker, space.
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Punctuation, closing quote, doubled emphasis marker, space.
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Punctuation, single emphasis marker, space.
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Punctuation, doubled emphasis marker, space.
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Punctuation, `~~`, space.
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip any additional whitespace after the separating space.
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Trailing whitespace only: there is no following sentence.
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip emphasis markers and opening quotes to find the first real
    // character of the next sentence.
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    if first_letter_pos >= chars.len() {
        return false;
    }

    let first_char = chars[first_letter_pos];

    // `!` and `?` are accepted without the abbreviation / capital checks.
    if c == '!' || c == '?' {
        return true;
    }

    // From here on `c` is a period.
    if pos > 0 {
        // Convert the char index to a byte offset (inclusive of the period)
        // so the prefix can be sliced for the abbreviation check.
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // A numeric character before the period and an ASCII digit starting
        // the following text: not treated as a sentence end.
        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
            return false;
        }

        // A lone ASCII uppercase letter before the period (at the start of
        // the text or preceded by whitespace), e.g. an initial.
        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
            return false;
        }
    }

    // Optionally require the next sentence to start with an uppercase letter
    // or a CJK character.
    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    true
}
320
321pub fn split_into_sentences(text: &str) -> Vec<String> {
323 split_into_sentences_custom(text, &None)
324}
325
326pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328 let abbreviations = get_abbreviations(custom_abbreviations);
329 split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332fn split_into_sentences_with_set(
335 text: &str,
336 abbreviations: &HashSet<String>,
337 require_sentence_capital: bool,
338) -> Vec<String> {
339 let in_code = compute_inline_code_mask(text);
341
342 let mut sentences = Vec::new();
343 let mut current_sentence = String::new();
344 let mut chars = text.chars().peekable();
345 let mut pos = 0;
346
347 while let Some(c) = chars.next() {
348 current_sentence.push(c);
349
350 if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351 while let Some(&next) = chars.peek() {
353 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354 current_sentence.push(chars.next().unwrap());
355 pos += 1;
356 } else {
357 break;
358 }
359 }
360
361 if chars.peek() == Some(&' ') {
363 chars.next();
364 pos += 1;
365 }
366
367 sentences.push(current_sentence.trim().to_string());
368 current_sentence.clear();
369 }
370
371 pos += 1;
372 }
373
374 if !current_sentence.trim().is_empty() {
376 sentences.push(current_sentence.trim().to_string());
377 }
378 sentences
379}
380
/// Returns `true` when `line` is a horizontal rule: it starts with `-`, `_`
/// or `*`, consists solely of that marker character and spaces, and contains
/// at least three markers.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Any other character disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Returns `true` when `line` starts with an ordered-list marker: one or more
/// ASCII digits, a `.`, and a space (for example `1. item`).
///
/// Only ASCII digits are accepted. `char::is_numeric` would also match
/// characters such as `½` or Arabic-Indic digits, which Markdown does not
/// treat as list numbers, so text starting with them must not be classified
/// as a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are single bytes, so the count is also a valid byte offset.
    let digits_len = line.bytes().take_while(u8::is_ascii_digit).count();
    digits_len > 0 && line[digits_len..].starts_with(". ")
}
433
434fn is_unordered_list_marker(s: &str) -> bool {
436 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437 && !is_horizontal_rule(s)
438 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441fn is_block_boundary_core(trimmed: &str) -> bool {
444 trimmed.is_empty()
445 || trimmed.starts_with('#')
446 || trimmed.starts_with("```")
447 || trimmed.starts_with("~~~")
448 || trimmed.starts_with('>')
449 || (trimmed.starts_with('[') && trimmed.contains("]:"))
450 || is_horizontal_rule(trimmed)
451 || is_unordered_list_marker(trimmed)
452 || is_numbered_list_item(trimmed)
453 || is_definition_list_item(trimmed)
454 || trimmed.starts_with(":::")
455}
456
457fn is_block_boundary(trimmed: &str) -> bool {
460 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467 is_block_boundary_core(trimmed)
468 || calculate_indentation_width_default(line) >= 4
469 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Returns `true` when `line` ends with a Markdown hard line break: at least
/// two trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF line endings) is ignored. A single trailing
/// space is ordinary trailing whitespace, not a hard break.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    // Count spaces explicitly so the two-space requirement is unambiguous.
    let trailing_spaces = line.bytes().rev().take_while(|&b| b == b' ').count();
    trailing_spaces >= 2 || line.ends_with('\\')
}
481
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// * A trailing `\r` is always removed first.
/// * A line ending in a backslash is returned as-is.
/// * A line ending in two or more spaces is returned as its trimmed content
///   followed by exactly two spaces (the hard break is normalized).
/// * Any other trailing whitespace, including a single trailing space, is
///   removed, because one space is not a hard break.
/// * A line that is only whitespace becomes the empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    /// Number of trailing spaces that make up a hard line break.
    const HARD_BREAK_SPACES: usize = 2;

    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    let trailing_spaces = s.len() - s.trim_end_matches(' ').len();

    if trailing_spaces >= HARD_BREAK_SPACES && !content.is_empty() {
        let mut out = String::with_capacity(content.len() + HARD_BREAK_SPACES);
        out.push_str(content);
        out.extend(std::iter::repeat(' ').take(HARD_BREAK_SPACES));
        out
    } else {
        content.to_string()
    }
}
517
518fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520 if options.attr_lists {
521 parse_markdown_elements_with_attr_lists(text)
522 } else {
523 parse_markdown_elements(text)
524 }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528 if options.sentence_per_line {
530 let elements = parse_elements(line, options);
531 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532 }
533
534 if options.semantic_line_breaks {
536 let elements = parse_elements(line, options);
537 return reflow_elements_semantic(&elements, options);
538 }
539
540 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543 return vec![line.to_string()];
544 }
545
546 let elements = parse_elements(line, options);
548
549 reflow_elements(&elements, options)
551}
552
/// Where the image inside a linked image gets its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline URL: the image is written as `!` followed by `[alt](url)`.
    Inline(String),
    /// Reference label: the image is written as `![alt][ref]`.
    Reference(String),
}

/// Where the link wrapping a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline URL: the wrapper is written as `[image](url)`.
    Inline(String),
    /// Reference label: the wrapper is written as `[image][ref]`.
    Reference(String),
}

/// One inline Markdown element produced by the element parser.
///
/// The `Display` implementation renders each variant back to its Markdown
/// source form; that rendering is also what `display_width` measures.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// Inline link `[text](url)`.
    Link { text: String, url: String },
    /// Reference link `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `!` followed by `[alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link (e.g. a badge).
    LinkedImage {
        /// Alt text of the inner image.
        alt: String,
        /// Source of the inner image.
        img_source: LinkedImageSource,
        /// Target of the outer link.
        link_target: LinkedImageTarget,
    },
    /// Footnote reference `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough `~~text~~`.
    Strikethrough(String),
    /// Wiki link `[[target]]`.
    WikiLink(String),
    /// Inline math `$...$`.
    InlineMath(String),
    /// Display math `$$...$$`.
    DisplayMath(String),
    /// Emoji shortcode `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim (including its angle brackets).
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Attribute list, stored and rendered verbatim.
    AttrList(String),
    /// Inline code, rendered between single backticks.
    Code(String),
    /// Strong emphasis.
    Bold {
        /// Text between the delimiters.
        content: String,
        /// `true` for `__text__`, `false` for `**text**`.
        underscore: bool,
    },
    /// Emphasis.
    Italic {
        /// Text between the delimiters.
        content: String,
        /// `true` for `_text_`, `false` for `*text*`.
        underscore: bool,
    },
}

/// Writes an inline image: a `!` immediately followed by the `[alt](url)`
/// link form. Shared by `InlineImage` and the inline-source `LinkedImage` so
/// both always render the same way.
fn write_inline_image(f: &mut std::fmt::Formatter<'_>, alt: &str, url: &str) -> std::fmt::Result {
    f.write_str("!")?;
    write!(f, "[{alt}]({url})")
}

impl std::fmt::Display for Element {
    /// Renders the element back to Markdown source.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Previously rendered as an empty string, dropping the image.
            Element::InlineImage { alt, url } => write_inline_image(f, alt, url),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Outer link opens, inner image follows, then the link target.
                f.write_str("[")?;
                match img_source {
                    LinkedImageSource::Inline(url) => write_inline_image(f, alt, url)?,
                    LinkedImageSource::Reference(r) => write!(f, "![{alt}][{r}]")?,
                }
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
694
695impl Element {
696 fn display_width(&self, mode: ReflowLengthMode) -> usize {
700 let formatted = format!("{self}");
701 display_len(&formatted, mode)
702 }
703}
704
/// An emphasis, strong or strikethrough span located by `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the source text where the span (including its opening
    /// delimiter) starts.
    start: usize,
    /// Byte offset just past the span's closing delimiter.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiter).
    is_strong: bool,
    /// `true` for strikethrough (`~~`).
    is_strikethrough: bool,
    /// `true` when the opening delimiter is made of underscores; always
    /// `false` for strikethrough spans.
    uses_underscore: bool,
}
721
722fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732 let mut spans = Vec::new();
733 let mut options = Options::empty();
734 options.insert(Options::ENABLE_STRIKETHROUGH);
735
736 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739 let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741 let parser = Parser::new_ext(text, options).into_offset_iter();
742
743 for (event, range) in parser {
744 match event {
745 Event::Start(Tag::Emphasis) => {
746 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748 emphasis_stack.push((range.start, uses_underscore));
749 }
750 Event::End(TagEnd::Emphasis) => {
751 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752 let content_start = start_byte + 1;
754 let content_end = range.end - 1;
755 if content_end > content_start
756 && let Some(content) = text.get(content_start..content_end)
757 {
758 spans.push(EmphasisSpan {
759 start: start_byte,
760 end: range.end,
761 content: content.to_string(),
762 is_strong: false,
763 is_strikethrough: false,
764 uses_underscore,
765 });
766 }
767 }
768 }
769 Event::Start(Tag::Strong) => {
770 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772 strong_stack.push((range.start, uses_underscore));
773 }
774 Event::End(TagEnd::Strong) => {
775 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776 let content_start = start_byte + 2;
778 let content_end = range.end - 2;
779 if content_end > content_start
780 && let Some(content) = text.get(content_start..content_end)
781 {
782 spans.push(EmphasisSpan {
783 start: start_byte,
784 end: range.end,
785 content: content.to_string(),
786 is_strong: true,
787 is_strikethrough: false,
788 uses_underscore,
789 });
790 }
791 }
792 }
793 Event::Start(Tag::Strikethrough) => {
794 strikethrough_stack.push(range.start);
795 }
796 Event::End(TagEnd::Strikethrough) => {
797 if let Some(start_byte) = strikethrough_stack.pop() {
798 let content_start = start_byte + 2;
800 let content_end = range.end - 2;
801 if content_end > content_start
802 && let Some(content) = text.get(content_start..content_end)
803 {
804 spans.push(EmphasisSpan {
805 start: start_byte,
806 end: range.end,
807 content: content.to_string(),
808 is_strong: false,
809 is_strikethrough: true,
810 uses_underscore: false,
811 });
812 }
813 }
814 }
815 _ => {}
816 }
817 }
818
819 spans.sort_by_key(|s| s.start);
821 spans
822}
823
824fn parse_markdown_elements(text: &str) -> Vec<Element> {
835 parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839 parse_markdown_elements_inner(text, true)
840}
841
/// Splits `text` into a sequence of inline `Element`s.
///
/// The text is consumed left to right. On every iteration the function finds
/// the earliest regex-detected construct (links, images, math, HTML, ...) and
/// the earliest "special" construct (inline code, attribute list when
/// `attr_lists` is set, or a pulldown-cmark emphasis span), handles whichever
/// starts first, and emits any text in front of it as `Element::Text`.
///
/// * `text` - the inline text to parse.
/// * `attr_lists` - when `true`, `{...}` attribute lists matching
///   `ATTR_LIST_PATTERN` are emitted as `Element::AttrList`.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once over the full text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start, end, pattern name) of the earliest regex match in `remaining`.
        // Ties keep the pattern checked first, since a later match must start
        // strictly earlier to replace it.
        let mut earliest_match: Option<(usize, usize, &str)> = None;

        // Linked images are only probed when the cheap substring test passes.
        if remaining.contains("[!") {
            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
            }

            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
            }

            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
            }

            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
            }
        }

        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_image"));
        }

        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_image"));
        }

        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
        }

        // These regexes return `Result`; a matching error is treated like "no match".
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_link"));
        }

        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_link"));
        }

        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
        }

        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "wiki_link"));
        }

        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "display_math"));
        }

        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_math"));
        }

        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "emoji"));
        }

        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "html_entity"));
        }

        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
        }

        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            // An angle-bracketed URL or e-mail address is an autolink rather
            // than an HTML tag.
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), m.end(), "autolink"));
            } else {
                earliest_match = Some((m.start(), m.end(), "html_tag"));
            }
        }

        // Earliest "special" construct (code, attr list, emphasis);
        // `remaining.len()` means none was found.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Only the first `{` is considered, and only if an attribute list
        // starts exactly there.
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Spans are sorted by start, so the first one that begins inside
        // `remaining` is the only candidate; spans already passed are skipped.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break;
            }
        }

        // The regex match wins only when it starts strictly before the
        // special construct.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, match_end, pattern_type) = earliest_match.unwrap();

            // Text in front of the match is plain text.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs the pattern to obtain capture groups. If that
            // unexpectedly fails, the leading delimiter is emitted as text so
            // the loop still makes progress.
            match pattern_type {
                "linked_image_ii" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label means the collapsed `![alt][]` form.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty label means the collapsed `[text][]` form.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The following constructs are kept verbatim, so the matched
                // range itself is the element payload.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                _ => {
                    // Unknown pattern name: emit one character and move on.
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the text in front of the special construct, leaving
            // `remaining` positioned at its first byte.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // NOTE(review): only a single closing backtick is searched
                    // for, so multi-backtick code spans are not recognized as
                    // a unit here.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // Unclosed backtick: the rest of the input is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // Consume the whole span, delimiters included.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                _ => {
                    // Nothing special left: the rest is plain text.
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1352
1353fn reflow_elements_sentence_per_line(
1355 elements: &[Element],
1356 custom_abbreviations: &Option<Vec<String>>,
1357 require_sentence_capital: bool,
1358) -> Vec<String> {
1359 let abbreviations = get_abbreviations(custom_abbreviations);
1360 let mut lines = Vec::new();
1361 let mut current_line = String::new();
1362
1363 for (idx, element) in elements.iter().enumerate() {
1364 let element_str = format!("{element}");
1365
1366 if let Element::Text(text) = element {
1368 let combined = format!("{current_line}{text}");
1370 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373 if sentences.len() > 1 {
1374 for (i, sentence) in sentences.iter().enumerate() {
1376 if i == 0 {
1377 let trimmed = sentence.trim();
1380
1381 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382 current_line = sentence.to_string();
1384 } else {
1385 lines.push(sentence.to_string());
1387 current_line.clear();
1388 }
1389 } else if i == sentences.len() - 1 {
1390 let trimmed = sentence.trim();
1392 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395 lines.push(sentence.to_string());
1397 current_line.clear();
1398 } else {
1399 current_line = sentence.to_string();
1401 }
1402 } else {
1403 lines.push(sentence.to_string());
1405 }
1406 }
1407 } else {
1408 let trimmed = combined.trim();
1410
1411 if trimmed.is_empty() {
1415 continue;
1416 }
1417
1418 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421 lines.push(trimmed.to_string());
1423 current_line.clear();
1424 } else {
1425 current_line = combined;
1427 }
1428 }
1429 } else if let Element::Italic { content, underscore } = element {
1430 let marker = if *underscore { "_" } else { "*" };
1432 handle_emphasis_sentence_split(
1433 content,
1434 marker,
1435 &abbreviations,
1436 require_sentence_capital,
1437 &mut current_line,
1438 &mut lines,
1439 );
1440 } else if let Element::Bold { content, underscore } = element {
1441 let marker = if *underscore { "__" } else { "**" };
1443 handle_emphasis_sentence_split(
1444 content,
1445 marker,
1446 &abbreviations,
1447 require_sentence_capital,
1448 &mut current_line,
1449 &mut lines,
1450 );
1451 } else if let Element::Strikethrough(content) = element {
1452 handle_emphasis_sentence_split(
1454 content,
1455 "~~",
1456 &abbreviations,
1457 require_sentence_capital,
1458 &mut current_line,
1459 &mut lines,
1460 );
1461 } else {
1462 let is_adjacent = if idx > 0 {
1465 match &elements[idx - 1] {
1466 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467 _ => true,
1468 }
1469 } else {
1470 false
1471 };
1472
1473 if !is_adjacent
1475 && !current_line.is_empty()
1476 && !current_line.ends_with(' ')
1477 && !current_line.ends_with('(')
1478 && !current_line.ends_with('[')
1479 {
1480 current_line.push(' ');
1481 }
1482 current_line.push_str(&element_str);
1483 }
1484 }
1485
1486 if !current_line.is_empty() {
1488 lines.push(current_line.trim().to_string());
1489 }
1490 lines
1491}
1492
1493fn handle_emphasis_sentence_split(
1495 content: &str,
1496 marker: &str,
1497 abbreviations: &HashSet<String>,
1498 require_sentence_capital: bool,
1499 current_line: &mut String,
1500 lines: &mut Vec<String>,
1501) {
1502 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505 if sentences.len() <= 1 {
1506 if !current_line.is_empty()
1508 && !current_line.ends_with(' ')
1509 && !current_line.ends_with('(')
1510 && !current_line.ends_with('[')
1511 {
1512 current_line.push(' ');
1513 }
1514 current_line.push_str(marker);
1515 current_line.push_str(content);
1516 current_line.push_str(marker);
1517
1518 let trimmed = content.trim();
1520 let ends_with_punct = ends_with_sentence_punct(trimmed);
1521 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522 lines.push(current_line.clone());
1523 current_line.clear();
1524 }
1525 } else {
1526 for (i, sentence) in sentences.iter().enumerate() {
1528 let trimmed = sentence.trim();
1529 if trimmed.is_empty() {
1530 continue;
1531 }
1532
1533 if i == 0 {
1534 if !current_line.is_empty()
1536 && !current_line.ends_with(' ')
1537 && !current_line.ends_with('(')
1538 && !current_line.ends_with('[')
1539 {
1540 current_line.push(' ');
1541 }
1542 current_line.push_str(marker);
1543 current_line.push_str(trimmed);
1544 current_line.push_str(marker);
1545
1546 let ends_with_punct = ends_with_sentence_punct(trimmed);
1548 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549 lines.push(current_line.clone());
1550 current_line.clear();
1551 }
1552 } else if i == sentences.len() - 1 {
1553 let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556 let mut line = String::new();
1557 line.push_str(marker);
1558 line.push_str(trimmed);
1559 line.push_str(marker);
1560
1561 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562 lines.push(line);
1563 } else {
1564 *current_line = line;
1566 }
1567 } else {
1568 let mut line = String::new();
1570 line.push_str(marker);
1571 line.push_str(trimmed);
1572 line.push_str(marker);
1573 lines.push(line);
1574 }
1575 }
1576 }
1577}
1578
/// Lowercase words in front of which an over-long line may be broken.
///
/// `split_at_break_word` only accepts an occurrence that has a space on both
/// sides (or sits at the very start of the text and is followed by a space).
#[rustfmt::skip]
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions.
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns and subordinating conjunctions.
    "which", "that", "because", "when", "if", "while", "where",
    "although", "though", "unless", "since", "after", "before",
    "until", "as", "once", "whether",
    // Conjunctive adverbs and contrast words.
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1614
/// Returns `true` for the characters treated as clause separators:
/// comma, semicolon, colon and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1634 debug_assert!(slice.starts_with('('));
1635 let mut depth: i32 = 0;
1636 for (local_byte, c) in slice.char_indices() {
1637 let global_byte = offset + local_byte;
1638 if depth > 0 && is_inside_element(global_byte, element_spans) {
1643 continue;
1644 }
1645 match c {
1646 '(' => depth += 1,
1647 ')' => {
1648 depth -= 1;
1649 if depth == 0 {
1650 let end = local_byte + 1;
1651 let inner = &slice[1..local_byte];
1652 return Some((end, inner));
1653 }
1654 }
1655 _ => {}
1656 }
1657 }
1658 None
1659}
1660
1661fn split_at_parenthetical(
1678 text: &str,
1679 line_length: usize,
1680 element_spans: &[(usize, usize)],
1681 length_mode: ReflowLengthMode,
1682) -> Option<(String, String)> {
1683 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1684
1685 if text.starts_with('(')
1687 && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1688 && inner.contains(' ')
1689 {
1690 let tail = &text[end_local..];
1695 let (first_end, rest_start) = match tail.chars().next() {
1696 Some(c) if is_clause_punctuation(c) => (end_local + c.len_utf8(), end_local + c.len_utf8()),
1697 _ => (end_local, end_local),
1698 };
1699 let first = &text[..first_end];
1700 let first_len = display_len(first, length_mode);
1701 if first_len <= line_length {
1704 let rest = text[rest_start..].trim_start();
1705 if !rest.is_empty() {
1706 return Some((first.to_string(), rest.to_string()));
1707 }
1708 }
1709 }
1710
1711 let mut best_open_byte: Option<usize> = None;
1713 let mut pos = 0usize;
1714 while pos < text.len() {
1715 if text.as_bytes()[pos] != b'(' {
1717 let c = text[pos..].chars().next().unwrap();
1718 pos += c.len_utf8();
1719 continue;
1720 }
1721 if is_inside_element(pos, element_spans) {
1723 pos += 1;
1724 continue;
1725 }
1726 if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1727 let first = text[..pos].trim_end();
1728 let first_len = display_len(first, length_mode);
1729 if !first.is_empty()
1730 && first_len >= min_first_len
1731 && first_len <= line_length
1732 && inner.contains(' ')
1733 && best_open_byte.is_none_or(|prev| pos > prev)
1734 {
1735 best_open_byte = Some(pos);
1736 }
1737 pos += end_local;
1738 } else {
1739 pos += 1;
1740 }
1741 }
1742
1743 let open_byte = best_open_byte?;
1744 let first = text[..open_byte].trim_end().to_string();
1745 let rest = text[open_byte..].to_string();
1746 if first.is_empty() || rest.trim().is_empty() {
1747 return None;
1748 }
1749 Some((first, rest))
1750}
1751
1752fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1756 let mut spans = Vec::new();
1757 let mut offset = 0;
1758 for element in elements {
1759 let rendered = format!("{element}");
1760 let len = rendered.len();
1761 if !matches!(element, Element::Text(_)) {
1762 spans.push((offset, offset + len));
1763 }
1764 offset += len;
1765 }
1766 spans
1767}
1768
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both bounds are exclusive: a position equal to a span's start or end is
/// not considered inside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1773
/// Fraction of the configured line length used as a lower bound when splitting:
/// the split helpers require the first part to be at least
/// `line_length * MIN_SPLIT_RATIO` wide, and the semantic reflow treats lines
/// shorter than that as candidates for merging into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1777
1778fn split_at_clause_punctuation(
1782 text: &str,
1783 line_length: usize,
1784 element_spans: &[(usize, usize)],
1785 length_mode: ReflowLengthMode,
1786) -> Option<(String, String)> {
1787 let chars: Vec<char> = text.chars().collect();
1788 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1789
1790 let mut width_acc = 0;
1792 let mut search_end_char = 0;
1793 for (idx, &c) in chars.iter().enumerate() {
1794 let c_width = display_len(&c.to_string(), length_mode);
1795 if width_acc + c_width > line_length {
1796 break;
1797 }
1798 width_acc += c_width;
1799 search_end_char = idx + 1;
1800 }
1801
1802 let mut paren_depth: i32 = 0;
1809 let mut best_pos = None;
1810 for i in (0..search_end_char).rev() {
1811 let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1813 let byte_after: usize = byte_start + chars[i].len_utf8();
1815
1816 if !is_inside_element(byte_start, element_spans) {
1817 match chars[i] {
1818 ')' => paren_depth += 1,
1819 '(' => paren_depth = paren_depth.saturating_sub(1),
1820 _ => {}
1821 }
1822 }
1823
1824 if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1825 best_pos = Some(i);
1826 break;
1827 }
1828 }
1829
1830 let pos = best_pos?;
1831
1832 let first: String = chars[..=pos].iter().collect();
1834 let first_display_len = display_len(&first, length_mode);
1835 if first_display_len < min_first_len {
1836 return None;
1837 }
1838
1839 let rest: String = chars[pos + 1..].iter().collect();
1841 let rest = rest.trim_start().to_string();
1842
1843 if rest.is_empty() {
1844 return None;
1845 }
1846
1847 Some((first, rest))
1848}
1849
1850fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1857 let mut map = vec![0i32; text.len()];
1858 let mut depth = 0i32;
1859 for (byte, c) in text.char_indices() {
1860 if !is_inside_element(byte, element_spans) {
1861 match c {
1862 '(' => depth += 1,
1863 ')' => depth = depth.saturating_sub(1),
1864 _ => {}
1865 }
1866 }
1867 let end = (byte + c.len_utf8()).min(map.len());
1869 for slot in &mut map[byte..end] {
1870 *slot = depth;
1871 }
1872 }
1873 map
1874}
1875
1876fn is_standalone_parenthetical(line: &str) -> bool {
1885 let trimmed = line.trim();
1886 if !trimmed.starts_with('(') {
1887 return false;
1888 }
1889 let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1891 if !core.ends_with(')') {
1892 return false;
1893 }
1894 let inner = &core[1..core.len() - 1];
1896 if !inner.contains(' ') {
1897 return false;
1898 }
1899 let mut depth = 0i32;
1901 for c in core.chars() {
1902 match c {
1903 '(' => depth += 1,
1904 ')' => depth -= 1,
1905 _ => {}
1906 }
1907 if depth < 0 {
1908 return false;
1909 }
1910 }
1911 depth == 0
1912}
1913
1914fn split_at_break_word(
1918 text: &str,
1919 line_length: usize,
1920 element_spans: &[(usize, usize)],
1921 length_mode: ReflowLengthMode,
1922) -> Option<(String, String)> {
1923 let lower = text.to_lowercase();
1924 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1925 let mut best_split: Option<(usize, usize)> = None; let depth_map = paren_depth_map(text, element_spans);
1930
1931 for &word in BREAK_WORDS {
1932 let mut search_start = 0;
1933 while let Some(pos) = lower[search_start..].find(word) {
1934 let abs_pos = search_start + pos;
1935
1936 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1938 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1939
1940 if preceded_by_space && followed_by_space {
1941 let first_part = text[..abs_pos].trim_end();
1943 let first_part_len = display_len(first_part, length_mode);
1944
1945 let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1947
1948 if first_part_len >= min_first_len
1949 && first_part_len <= line_length
1950 && !is_inside_element(abs_pos, element_spans)
1951 && !inside_paren
1952 {
1953 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1955 best_split = Some((abs_pos, word.len()));
1956 }
1957 }
1958 }
1959
1960 search_start = abs_pos + word.len();
1961 }
1962 }
1963
1964 let (byte_start, _word_len) = best_split?;
1965
1966 let first = text[..byte_start].trim_end().to_string();
1967 let rest = text[byte_start..].to_string();
1968
1969 if first.is_empty() || rest.trim().is_empty() {
1970 return None;
1971 }
1972
1973 Some((first, rest))
1974}
1975
1976fn cascade_split_line(
1979 text: &str,
1980 line_length: usize,
1981 abbreviations: &Option<Vec<String>>,
1982 length_mode: ReflowLengthMode,
1983 attr_lists: bool,
1984) -> Vec<String> {
1985 if line_length == 0 || display_len(text, length_mode) <= line_length {
1986 return vec![text.to_string()];
1987 }
1988
1989 let elements = parse_markdown_elements_inner(text, attr_lists);
1990 let element_spans = compute_element_spans(&elements);
1991
1992 if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1995 let mut result = vec![first];
1996 result.extend(cascade_split_line(
1997 &rest,
1998 line_length,
1999 abbreviations,
2000 length_mode,
2001 attr_lists,
2002 ));
2003 return result;
2004 }
2005
2006 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2008 let mut result = vec![first];
2009 result.extend(cascade_split_line(
2010 &rest,
2011 line_length,
2012 abbreviations,
2013 length_mode,
2014 attr_lists,
2015 ));
2016 return result;
2017 }
2018
2019 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2021 let mut result = vec![first];
2022 result.extend(cascade_split_line(
2023 &rest,
2024 line_length,
2025 abbreviations,
2026 length_mode,
2027 attr_lists,
2028 ));
2029 return result;
2030 }
2031
2032 let options = ReflowOptions {
2034 line_length,
2035 break_on_sentences: false,
2036 preserve_breaks: false,
2037 sentence_per_line: false,
2038 semantic_line_breaks: false,
2039 abbreviations: abbreviations.clone(),
2040 length_mode,
2041 attr_lists,
2042 require_sentence_capital: true,
2043 max_list_continuation_indent: None,
2044 };
2045 reflow_elements(&elements, &options)
2046}
2047
2048fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2052 let sentence_lines =
2054 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2055
2056 if options.line_length == 0 {
2059 return sentence_lines;
2060 }
2061
2062 let length_mode = options.length_mode;
2063 let mut result = Vec::new();
2064 for line in sentence_lines {
2065 if display_len(&line, length_mode) <= options.line_length {
2066 result.push(line);
2067 } else {
2068 result.extend(cascade_split_line(
2069 &line,
2070 options.line_length,
2071 &options.abbreviations,
2072 length_mode,
2073 options.attr_lists,
2074 ));
2075 }
2076 }
2077
2078 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2081 let mut merged: Vec<String> = Vec::with_capacity(result.len());
2082 for line in result {
2083 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2084 if is_standalone_parenthetical(&line) {
2087 merged.push(line);
2088 continue;
2089 }
2090
2091 let prev_ends_at_sentence = {
2093 let trimmed = merged.last().unwrap().trim_end();
2094 trimmed
2095 .chars()
2096 .rev()
2097 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2098 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2099 };
2100
2101 if !prev_ends_at_sentence {
2102 let prev = merged.last_mut().unwrap();
2103 let combined = format!("{prev} {line}");
2104 if display_len(&combined, length_mode) <= options.line_length {
2106 *prev = combined;
2107 continue;
2108 }
2109 }
2110 }
2111 merged.push(line);
2112 }
2113 merged
2114}
2115
/// Returns the byte index of the last ASCII space in `line` that is not
/// strictly inside any of `element_spans`, or `None` if there is none.
///
/// A space located exactly at a span's start or end offset counts as safe.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(idx, _)| idx)
        .find(|&idx| element_spans.iter().all(|&(start, end)| idx <= start || idx >= end))
}
2129
2130fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2132 let mut lines = Vec::new();
2133 let mut current_line = String::new();
2134 let mut current_length = 0;
2135 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2137 let length_mode = options.length_mode;
2138
2139 for (idx, element) in elements.iter().enumerate() {
2140 let element_str = format!("{element}");
2141 let element_len = element.display_width(length_mode);
2142
2143 let is_adjacent_to_prev = if idx > 0 {
2149 match (&elements[idx - 1], element) {
2150 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2151 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2152 _ => true,
2153 }
2154 } else {
2155 false
2156 };
2157
2158 if let Element::Text(text) = element {
2160 let has_leading_space = text.starts_with(char::is_whitespace);
2162 let words: Vec<&str> = text.split_whitespace().collect();
2164
2165 for (i, word) in words.iter().enumerate() {
2166 let word_len = display_len(word, length_mode);
2167 let is_trailing_punct = word
2169 .chars()
2170 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2171
2172 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2175
2176 if is_first_adjacent {
2177 if current_length + word_len > options.line_length && current_length > 0 {
2179 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2182 let before = current_line[..last_space].trim_end().to_string();
2183 let after = current_line[last_space + 1..].to_string();
2184 lines.push(before);
2185 current_line = format!("{after}{word}");
2186 current_length = display_len(¤t_line, length_mode);
2187 current_line_element_spans.clear();
2188 } else {
2189 current_line.push_str(word);
2190 current_length += word_len;
2191 }
2192 } else {
2193 current_line.push_str(word);
2194 current_length += word_len;
2195 }
2196 } else if current_length > 0
2197 && current_length + 1 + word_len > options.line_length
2198 && !is_trailing_punct
2199 {
2200 lines.push(current_line.trim().to_string());
2202 current_line = word.to_string();
2203 current_length = word_len;
2204 current_line_element_spans.clear();
2205 } else {
2206 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2210 current_line.push(' ');
2211 current_length += 1;
2212 }
2213 current_line.push_str(word);
2214 current_length += word_len;
2215 }
2216 }
2217 } else if matches!(
2218 element,
2219 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2220 ) && element_len > options.line_length
2221 {
2222 let (content, marker): (&str, &str) = match element {
2226 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2227 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2228 Element::Strikethrough(content) => (content.as_str(), "~~"),
2229 _ => unreachable!(),
2230 };
2231
2232 let words: Vec<&str> = content.split_whitespace().collect();
2233 let n = words.len();
2234
2235 if n == 0 {
2236 let full = format!("{marker}{marker}");
2238 let full_len = display_len(&full, length_mode);
2239 if !is_adjacent_to_prev && current_length > 0 {
2240 current_line.push(' ');
2241 current_length += 1;
2242 }
2243 current_line.push_str(&full);
2244 current_length += full_len;
2245 } else {
2246 for (i, word) in words.iter().enumerate() {
2247 let is_first = i == 0;
2248 let is_last = i == n - 1;
2249 let word_str: String = match (is_first, is_last) {
2250 (true, true) => format!("{marker}{word}{marker}"),
2251 (true, false) => format!("{marker}{word}"),
2252 (false, true) => format!("{word}{marker}"),
2253 (false, false) => word.to_string(),
2254 };
2255 let word_len = display_len(&word_str, length_mode);
2256
2257 let needs_space = if is_first {
2258 !is_adjacent_to_prev && current_length > 0
2259 } else {
2260 current_length > 0
2261 };
2262
2263 if needs_space && current_length + 1 + word_len > options.line_length {
2264 lines.push(current_line.trim_end().to_string());
2265 current_line = word_str;
2266 current_length = word_len;
2267 current_line_element_spans.clear();
2268 } else {
2269 if needs_space {
2270 current_line.push(' ');
2271 current_length += 1;
2272 }
2273 current_line.push_str(&word_str);
2274 current_length += word_len;
2275 }
2276 }
2277 }
2278 } else {
2279 if is_adjacent_to_prev {
2283 if current_length + element_len > options.line_length {
2285 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2288 let before = current_line[..last_space].trim_end().to_string();
2289 let after = current_line[last_space + 1..].to_string();
2290 lines.push(before);
2291 current_line = format!("{after}{element_str}");
2292 current_length = display_len(¤t_line, length_mode);
2293 current_line_element_spans.clear();
2294 let start = after.len();
2296 current_line_element_spans.push((start, start + element_str.len()));
2297 } else {
2298 let start = current_line.len();
2300 current_line.push_str(&element_str);
2301 current_length += element_len;
2302 current_line_element_spans.push((start, current_line.len()));
2303 }
2304 } else {
2305 let start = current_line.len();
2306 current_line.push_str(&element_str);
2307 current_length += element_len;
2308 current_line_element_spans.push((start, current_line.len()));
2309 }
2310 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2311 lines.push(current_line.trim().to_string());
2313 current_line = element_str.clone();
2314 current_length = element_len;
2315 current_line_element_spans.clear();
2316 current_line_element_spans.push((0, element_str.len()));
2317 } else {
2318 let ends_with_opener =
2320 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2321 if current_length > 0 && !ends_with_opener {
2322 current_line.push(' ');
2323 current_length += 1;
2324 }
2325 let start = current_line.len();
2326 current_line.push_str(&element_str);
2327 current_length += element_len;
2328 current_line_element_spans.push((start, current_line.len()));
2329 }
2330 }
2331 }
2332
2333 if !current_line.is_empty() {
2335 lines.push(current_line.trim_end().to_string());
2336 }
2337
2338 lines
2339}
2340
2341pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2343 let lines: Vec<&str> = content.lines().collect();
2344 let mut result = Vec::new();
2345 let mut i = 0;
2346
2347 while i < lines.len() {
2348 let line = lines[i];
2349 let trimmed = line.trim();
2350
2351 if trimmed.is_empty() {
2353 result.push(String::new());
2354 i += 1;
2355 continue;
2356 }
2357
2358 if trimmed.starts_with('#') {
2360 result.push(line.to_string());
2361 i += 1;
2362 continue;
2363 }
2364
2365 if trimmed.starts_with(":::") {
2367 result.push(line.to_string());
2368 i += 1;
2369 continue;
2370 }
2371
2372 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2374 result.push(line.to_string());
2375 i += 1;
2376 while i < lines.len() {
2378 result.push(lines[i].to_string());
2379 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2380 i += 1;
2381 break;
2382 }
2383 i += 1;
2384 }
2385 continue;
2386 }
2387
2388 if calculate_indentation_width_default(line) >= 4 {
2390 result.push(line.to_string());
2392 i += 1;
2393 while i < lines.len() {
2394 let next_line = lines[i];
2395 if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2397 result.push(next_line.to_string());
2398 i += 1;
2399 } else {
2400 break;
2401 }
2402 }
2403 continue;
2404 }
2405
2406 if trimmed.starts_with('>') {
2408 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2411 let quote_prefix = line[0..gt_pos + 1].to_string();
2412 let quote_content = &line[quote_prefix.len()..].trim_start();
2413
2414 let reflowed = reflow_line(quote_content, options);
2415 for reflowed_line in reflowed.iter() {
2416 result.push(format!("{quote_prefix} {reflowed_line}"));
2417 }
2418 i += 1;
2419 continue;
2420 }
2421
2422 if is_horizontal_rule(trimmed) {
2424 result.push(line.to_string());
2425 i += 1;
2426 continue;
2427 }
2428
2429 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2431 let indent = line.len() - line.trim_start().len();
2433 let indent_str = " ".repeat(indent);
2434
2435 let mut marker_end = indent;
2438 let mut content_start = indent;
2439
2440 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2441 if let Some(period_pos) = line[indent..].find('.') {
2443 marker_end = indent + period_pos + 1; content_start = marker_end;
2445 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2449 content_start += 1;
2450 }
2451 }
2452 } else {
2453 marker_end = indent + 1; content_start = marker_end;
2456 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2460 content_start += 1;
2461 }
2462 }
2463
2464 let min_continuation_indent = content_start;
2466
2467 let rest = &line[content_start..];
2470 if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2471 marker_end = content_start + 3; content_start += 4; }
2474
2475 let marker = &line[indent..marker_end];
2476
2477 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2480 i += 1;
2481
2482 while i < lines.len() {
2486 let next_line = lines[i];
2487 let next_trimmed = next_line.trim();
2488
2489 if is_block_boundary(next_trimmed) {
2491 break;
2492 }
2493
2494 let next_indent = next_line.len() - next_line.trim_start().len();
2496 if next_indent >= min_continuation_indent {
2497 let trimmed_start = next_line.trim_start();
2500 list_content.push(trim_preserving_hard_break(trimmed_start));
2501 i += 1;
2502 } else {
2503 break;
2505 }
2506 }
2507
2508 let combined_content = if options.preserve_breaks {
2511 list_content[0].clone()
2512 } else {
2513 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2515 if has_hard_breaks {
2516 list_content.join("\n")
2518 } else {
2519 list_content.join(" ")
2521 }
2522 };
2523
2524 let trimmed_marker = marker;
2526 let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2527 indent + (content_start - indent).min(max_indent)
2530 } else {
2531 content_start
2532 };
2533
2534 let prefix_length = indent + trimmed_marker.len() + 1;
2536
2537 let adjusted_options = ReflowOptions {
2539 line_length: options.line_length.saturating_sub(prefix_length),
2540 ..options.clone()
2541 };
2542
2543 let reflowed = reflow_line(&combined_content, &adjusted_options);
2544 for (j, reflowed_line) in reflowed.iter().enumerate() {
2545 if j == 0 {
2546 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2547 } else {
2548 let continuation_indent = " ".repeat(continuation_spaces);
2550 result.push(format!("{continuation_indent}{reflowed_line}"));
2551 }
2552 }
2553 continue;
2554 }
2555
2556 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2558 result.push(line.to_string());
2559 i += 1;
2560 continue;
2561 }
2562
2563 if trimmed.starts_with('[') && line.contains("]:") {
2565 result.push(line.to_string());
2566 i += 1;
2567 continue;
2568 }
2569
2570 if is_definition_list_item(trimmed) {
2572 result.push(line.to_string());
2573 i += 1;
2574 continue;
2575 }
2576
2577 let mut is_single_line_paragraph = true;
2579 if i + 1 < lines.len() {
2580 let next_trimmed = lines[i + 1].trim();
2581 if !is_block_boundary(next_trimmed) {
2583 is_single_line_paragraph = false;
2584 }
2585 }
2586
2587 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2589 result.push(line.to_string());
2590 i += 1;
2591 continue;
2592 }
2593
2594 let mut paragraph_parts = Vec::new();
2596 let mut current_part = vec![line];
2597 i += 1;
2598
2599 if options.preserve_breaks {
2601 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2603 Some("\\")
2604 } else if line.ends_with(" ") {
2605 Some(" ")
2606 } else {
2607 None
2608 };
2609 let reflowed = reflow_line(line, options);
2610
2611 if let Some(break_marker) = hard_break_type {
2613 if !reflowed.is_empty() {
2614 let mut reflowed_with_break = reflowed;
2615 let last_idx = reflowed_with_break.len() - 1;
2616 if !has_hard_break(&reflowed_with_break[last_idx]) {
2617 reflowed_with_break[last_idx].push_str(break_marker);
2618 }
2619 result.extend(reflowed_with_break);
2620 }
2621 } else {
2622 result.extend(reflowed);
2623 }
2624 } else {
2625 while i < lines.len() {
2627 let prev_line = if !current_part.is_empty() {
2628 current_part.last().unwrap()
2629 } else {
2630 ""
2631 };
2632 let next_line = lines[i];
2633 let next_trimmed = next_line.trim();
2634
2635 if is_block_boundary(next_trimmed) {
2637 break;
2638 }
2639
2640 let prev_trimmed = prev_line.trim();
2643 let abbreviations = get_abbreviations(&options.abbreviations);
2644 let ends_with_sentence = (prev_trimmed.ends_with('.')
2645 || prev_trimmed.ends_with('!')
2646 || prev_trimmed.ends_with('?')
2647 || prev_trimmed.ends_with(".*")
2648 || prev_trimmed.ends_with("!*")
2649 || prev_trimmed.ends_with("?*")
2650 || prev_trimmed.ends_with("._")
2651 || prev_trimmed.ends_with("!_")
2652 || prev_trimmed.ends_with("?_")
2653 || prev_trimmed.ends_with(".\"")
2655 || prev_trimmed.ends_with("!\"")
2656 || prev_trimmed.ends_with("?\"")
2657 || prev_trimmed.ends_with(".'")
2658 || prev_trimmed.ends_with("!'")
2659 || prev_trimmed.ends_with("?'")
2660 || prev_trimmed.ends_with(".\u{201D}")
2661 || prev_trimmed.ends_with("!\u{201D}")
2662 || prev_trimmed.ends_with("?\u{201D}")
2663 || prev_trimmed.ends_with(".\u{2019}")
2664 || prev_trimmed.ends_with("!\u{2019}")
2665 || prev_trimmed.ends_with("?\u{2019}"))
2666 && !text_ends_with_abbreviation(
2667 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2668 &abbreviations,
2669 );
2670
2671 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2672 paragraph_parts.push(current_part.join(" "));
2674 current_part = vec![next_line];
2675 } else {
2676 current_part.push(next_line);
2677 }
2678 i += 1;
2679 }
2680
2681 if !current_part.is_empty() {
2683 if current_part.len() == 1 {
2684 paragraph_parts.push(current_part[0].to_string());
2686 } else {
2687 paragraph_parts.push(current_part.join(" "));
2688 }
2689 }
2690
2691 for (j, part) in paragraph_parts.iter().enumerate() {
2693 let reflowed = reflow_line(part, options);
2694 result.extend(reflowed);
2695
2696 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2700 let last_idx = result.len() - 1;
2701 if !has_hard_break(&result[last_idx]) {
2702 result[last_idx].push_str(" ");
2703 }
2704 }
2705 }
2706 }
2707 }
2708
2709 let result_text = result.join("\n");
2711 if content.ends_with('\n') && !result_text.ends_with('\n') {
2712 format!("{result_text}\n")
2713 } else {
2714 result_text
2715 }
2716}
2717
/// A single-paragraph reflow expressed as a text replacement.
///
/// `start_byte..end_byte` is the byte range of the original paragraph inside the
/// content that was analysed; `reflowed_text` is the text that should replace it.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset of the first byte of the paragraph's first line.
    pub start_byte: usize,
    /// Byte offset one past the end of the paragraph. The paragraph's trailing
    /// newline lies inside the range when the paragraph has one.
    pub end_byte: usize,
    /// Replacement text; it ends with a newline exactly when the replaced range
    /// includes the paragraph's trailing newline.
    pub reflowed_text: String,
}
2728
/// One source line of a blockquote paragraph, split into its quote marker and
/// the text that follows it.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Text of the line without any blockquote prefix.
    pub(crate) content: String,
    /// `true` when the source line carried its own blockquote prefix,
    /// `false` for a lazy continuation line.
    pub(crate) is_explicit: bool,
    /// The prefix exactly as written on the line; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Builds the record for a line that was written with a blockquote prefix.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self::from_parts(content, Some(prefix))
    }

    /// Builds the record for a lazy continuation line (no prefix of its own).
    pub fn lazy(content: String) -> Self {
        Self::from_parts(content, None)
    }

    /// Shared constructor: a line is explicit exactly when it has a prefix.
    fn from_parts(content: String, prefix: Option<String>) -> Self {
        Self {
            is_explicit: prefix.is_some(),
            content,
            prefix,
        }
    }
}
2763
/// How the lines after the first line of a blockquote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every continuation line repeats the blockquote prefix.
    Explicit,
    /// Continuation lines are emitted without the prefix unless a line's
    /// content forces one.
    Lazy,
}
2770
2771pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2779 let mut explicit_count = 0usize;
2780 let mut lazy_count = 0usize;
2781
2782 for line in lines.iter().skip(1) {
2783 if line.is_explicit {
2784 explicit_count += 1;
2785 } else {
2786 lazy_count += 1;
2787 }
2788 }
2789
2790 if explicit_count > 0 && lazy_count == 0 {
2791 BlockquoteContinuationStyle::Explicit
2792 } else if lazy_count > 0 && explicit_count == 0 {
2793 BlockquoteContinuationStyle::Lazy
2794 } else if explicit_count >= lazy_count {
2795 BlockquoteContinuationStyle::Explicit
2796 } else {
2797 BlockquoteContinuationStyle::Lazy
2798 }
2799}
2800
2801pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2806 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2807
2808 for (idx, line) in lines.iter().enumerate() {
2809 let Some(prefix) = line.prefix.as_ref() else {
2810 continue;
2811 };
2812 counts
2813 .entry(prefix.clone())
2814 .and_modify(|entry| entry.0 += 1)
2815 .or_insert((1, idx));
2816 }
2817
2818 counts
2819 .into_iter()
2820 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2821 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2822 })
2823 .map(|(prefix, _)| prefix)
2824 .unwrap_or_else(|| fallback.to_string())
2825}
2826
2827pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2832 let trimmed = content_line.trim_start();
2833 trimmed.starts_with('>')
2834 || trimmed.starts_with('#')
2835 || trimmed.starts_with("```")
2836 || trimmed.starts_with("~~~")
2837 || is_unordered_list_marker(trimmed)
2838 || is_numbered_list_item(trimmed)
2839 || is_horizontal_rule(trimmed)
2840 || is_definition_list_item(trimmed)
2841 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2842 || trimmed.starts_with(":::")
2843 || (trimmed.starts_with('<')
2844 && !trimmed.starts_with("<http")
2845 && !trimmed.starts_with("<https")
2846 && !trimmed.starts_with("<mailto:"))
2847}
2848
2849pub fn reflow_blockquote_content(
2858 lines: &[BlockquoteLineData],
2859 explicit_prefix: &str,
2860 continuation_style: BlockquoteContinuationStyle,
2861 options: &ReflowOptions,
2862) -> Vec<String> {
2863 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2864 let segments = split_into_segments_strs(&content_strs);
2865 let mut reflowed_content_lines: Vec<String> = Vec::new();
2866
2867 for segment in segments {
2868 let hard_break_type = segment.last().and_then(|&line| {
2869 let line = line.strip_suffix('\r').unwrap_or(line);
2870 if line.ends_with('\\') {
2871 Some("\\")
2872 } else if line.ends_with(" ") {
2873 Some(" ")
2874 } else {
2875 None
2876 }
2877 });
2878
2879 let pieces: Vec<&str> = segment
2880 .iter()
2881 .map(|&line| {
2882 if let Some(l) = line.strip_suffix('\\') {
2883 l.trim_end()
2884 } else if let Some(l) = line.strip_suffix(" ") {
2885 l.trim_end()
2886 } else {
2887 line.trim_end()
2888 }
2889 })
2890 .collect();
2891
2892 let segment_text = pieces.join(" ");
2893 let segment_text = segment_text.trim();
2894 if segment_text.is_empty() {
2895 continue;
2896 }
2897
2898 let mut reflowed = reflow_line(segment_text, options);
2899 if let Some(break_marker) = hard_break_type
2900 && !reflowed.is_empty()
2901 {
2902 let last_idx = reflowed.len() - 1;
2903 if !has_hard_break(&reflowed[last_idx]) {
2904 reflowed[last_idx].push_str(break_marker);
2905 }
2906 }
2907 reflowed_content_lines.extend(reflowed);
2908 }
2909
2910 let mut styled_lines: Vec<String> = Vec::new();
2911 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2912 let force_explicit = idx == 0
2913 || continuation_style == BlockquoteContinuationStyle::Explicit
2914 || should_force_explicit_blockquote_line(line);
2915 if force_explicit {
2916 styled_lines.push(format!("{explicit_prefix}{line}"));
2917 } else {
2918 styled_lines.push(line.clone());
2919 }
2920 }
2921
2922 styled_lines
2923}
2924
2925fn is_blockquote_content_boundary(content: &str) -> bool {
2926 let trimmed = content.trim();
2927 trimmed.is_empty()
2928 || is_block_boundary(trimmed)
2929 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2930 || trimmed.starts_with(":::")
2931 || crate::utils::is_template_directive_only(content)
2932 || is_standalone_attr_list(content)
2933 || is_snippet_block_delimiter(content)
2934}
2935
2936fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2937 let mut segments = Vec::new();
2938 let mut current = Vec::new();
2939
2940 for &line in lines {
2941 current.push(line);
2942 if has_hard_break(line) {
2943 segments.push(current);
2944 current = Vec::new();
2945 }
2946 }
2947
2948 if !current.is_empty() {
2949 segments.push(current);
2950 }
2951
2952 segments
2953}
2954
2955fn reflow_blockquote_paragraph_at_line(
2956 content: &str,
2957 lines: &[&str],
2958 target_idx: usize,
2959 options: &ReflowOptions,
2960) -> Option<ParagraphReflow> {
2961 let mut anchor_idx = target_idx;
2962 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2963 parsed.nesting_level
2964 } else {
2965 let mut found = None;
2966 let mut idx = target_idx;
2967 loop {
2968 if lines[idx].trim().is_empty() {
2969 break;
2970 }
2971 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2972 found = Some((idx, parsed.nesting_level));
2973 break;
2974 }
2975 if idx == 0 {
2976 break;
2977 }
2978 idx -= 1;
2979 }
2980 let (idx, level) = found?;
2981 anchor_idx = idx;
2982 level
2983 };
2984
2985 let mut para_start = anchor_idx;
2987 while para_start > 0 {
2988 let prev_idx = para_start - 1;
2989 let prev_line = lines[prev_idx];
2990
2991 if prev_line.trim().is_empty() {
2992 break;
2993 }
2994
2995 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2996 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2997 break;
2998 }
2999 para_start = prev_idx;
3000 continue;
3001 }
3002
3003 let prev_lazy = prev_line.trim_start();
3004 if is_blockquote_content_boundary(prev_lazy) {
3005 break;
3006 }
3007 para_start = prev_idx;
3008 }
3009
3010 while para_start < lines.len() {
3012 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3013 para_start += 1;
3014 continue;
3015 };
3016 target_level = parsed.nesting_level;
3017 break;
3018 }
3019
3020 if para_start >= lines.len() || para_start > target_idx {
3021 return None;
3022 }
3023
3024 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3027 let mut idx = para_start;
3028 while idx < lines.len() {
3029 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3030 break;
3031 }
3032
3033 let line = lines[idx];
3034 if line.trim().is_empty() {
3035 break;
3036 }
3037
3038 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3039 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3040 break;
3041 }
3042 collected.push((
3043 idx,
3044 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3045 ));
3046 idx += 1;
3047 continue;
3048 }
3049
3050 let lazy_content = line.trim_start();
3051 if is_blockquote_content_boundary(lazy_content) {
3052 break;
3053 }
3054
3055 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3056 idx += 1;
3057 }
3058
3059 if collected.is_empty() {
3060 return None;
3061 }
3062
3063 let para_end = collected[collected.len() - 1].0;
3064 if target_idx < para_start || target_idx > para_end {
3065 return None;
3066 }
3067
3068 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3069
3070 let fallback_prefix = line_data
3071 .iter()
3072 .find_map(|d| d.prefix.clone())
3073 .unwrap_or_else(|| "> ".to_string());
3074 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3075 let continuation_style = blockquote_continuation_style(&line_data);
3076
3077 let adjusted_line_length = options
3078 .line_length
3079 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3080 .max(1);
3081
3082 let adjusted_options = ReflowOptions {
3083 line_length: adjusted_line_length,
3084 ..options.clone()
3085 };
3086
3087 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3088
3089 if styled_lines.is_empty() {
3090 return None;
3091 }
3092
3093 let mut start_byte = 0;
3095 for line in lines.iter().take(para_start) {
3096 start_byte += line.len() + 1;
3097 }
3098
3099 let mut end_byte = start_byte;
3100 for line in lines.iter().take(para_end + 1).skip(para_start) {
3101 end_byte += line.len() + 1;
3102 }
3103
3104 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3105 if !includes_trailing_newline {
3106 end_byte -= 1;
3107 }
3108
3109 let reflowed_joined = styled_lines.join("\n");
3110 let reflowed_text = if includes_trailing_newline {
3111 if reflowed_joined.ends_with('\n') {
3112 reflowed_joined
3113 } else {
3114 format!("{reflowed_joined}\n")
3115 }
3116 } else if reflowed_joined.ends_with('\n') {
3117 reflowed_joined.trim_end_matches('\n').to_string()
3118 } else {
3119 reflowed_joined
3120 };
3121
3122 Some(ParagraphReflow {
3123 start_byte,
3124 end_byte,
3125 reflowed_text,
3126 })
3127}
3128
3129pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3147 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3148}
3149
3150pub fn reflow_paragraph_at_line_with_mode(
3152 content: &str,
3153 line_number: usize,
3154 line_length: usize,
3155 length_mode: ReflowLengthMode,
3156) -> Option<ParagraphReflow> {
3157 let options = ReflowOptions {
3158 line_length,
3159 length_mode,
3160 ..Default::default()
3161 };
3162 reflow_paragraph_at_line_with_options(content, line_number, &options)
3163}
3164
3165pub fn reflow_paragraph_at_line_with_options(
3176 content: &str,
3177 line_number: usize,
3178 options: &ReflowOptions,
3179) -> Option<ParagraphReflow> {
3180 if line_number == 0 {
3181 return None;
3182 }
3183
3184 let lines: Vec<&str> = content.lines().collect();
3185
3186 if line_number > lines.len() {
3188 return None;
3189 }
3190
3191 let target_idx = line_number - 1; let target_line = lines[target_idx];
3193 let trimmed = target_line.trim();
3194
3195 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3198 return Some(blockquote_reflow);
3199 }
3200
3201 if is_paragraph_boundary(trimmed, target_line) {
3203 return None;
3204 }
3205
3206 let mut para_start = target_idx;
3208 while para_start > 0 {
3209 let prev_idx = para_start - 1;
3210 let prev_line = lines[prev_idx];
3211 let prev_trimmed = prev_line.trim();
3212
3213 if is_paragraph_boundary(prev_trimmed, prev_line) {
3215 break;
3216 }
3217
3218 para_start = prev_idx;
3219 }
3220
3221 let mut para_end = target_idx;
3223 while para_end + 1 < lines.len() {
3224 let next_idx = para_end + 1;
3225 let next_line = lines[next_idx];
3226 let next_trimmed = next_line.trim();
3227
3228 if is_paragraph_boundary(next_trimmed, next_line) {
3230 break;
3231 }
3232
3233 para_end = next_idx;
3234 }
3235
3236 let paragraph_lines = &lines[para_start..=para_end];
3238
3239 let mut start_byte = 0;
3241 for line in lines.iter().take(para_start) {
3242 start_byte += line.len() + 1; }
3244
3245 let mut end_byte = start_byte;
3246 for line in paragraph_lines.iter() {
3247 end_byte += line.len() + 1; }
3249
3250 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3253
3254 if !includes_trailing_newline {
3256 end_byte -= 1;
3257 }
3258
3259 let paragraph_text = paragraph_lines.join("\n");
3261
3262 let reflowed = reflow_markdown(¶graph_text, options);
3264
3265 let reflowed_text = if includes_trailing_newline {
3269 if reflowed.ends_with('\n') {
3271 reflowed
3272 } else {
3273 format!("{reflowed}\n")
3274 }
3275 } else {
3276 if reflowed.ends_with('\n') {
3278 reflowed.trim_end_matches('\n').to_string()
3279 } else {
3280 reflowed
3281 }
3282 };
3283
3284 Some(ParagraphReflow {
3285 start_byte,
3286 end_byte,
3287 reflowed_text,
3288 })
3289}
3290
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` accepts text ending in a known abbreviation
    /// followed by a period, and rejects everything else.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Ordinary words with a period, abbreviations followed by other
        // punctuation, plain words, and the empty string must all be rejected.
        for text in [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ] {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    /// A list marker is `-`, `*` or `+`, either alone or followed by a space.
    #[test]
    fn test_is_unordered_list_marker() {
        for text in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(text), "expected {text:?} to be a list marker");
        }

        // Horizontal rules, emphasis, marker glued to text, and non-list lines.
        for text in [
            "---",
            "***",
            "- - -",
            "* * *",
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ] {
            assert!(
                !is_unordered_list_marker(text),
                "expected {text:?} not to be a list marker"
            );
        }
    }

    /// Lines that start a new block are boundaries; inline content is not.
    #[test]
    fn test_is_block_boundary() {
        for text in [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ] {
            assert!(is_block_boundary(text), "expected {text:?} to be a block boundary");
        }

        for text in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(text), "expected {text:?} not to be a block boundary");
        }
    }

    /// A definition-list item following a short term line must stay on its own
    /// line instead of being merged into the term.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` takes the trimmed line and the raw line, so it
    /// can also see indentation.
    #[test]
    fn test_is_paragraph_boundary() {
        for line in ["# Heading", "- item", ":::", ": definition"] {
            assert!(
                is_paragraph_boundary(line, line),
                "expected {line:?} to be a paragraph boundary"
            );
        }

        // Indented code: four spaces is the CommonMark threshold, and a tab
        // counts as well. The indent is built explicitly so its width cannot be
        // altered by whitespace normalisation of this file.
        let indented_code = format!("{}code", " ".repeat(4));
        assert!(is_paragraph_boundary("code", &indented_code));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with and without outer pipes.
        for row in ["| a | b |", "a | b"] {
            assert!(is_paragraph_boundary(row, row), "expected {row:?} to be a paragraph boundary");
        }

        assert!(!is_paragraph_boundary("regular text", "regular text"));

        // Fewer than four spaces of indent is still ordinary paragraph text.
        let lightly_indented = format!("{}text", " ".repeat(2));
        assert!(!is_paragraph_boundary("text", &lightly_indented));
    }

    /// Targeting a div marker line (line 3 here) must not produce a reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}