1use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13 LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14 SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit used to measure the length of a line when reflowing.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the mode can be used
/// as a map/set key and compared in `const`-like contexts; the enum has no
/// payload, so both are trivially sound.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Length is the number of `char`s (Unicode scalar values).
    Chars,
    /// Length is the displayed column width of the text. This is the default.
    #[default]
    Visual,
    /// Length is the number of UTF-8 bytes.
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
45#[derive(Clone)]
47pub struct ReflowOptions {
48 pub line_length: usize,
50 pub break_on_sentences: bool,
52 pub preserve_breaks: bool,
54 pub sentence_per_line: bool,
56 pub semantic_line_breaks: bool,
58 pub abbreviations: Option<Vec<String>>,
62 pub length_mode: ReflowLengthMode,
64 pub attr_lists: bool,
67 pub require_sentence_capital: bool,
72 pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79 fn default() -> Self {
80 Self {
81 line_length: 80,
82 break_on_sentences: true,
83 preserve_breaks: false,
84 sentence_per_line: false,
85 semantic_line_breaks: false,
86 abbreviations: None,
87 length_mode: ReflowLengthMode::default(),
88 attr_lists: false,
89 require_sentence_capital: true,
90 max_list_continuation_indent: None,
91 }
92 }
93}
94
/// Builds a per-`char` mask that is `true` for every character belonging to
/// an inline code span, including the backtick delimiters themselves.
///
/// A span opens with a run of N backticks and closes at the next run of
/// exactly N backticks. An opening run with no matching closer is treated as
/// literal text and scanning resumes right after it.
///
/// The returned vector is indexed by character position, not byte offset.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let symbols: Vec<char> = text.chars().collect();
    let total = symbols.len();
    let mut mask = vec![false; total];

    // Length of the backtick run that starts at `from`.
    let tick_run = |from: usize| symbols[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if symbols[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let open_at = cursor;
        let fence = tick_run(open_at);

        // Look for a closing run of the same length; shorter or longer runs
        // are part of the span's content and are skipped as a whole.
        let mut probe = open_at + fence;
        let mut span_end = None;
        while probe < total {
            if symbols[probe] != '`' {
                probe += 1;
                continue;
            }
            let run = tick_run(probe);
            probe += run;
            if run == fence {
                span_end = Some(probe);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening fence, content and closing fence are all "code".
                mask[open_at..end].fill(true);
                cursor = end;
            }
            // Unclosed: the backticks are literal; continue after them.
            None => cursor = open_at + fence,
        }
    }

    mask
}
155
156fn is_sentence_boundary(
160 text: &str,
161 pos: usize,
162 abbreviations: &HashSet<String>,
163 require_sentence_capital: bool,
164) -> bool {
165 let chars: Vec<char> = text.chars().collect();
166
167 if pos + 1 >= chars.len() {
168 return false;
169 }
170
171 let c = chars[pos];
172 let next_char = chars[pos + 1];
173
174 if is_cjk_sentence_ending(c) {
177 let mut after_punct_pos = pos + 1;
179 while after_punct_pos < chars.len()
180 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181 {
182 after_punct_pos += 1;
183 }
184
185 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187 after_punct_pos += 1;
188 }
189
190 if after_punct_pos >= chars.len() {
192 return false;
193 }
194
195 while after_punct_pos < chars.len()
197 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198 {
199 after_punct_pos += 1;
200 }
201
202 if after_punct_pos >= chars.len() {
203 return false;
204 }
205
206 return true;
209 }
210
211 if c != '.' && c != '!' && c != '?' {
213 return false;
214 }
215
216 let (_space_pos, after_space_pos) = if next_char == ' ' {
218 (pos + 1, pos + 2)
220 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221 if chars[pos + 2] == ' ' {
223 (pos + 2, pos + 3)
225 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226 (pos + 3, pos + 4)
228 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229 && pos + 4 < chars.len()
230 && chars[pos + 3] == chars[pos + 2]
231 && chars[pos + 4] == ' '
232 {
233 (pos + 4, pos + 5)
235 } else {
236 return false;
237 }
238 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239 (pos + 2, pos + 3)
241 } else if (next_char == '*' || next_char == '_')
242 && pos + 3 < chars.len()
243 && chars[pos + 2] == next_char
244 && chars[pos + 3] == ' '
245 {
246 (pos + 3, pos + 4)
248 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249 (pos + 3, pos + 4)
251 } else {
252 return false;
253 };
254
255 let mut next_char_pos = after_space_pos;
257 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258 next_char_pos += 1;
259 }
260
261 if next_char_pos >= chars.len() {
263 return false;
264 }
265
266 let mut first_letter_pos = next_char_pos;
268 while first_letter_pos < chars.len()
269 && (chars[first_letter_pos] == '*'
270 || chars[first_letter_pos] == '_'
271 || chars[first_letter_pos] == '~'
272 || is_opening_quote(chars[first_letter_pos]))
273 {
274 first_letter_pos += 1;
275 }
276
277 if first_letter_pos >= chars.len() {
279 return false;
280 }
281
282 let first_char = chars[first_letter_pos];
283
284 if c == '!' || c == '?' {
286 return true;
287 }
288
289 if pos > 0 {
293 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296 return false;
297 }
298
299 if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301 return false;
302 }
303
304 if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308 return false;
309 }
310 }
311
312 if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315 return false;
316 }
317
318 true
319}
320
321pub fn split_into_sentences(text: &str) -> Vec<String> {
323 split_into_sentences_custom(text, &None)
324}
325
326pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328 let abbreviations = get_abbreviations(custom_abbreviations);
329 split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332fn split_into_sentences_with_set(
335 text: &str,
336 abbreviations: &HashSet<String>,
337 require_sentence_capital: bool,
338) -> Vec<String> {
339 let in_code = compute_inline_code_mask(text);
341
342 let mut sentences = Vec::new();
343 let mut current_sentence = String::new();
344 let mut chars = text.chars().peekable();
345 let mut pos = 0;
346
347 while let Some(c) = chars.next() {
348 current_sentence.push(c);
349
350 if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351 while let Some(&next) = chars.peek() {
353 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354 current_sentence.push(chars.next().unwrap());
355 pos += 1;
356 } else {
357 break;
358 }
359 }
360
361 if chars.peek() == Some(&' ') {
363 chars.next();
364 pos += 1;
365 }
366
367 sentences.push(current_sentence.trim().to_string());
368 current_sentence.clear();
369 }
370
371 pos += 1;
372 }
373
374 if !current_sentence.trim().is_empty() {
376 sentences.push(current_sentence.trim().to_string());
377 }
378 sentences
379}
380
/// Returns `true` if `line` is a horizontal rule: it starts with `-`, `_`
/// or `*`, consists only of that character and spaces, and contains the
/// character at least three times.
///
/// The line is expected to be trimmed already: a leading space makes the
/// first character a non-marker and the function returns `false`.
fn is_horizontal_rule(line: &str) -> bool {
    // Fewer than three bytes can never hold three markers.
    if line.len() < 3 {
        return false;
    }

    let Some(marker) = line.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '_' | '*') {
        return false;
    }

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
409
/// Returns `true` if `line` begins with one or more numeric characters
/// immediately followed by `". "` (for example `1. item`).
///
/// "Numeric" follows `char::is_numeric`, so it is not limited to ASCII
/// digits. Only the `.` delimiter is recognized.
fn is_numbered_list_item(line: &str) -> bool {
    let after_number = line.trim_start_matches(char::is_numeric);
    let has_number = after_number.len() < line.len();
    has_number && after_number.starts_with(". ")
}
433
434fn is_unordered_list_marker(s: &str) -> bool {
436 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437 && !is_horizontal_rule(s)
438 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441fn is_block_boundary_core(trimmed: &str) -> bool {
444 trimmed.is_empty()
445 || trimmed.starts_with('#')
446 || trimmed.starts_with("```")
447 || trimmed.starts_with("~~~")
448 || trimmed.starts_with('>')
449 || (trimmed.starts_with('[') && trimmed.contains("]:"))
450 || is_horizontal_rule(trimmed)
451 || is_unordered_list_marker(trimmed)
452 || is_numbered_list_item(trimmed)
453 || is_definition_list_item(trimmed)
454 || trimmed.starts_with(":::")
455}
456
457fn is_block_boundary(trimmed: &str) -> bool {
460 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467 is_block_boundary_core(trimmed)
468 || calculate_indentation_width_default(line) >= 4
469 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Returns `true` if `line` ends with a Markdown hard line break.
///
/// A hard break is either two or more trailing spaces or a trailing
/// backslash. A single trailing space is ordinary trailing whitespace and is
/// not a hard break. A trailing `\r` (from CRLF line endings) is ignored
/// before checking.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
481
/// Returns `true` if the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trims trailing whitespace from `s` while keeping a Markdown hard line
/// break intact.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces is normalized to its content
///   followed by exactly two spaces (the canonical hard break). If nothing
///   but whitespace remains, the result is empty.
/// * Anything else, including a single trailing space, is simply
///   right-trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep verbatim.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Two spaces are required for the break to survive; one space would
        // silently turn it into a soft break.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
517
518fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520 if options.attr_lists {
521 parse_markdown_elements_with_attr_lists(text)
522 } else {
523 parse_markdown_elements(text)
524 }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528 if options.sentence_per_line {
530 let elements = parse_elements(line, options);
531 return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532 }
533
534 if options.semantic_line_breaks {
536 let elements = parse_elements(line, options);
537 return reflow_elements_semantic(&elements, options);
538 }
539
540 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543 return vec![line.to_string()];
544 }
545
546 let elements = parse_elements(line, options);
548
549 reflow_elements(&elements, options)
551}
552
/// Where the image inside a linked image gets its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline URL: `![alt](url)`.
    Inline(String),
    /// Reference label: `![alt][ref]`.
    Reference(String),
}

/// Where the link wrapped around a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline URL: `[...](url)`.
    Inline(String),
    /// Reference label: `[...][ref]`.
    Reference(String),
}

/// One inline Markdown element of a line.
///
/// The `Display` implementation renders each variant back to its Markdown
/// source form; that form is shown next to each variant below.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// `[text](url)`
    Link { text: String, url: String },
    /// `[text][reference]`
    ReferenceLink { text: String, reference: String },
    /// `[text][]`
    EmptyReferenceLink { text: String },
    /// `[reference]`
    ShortcutReference { reference: String },
    /// `![alt](url)`
    InlineImage { alt: String, url: String },
    /// `![alt][reference]`
    ReferenceImage { alt: String, reference: String },
    /// `![alt][]`
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link, e.g. `[![alt](img)](target)`; both the
    /// image source and the link target may be inline or by reference.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// `[^note]`
    FootnoteReference { note: String },
    /// `~~text~~`
    Strikethrough(String),
    /// `[[content]]`
    WikiLink(String),
    /// `$math$`
    InlineMath(String),
    /// `$$math$$`
    DisplayMath(String),
    /// `:name:`
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim (including its delimiters).
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Attribute list, stored and rendered verbatim.
    AttrList(String),
    /// `` `code` `` - content only; rendered with single backticks.
    Code(String),
    /// `**content**`, or `__content__` when `underscore` is set.
    Bold {
        content: String,
        underscore: bool,
    },
    /// `*content*`, or `_content_` when `underscore` is set.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Writes the element in its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax: an empty rendering would drop
            // the image from the output and report a display width of zero.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // `[` + image + `]` + link target, written without an
                // intermediate allocation.
                f.write_str("[")?;
                match img_source {
                    LinkedImageSource::Inline(url) => write!(f, "![{alt}]({url})")?,
                    LinkedImageSource::Reference(r) => write!(f, "![{alt}][{r}]")?,
                }
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
694
695impl Element {
696 fn display_width(&self, mode: ReflowLengthMode) -> usize {
700 let formatted = format!("{self}");
701 display_len(&formatted, mode)
702 }
703}
704
705#[derive(Debug, Clone)]
707struct EmphasisSpan {
708 start: usize,
710 end: usize,
712 content: String,
714 is_strong: bool,
716 is_strikethrough: bool,
718 uses_underscore: bool,
720}
721
722fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732 let mut spans = Vec::new();
733 let mut options = Options::empty();
734 options.insert(Options::ENABLE_STRIKETHROUGH);
735
736 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739 let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741 let parser = Parser::new_ext(text, options).into_offset_iter();
742
743 for (event, range) in parser {
744 match event {
745 Event::Start(Tag::Emphasis) => {
746 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748 emphasis_stack.push((range.start, uses_underscore));
749 }
750 Event::End(TagEnd::Emphasis) => {
751 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752 let content_start = start_byte + 1;
754 let content_end = range.end - 1;
755 if content_end > content_start
756 && let Some(content) = text.get(content_start..content_end)
757 {
758 spans.push(EmphasisSpan {
759 start: start_byte,
760 end: range.end,
761 content: content.to_string(),
762 is_strong: false,
763 is_strikethrough: false,
764 uses_underscore,
765 });
766 }
767 }
768 }
769 Event::Start(Tag::Strong) => {
770 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772 strong_stack.push((range.start, uses_underscore));
773 }
774 Event::End(TagEnd::Strong) => {
775 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776 let content_start = start_byte + 2;
778 let content_end = range.end - 2;
779 if content_end > content_start
780 && let Some(content) = text.get(content_start..content_end)
781 {
782 spans.push(EmphasisSpan {
783 start: start_byte,
784 end: range.end,
785 content: content.to_string(),
786 is_strong: true,
787 is_strikethrough: false,
788 uses_underscore,
789 });
790 }
791 }
792 }
793 Event::Start(Tag::Strikethrough) => {
794 strikethrough_stack.push(range.start);
795 }
796 Event::End(TagEnd::Strikethrough) => {
797 if let Some(start_byte) = strikethrough_stack.pop() {
798 let content_start = start_byte + 2;
800 let content_end = range.end - 2;
801 if content_end > content_start
802 && let Some(content) = text.get(content_start..content_end)
803 {
804 spans.push(EmphasisSpan {
805 start: start_byte,
806 end: range.end,
807 content: content.to_string(),
808 is_strong: false,
809 is_strikethrough: true,
810 uses_underscore: false,
811 });
812 }
813 }
814 }
815 _ => {}
816 }
817 }
818
819 spans.sort_by_key(|s| s.start);
821 spans
822}
823
824fn parse_markdown_elements(text: &str) -> Vec<Element> {
835 parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839 parse_markdown_elements_inner(text, true)
840}
841
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843 let mut elements = Vec::new();
844 let mut remaining = text;
845
846 let emphasis_spans = extract_emphasis_spans(text);
848
849 while !remaining.is_empty() {
850 let current_offset = text.len() - remaining.len();
852 let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856 if remaining.contains("[!") {
860 if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863 {
864 earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865 }
866
867 if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870 {
871 earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872 }
873
874 if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877 {
878 earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879 }
880
881 if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884 {
885 earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886 }
887 }
888
889 if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893 {
894 earliest_match = Some((m.start(), m.end(), "inline_image"));
895 }
896
897 if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900 {
901 earliest_match = Some((m.start(), m.end(), "ref_image"));
902 }
903
904 if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907 {
908 earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909 }
910
911 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914 {
915 earliest_match = Some((m.start(), m.end(), "inline_link"));
916 }
917
918 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921 {
922 earliest_match = Some((m.start(), m.end(), "ref_link"));
923 }
924
925 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929 {
930 earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931 }
932
933 if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936 {
937 earliest_match = Some((m.start(), m.end(), "wiki_link"));
938 }
939
940 if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943 {
944 earliest_match = Some((m.start(), m.end(), "display_math"));
945 }
946
947 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950 {
951 earliest_match = Some((m.start(), m.end(), "inline_math"));
952 }
953
954 if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959 {
960 earliest_match = Some((m.start(), m.end(), "emoji"));
961 }
962
963 if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966 {
967 earliest_match = Some((m.start(), m.end(), "html_entity"));
968 }
969
970 if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974 {
975 earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976 }
977
978 if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982 {
983 let matched_text = &remaining[m.start()..m.end()];
985 let is_url_autolink = matched_text.starts_with("<http://")
986 || matched_text.starts_with("<https://")
987 || matched_text.starts_with("<mailto:")
988 || matched_text.starts_with("<ftp://")
989 || matched_text.starts_with("<ftps://");
990
991 let is_email_autolink = {
994 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995 EMAIL_PATTERN.is_match(content)
996 };
997
998 if is_url_autolink || is_email_autolink {
999 earliest_match = Some((m.start(), m.end(), "autolink"));
1000 } else {
1001 earliest_match = Some((m.start(), m.end(), "html_tag"));
1002 }
1003 }
1004
1005 let mut next_special = remaining.len();
1007 let mut special_type = "";
1008 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009 let mut attr_list_len: usize = 0;
1010
1011 if let Some(pos) = remaining.find('`')
1013 && pos < next_special
1014 {
1015 next_special = pos;
1016 special_type = "code";
1017 }
1018
1019 if attr_lists
1021 && let Some(pos) = remaining.find('{')
1022 && pos < next_special
1023 && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024 && m.start() == 0
1025 {
1026 next_special = pos;
1027 special_type = "attr_list";
1028 attr_list_len = m.end();
1029 }
1030
1031 for span in &emphasis_spans {
1034 if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035 let pos_in_remaining = span.start - current_offset;
1036 if pos_in_remaining < next_special {
1037 next_special = pos_in_remaining;
1038 special_type = "pulldown_emphasis";
1039 pulldown_emphasis = Some(span);
1040 }
1041 break; }
1043 }
1044
1045 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047 pos < next_special
1048 } else {
1049 false
1050 };
1051
1052 if should_process_markdown_link {
1053 let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055 if pos > 0 {
1057 elements.push(Element::Text(remaining[..pos].to_string()));
1058 }
1059
1060 match pattern_type {
1062 "linked_image_ii" => {
1064 if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1066 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1067 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1068 elements.push(Element::LinkedImage {
1069 alt: alt.to_string(),
1070 img_source: LinkedImageSource::Inline(img_url.to_string()),
1071 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072 });
1073 remaining = &remaining[match_end..];
1074 } else {
1075 elements.push(Element::Text("[".to_string()));
1076 remaining = &remaining[1..];
1077 }
1078 }
1079 "linked_image_ri" => {
1081 if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1083 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1084 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1085 elements.push(Element::LinkedImage {
1086 alt: alt.to_string(),
1087 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088 link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089 });
1090 remaining = &remaining[match_end..];
1091 } else {
1092 elements.push(Element::Text("[".to_string()));
1093 remaining = &remaining[1..];
1094 }
1095 }
1096 "linked_image_ir" => {
1098 if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1100 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1101 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1102 elements.push(Element::LinkedImage {
1103 alt: alt.to_string(),
1104 img_source: LinkedImageSource::Inline(img_url.to_string()),
1105 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106 });
1107 remaining = &remaining[match_end..];
1108 } else {
1109 elements.push(Element::Text("[".to_string()));
1110 remaining = &remaining[1..];
1111 }
1112 }
1113 "linked_image_rr" => {
1115 if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1117 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1118 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1119 elements.push(Element::LinkedImage {
1120 alt: alt.to_string(),
1121 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123 });
1124 remaining = &remaining[match_end..];
1125 } else {
1126 elements.push(Element::Text("[".to_string()));
1127 remaining = &remaining[1..];
1128 }
1129 }
1130 "inline_image" => {
1131 if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1133 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1134 elements.push(Element::InlineImage {
1135 alt: alt.to_string(),
1136 url: url.to_string(),
1137 });
1138 remaining = &remaining[match_end..];
1139 } else {
1140 elements.push(Element::Text("!".to_string()));
1141 remaining = &remaining[1..];
1142 }
1143 }
1144 "ref_image" => {
1145 if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1147 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1148
1149 if reference.is_empty() {
1150 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151 } else {
1152 elements.push(Element::ReferenceImage {
1153 alt: alt.to_string(),
1154 reference: reference.to_string(),
1155 });
1156 }
1157 remaining = &remaining[match_end..];
1158 } else {
1159 elements.push(Element::Text("!".to_string()));
1160 remaining = &remaining[1..];
1161 }
1162 }
1163 "footnote_ref" => {
1164 if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1166 elements.push(Element::FootnoteReference { note: note.to_string() });
1167 remaining = &remaining[match_end..];
1168 } else {
1169 elements.push(Element::Text("[".to_string()));
1170 remaining = &remaining[1..];
1171 }
1172 }
1173 "inline_link" => {
1174 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1176 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1177 elements.push(Element::Link {
1178 text: text.to_string(),
1179 url: url.to_string(),
1180 });
1181 remaining = &remaining[match_end..];
1182 } else {
1183 elements.push(Element::Text("[".to_string()));
1185 remaining = &remaining[1..];
1186 }
1187 }
1188 "ref_link" => {
1189 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1191 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1192
1193 if reference.is_empty() {
1194 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196 } else {
1197 elements.push(Element::ReferenceLink {
1199 text: text.to_string(),
1200 reference: reference.to_string(),
1201 });
1202 }
1203 remaining = &remaining[match_end..];
1204 } else {
1205 elements.push(Element::Text("[".to_string()));
1207 remaining = &remaining[1..];
1208 }
1209 }
1210 "shortcut_ref" => {
1211 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1213 elements.push(Element::ShortcutReference {
1214 reference: reference.to_string(),
1215 });
1216 remaining = &remaining[match_end..];
1217 } else {
1218 elements.push(Element::Text("[".to_string()));
1220 remaining = &remaining[1..];
1221 }
1222 }
1223 "wiki_link" => {
1224 if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1226 elements.push(Element::WikiLink(content.to_string()));
1227 remaining = &remaining[match_end..];
1228 } else {
1229 elements.push(Element::Text("[[".to_string()));
1230 remaining = &remaining[2..];
1231 }
1232 }
1233 "display_math" => {
1234 if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1236 elements.push(Element::DisplayMath(math.to_string()));
1237 remaining = &remaining[match_end..];
1238 } else {
1239 elements.push(Element::Text("$$".to_string()));
1240 remaining = &remaining[2..];
1241 }
1242 }
1243 "inline_math" => {
1244 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1246 elements.push(Element::InlineMath(math.to_string()));
1247 remaining = &remaining[match_end..];
1248 } else {
1249 elements.push(Element::Text("$".to_string()));
1250 remaining = &remaining[1..];
1251 }
1252 }
1253 "emoji" => {
1255 if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1257 elements.push(Element::EmojiShortcode(emoji.to_string()));
1258 remaining = &remaining[match_end..];
1259 } else {
1260 elements.push(Element::Text(":".to_string()));
1261 remaining = &remaining[1..];
1262 }
1263 }
1264 "html_entity" => {
1265 elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267 remaining = &remaining[match_end..];
1268 }
1269 "hugo_shortcode" => {
1270 elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272 remaining = &remaining[match_end..];
1273 }
1274 "autolink" => {
1275 elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277 remaining = &remaining[match_end..];
1278 }
1279 "html_tag" => {
1280 elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282 remaining = &remaining[match_end..];
1283 }
1284 _ => {
1285 elements.push(Element::Text("[".to_string()));
1287 remaining = &remaining[1..];
1288 }
1289 }
1290 } else {
1291 if next_special > 0 && next_special < remaining.len() {
1295 elements.push(Element::Text(remaining[..next_special].to_string()));
1296 remaining = &remaining[next_special..];
1297 }
1298
1299 match special_type {
1301 "code" => {
1302 if let Some(code_end) = remaining[1..].find('`') {
1304 let code = &remaining[1..1 + code_end];
1305 elements.push(Element::Code(code.to_string()));
1306 remaining = &remaining[1 + code_end + 1..];
1307 } else {
1308 elements.push(Element::Text(remaining.to_string()));
1310 break;
1311 }
1312 }
1313 "attr_list" => {
1314 elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315 remaining = &remaining[attr_list_len..];
1316 }
1317 "pulldown_emphasis" => {
1318 if let Some(span) = pulldown_emphasis {
1320 let span_len = span.end - span.start;
1321 if span.is_strikethrough {
1322 elements.push(Element::Strikethrough(span.content.clone()));
1323 } else if span.is_strong {
1324 elements.push(Element::Bold {
1325 content: span.content.clone(),
1326 underscore: span.uses_underscore,
1327 });
1328 } else {
1329 elements.push(Element::Italic {
1330 content: span.content.clone(),
1331 underscore: span.uses_underscore,
1332 });
1333 }
1334 remaining = &remaining[span_len..];
1335 } else {
1336 elements.push(Element::Text(remaining[..1].to_string()));
1338 remaining = &remaining[1..];
1339 }
1340 }
1341 _ => {
1342 elements.push(Element::Text(remaining.to_string()));
1344 break;
1345 }
1346 }
1347 }
1348 }
1349
1350 elements
1351}
1352
1353fn reflow_elements_sentence_per_line(
1355 elements: &[Element],
1356 custom_abbreviations: &Option<Vec<String>>,
1357 require_sentence_capital: bool,
1358) -> Vec<String> {
1359 let abbreviations = get_abbreviations(custom_abbreviations);
1360 let mut lines = Vec::new();
1361 let mut current_line = String::new();
1362
1363 for (idx, element) in elements.iter().enumerate() {
1364 let element_str = format!("{element}");
1365
1366 if let Element::Text(text) = element {
1368 let combined = format!("{current_line}{text}");
1370 let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373 if sentences.len() > 1 {
1374 for (i, sentence) in sentences.iter().enumerate() {
1376 if i == 0 {
1377 let trimmed = sentence.trim();
1380
1381 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382 current_line = sentence.to_string();
1384 } else {
1385 lines.push(sentence.to_string());
1387 current_line.clear();
1388 }
1389 } else if i == sentences.len() - 1 {
1390 let trimmed = sentence.trim();
1392 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395 lines.push(sentence.to_string());
1397 current_line.clear();
1398 } else {
1399 current_line = sentence.to_string();
1401 }
1402 } else {
1403 lines.push(sentence.to_string());
1405 }
1406 }
1407 } else {
1408 let trimmed = combined.trim();
1410
1411 if trimmed.is_empty() {
1415 continue;
1416 }
1417
1418 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421 lines.push(trimmed.to_string());
1423 current_line.clear();
1424 } else {
1425 current_line = combined;
1427 }
1428 }
1429 } else if let Element::Italic { content, underscore } = element {
1430 let marker = if *underscore { "_" } else { "*" };
1432 handle_emphasis_sentence_split(
1433 content,
1434 marker,
1435 &abbreviations,
1436 require_sentence_capital,
1437 &mut current_line,
1438 &mut lines,
1439 );
1440 } else if let Element::Bold { content, underscore } = element {
1441 let marker = if *underscore { "__" } else { "**" };
1443 handle_emphasis_sentence_split(
1444 content,
1445 marker,
1446 &abbreviations,
1447 require_sentence_capital,
1448 &mut current_line,
1449 &mut lines,
1450 );
1451 } else if let Element::Strikethrough(content) = element {
1452 handle_emphasis_sentence_split(
1454 content,
1455 "~~",
1456 &abbreviations,
1457 require_sentence_capital,
1458 &mut current_line,
1459 &mut lines,
1460 );
1461 } else {
1462 let is_adjacent = if idx > 0 {
1465 match &elements[idx - 1] {
1466 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467 _ => true,
1468 }
1469 } else {
1470 false
1471 };
1472
1473 if !is_adjacent
1475 && !current_line.is_empty()
1476 && !current_line.ends_with(' ')
1477 && !current_line.ends_with('(')
1478 && !current_line.ends_with('[')
1479 {
1480 current_line.push(' ');
1481 }
1482 current_line.push_str(&element_str);
1483 }
1484 }
1485
1486 if !current_line.is_empty() {
1488 lines.push(current_line.trim().to_string());
1489 }
1490 lines
1491}
1492
1493fn handle_emphasis_sentence_split(
1495 content: &str,
1496 marker: &str,
1497 abbreviations: &HashSet<String>,
1498 require_sentence_capital: bool,
1499 current_line: &mut String,
1500 lines: &mut Vec<String>,
1501) {
1502 let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505 if sentences.len() <= 1 {
1506 if !current_line.is_empty()
1508 && !current_line.ends_with(' ')
1509 && !current_line.ends_with('(')
1510 && !current_line.ends_with('[')
1511 {
1512 current_line.push(' ');
1513 }
1514 current_line.push_str(marker);
1515 current_line.push_str(content);
1516 current_line.push_str(marker);
1517
1518 let trimmed = content.trim();
1520 let ends_with_punct = ends_with_sentence_punct(trimmed);
1521 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522 lines.push(current_line.clone());
1523 current_line.clear();
1524 }
1525 } else {
1526 for (i, sentence) in sentences.iter().enumerate() {
1528 let trimmed = sentence.trim();
1529 if trimmed.is_empty() {
1530 continue;
1531 }
1532
1533 if i == 0 {
1534 if !current_line.is_empty()
1536 && !current_line.ends_with(' ')
1537 && !current_line.ends_with('(')
1538 && !current_line.ends_with('[')
1539 {
1540 current_line.push(' ');
1541 }
1542 current_line.push_str(marker);
1543 current_line.push_str(trimmed);
1544 current_line.push_str(marker);
1545
1546 let ends_with_punct = ends_with_sentence_punct(trimmed);
1548 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549 lines.push(current_line.clone());
1550 current_line.clear();
1551 }
1552 } else if i == sentences.len() - 1 {
1553 let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556 let mut line = String::new();
1557 line.push_str(marker);
1558 line.push_str(trimmed);
1559 line.push_str(marker);
1560
1561 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562 lines.push(line);
1563 } else {
1564 *current_line = line;
1566 }
1567 } else {
1568 let mut line = String::new();
1570 line.push_str(marker);
1571 line.push_str(trimmed);
1572 line.push_str(marker);
1573 lines.push(line);
1574 }
1575 }
1576 }
1577}
1578
/// Lower-case words before which `split_at_break_word` is allowed to start a
/// new line. Order is preserved from the original table.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns
    "which", "that",
    // Subordinating conjunctions
    "because", "when", "if", "while", "where", "although", "though", "unless",
    "since", "after", "before", "until", "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1614
/// Returns `true` for the characters treated as clause separators:
/// comma, semicolon, colon and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1624 let mut spans = Vec::new();
1625 let mut offset = 0;
1626 for element in elements {
1627 let rendered = format!("{element}");
1628 let len = rendered.len();
1629 if !matches!(element, Element::Text(_)) {
1630 spans.push((offset, offset + len));
1631 }
1632 offset += len;
1633 }
1634 spans
1635}
1636
/// Returns `true` when `pos` lies strictly inside one of `spans`
/// (both the start and the end offset of a span count as outside).
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1641
/// Fraction of the configured line length used as a lower bound:
/// `line_length * MIN_SPLIT_RATIO` (truncated to `usize`) is the minimum
/// display length of the first part of a clause / break-word split, and the
/// threshold below which semantic reflow tries to merge a line into the
/// previous one.
const MIN_SPLIT_RATIO: f64 = 0.3;
1645
1646fn split_at_clause_punctuation(
1650 text: &str,
1651 line_length: usize,
1652 element_spans: &[(usize, usize)],
1653 length_mode: ReflowLengthMode,
1654) -> Option<(String, String)> {
1655 let chars: Vec<char> = text.chars().collect();
1656 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1657
1658 let mut width_acc = 0;
1660 let mut search_end_char = 0;
1661 for (idx, &c) in chars.iter().enumerate() {
1662 let c_width = display_len(&c.to_string(), length_mode);
1663 if width_acc + c_width > line_length {
1664 break;
1665 }
1666 width_acc += c_width;
1667 search_end_char = idx + 1;
1668 }
1669
1670 let mut best_pos = None;
1671 for i in (0..search_end_char).rev() {
1672 if is_clause_punctuation(chars[i]) {
1673 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1675 if !is_inside_element(byte_pos, element_spans) {
1676 best_pos = Some(i);
1677 break;
1678 }
1679 }
1680 }
1681
1682 let pos = best_pos?;
1683
1684 let first: String = chars[..=pos].iter().collect();
1686 let first_display_len = display_len(&first, length_mode);
1687 if first_display_len < min_first_len {
1688 return None;
1689 }
1690
1691 let rest: String = chars[pos + 1..].iter().collect();
1693 let rest = rest.trim_start().to_string();
1694
1695 if rest.is_empty() {
1696 return None;
1697 }
1698
1699 Some((first, rest))
1700}
1701
1702fn split_at_break_word(
1706 text: &str,
1707 line_length: usize,
1708 element_spans: &[(usize, usize)],
1709 length_mode: ReflowLengthMode,
1710) -> Option<(String, String)> {
1711 let lower = text.to_lowercase();
1712 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1713 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1716 let mut search_start = 0;
1717 while let Some(pos) = lower[search_start..].find(word) {
1718 let abs_pos = search_start + pos;
1719
1720 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1722 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1723
1724 if preceded_by_space && followed_by_space {
1725 let first_part = text[..abs_pos].trim_end();
1727 let first_part_len = display_len(first_part, length_mode);
1728
1729 if first_part_len >= min_first_len
1730 && first_part_len <= line_length
1731 && !is_inside_element(abs_pos, element_spans)
1732 {
1733 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1735 best_split = Some((abs_pos, word.len()));
1736 }
1737 }
1738 }
1739
1740 search_start = abs_pos + word.len();
1741 }
1742 }
1743
1744 let (byte_start, _word_len) = best_split?;
1745
1746 let first = text[..byte_start].trim_end().to_string();
1747 let rest = text[byte_start..].to_string();
1748
1749 if first.is_empty() || rest.trim().is_empty() {
1750 return None;
1751 }
1752
1753 Some((first, rest))
1754}
1755
1756fn cascade_split_line(
1759 text: &str,
1760 line_length: usize,
1761 abbreviations: &Option<Vec<String>>,
1762 length_mode: ReflowLengthMode,
1763 attr_lists: bool,
1764) -> Vec<String> {
1765 if line_length == 0 || display_len(text, length_mode) <= line_length {
1766 return vec![text.to_string()];
1767 }
1768
1769 let elements = parse_markdown_elements_inner(text, attr_lists);
1770 let element_spans = compute_element_spans(&elements);
1771
1772 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1774 let mut result = vec![first];
1775 result.extend(cascade_split_line(
1776 &rest,
1777 line_length,
1778 abbreviations,
1779 length_mode,
1780 attr_lists,
1781 ));
1782 return result;
1783 }
1784
1785 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1787 let mut result = vec![first];
1788 result.extend(cascade_split_line(
1789 &rest,
1790 line_length,
1791 abbreviations,
1792 length_mode,
1793 attr_lists,
1794 ));
1795 return result;
1796 }
1797
1798 let options = ReflowOptions {
1800 line_length,
1801 break_on_sentences: false,
1802 preserve_breaks: false,
1803 sentence_per_line: false,
1804 semantic_line_breaks: false,
1805 abbreviations: abbreviations.clone(),
1806 length_mode,
1807 attr_lists,
1808 require_sentence_capital: true,
1809 max_list_continuation_indent: None,
1810 };
1811 reflow_elements(&elements, &options)
1812}
1813
1814fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1818 let sentence_lines =
1820 reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
1821
1822 if options.line_length == 0 {
1825 return sentence_lines;
1826 }
1827
1828 let length_mode = options.length_mode;
1829 let mut result = Vec::new();
1830 for line in sentence_lines {
1831 if display_len(&line, length_mode) <= options.line_length {
1832 result.push(line);
1833 } else {
1834 result.extend(cascade_split_line(
1835 &line,
1836 options.line_length,
1837 &options.abbreviations,
1838 length_mode,
1839 options.attr_lists,
1840 ));
1841 }
1842 }
1843
1844 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1847 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1848 for line in result {
1849 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1850 let prev_ends_at_sentence = {
1852 let trimmed = merged.last().unwrap().trim_end();
1853 trimmed
1854 .chars()
1855 .rev()
1856 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1857 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1858 };
1859
1860 if !prev_ends_at_sentence {
1861 let prev = merged.last_mut().unwrap();
1862 let combined = format!("{prev} {line}");
1863 if display_len(&combined, length_mode) <= options.line_length {
1865 *prev = combined;
1866 continue;
1867 }
1868 }
1869 }
1870 merged.push(line);
1871 }
1872 merged
1873}
1874
/// Finds the byte offset of the last ASCII space in `line` that is not
/// strictly inside any of `element_spans`, or `None` if there is none.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    for (offset, _) in line.rmatch_indices(' ') {
        let inside_element = element_spans
            .iter()
            .any(|&(start, end)| start < offset && offset < end);
        if !inside_element {
            return Some(offset);
        }
    }
    None
}
1888
1889fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1891 let mut lines = Vec::new();
1892 let mut current_line = String::new();
1893 let mut current_length = 0;
1894 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1896 let length_mode = options.length_mode;
1897
1898 for (idx, element) in elements.iter().enumerate() {
1899 let element_str = format!("{element}");
1900 let element_len = element.display_width(length_mode);
1901
1902 let is_adjacent_to_prev = if idx > 0 {
1908 match (&elements[idx - 1], element) {
1909 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1910 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1911 _ => true,
1912 }
1913 } else {
1914 false
1915 };
1916
1917 if let Element::Text(text) = element {
1919 let has_leading_space = text.starts_with(char::is_whitespace);
1921 let words: Vec<&str> = text.split_whitespace().collect();
1923
1924 for (i, word) in words.iter().enumerate() {
1925 let word_len = display_len(word, length_mode);
1926 let is_trailing_punct = word
1928 .chars()
1929 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1930
1931 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1934
1935 if is_first_adjacent {
1936 if current_length + word_len > options.line_length && current_length > 0 {
1938 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1941 let before = current_line[..last_space].trim_end().to_string();
1942 let after = current_line[last_space + 1..].to_string();
1943 lines.push(before);
1944 current_line = format!("{after}{word}");
1945 current_length = display_len(¤t_line, length_mode);
1946 current_line_element_spans.clear();
1947 } else {
1948 current_line.push_str(word);
1949 current_length += word_len;
1950 }
1951 } else {
1952 current_line.push_str(word);
1953 current_length += word_len;
1954 }
1955 } else if current_length > 0
1956 && current_length + 1 + word_len > options.line_length
1957 && !is_trailing_punct
1958 {
1959 lines.push(current_line.trim().to_string());
1961 current_line = word.to_string();
1962 current_length = word_len;
1963 current_line_element_spans.clear();
1964 } else {
1965 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1969 current_line.push(' ');
1970 current_length += 1;
1971 }
1972 current_line.push_str(word);
1973 current_length += word_len;
1974 }
1975 }
1976 } else if matches!(
1977 element,
1978 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1979 ) && element_len > options.line_length
1980 {
1981 let (content, marker): (&str, &str) = match element {
1985 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1986 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1987 Element::Strikethrough(content) => (content.as_str(), "~~"),
1988 _ => unreachable!(),
1989 };
1990
1991 let words: Vec<&str> = content.split_whitespace().collect();
1992 let n = words.len();
1993
1994 if n == 0 {
1995 let full = format!("{marker}{marker}");
1997 let full_len = display_len(&full, length_mode);
1998 if !is_adjacent_to_prev && current_length > 0 {
1999 current_line.push(' ');
2000 current_length += 1;
2001 }
2002 current_line.push_str(&full);
2003 current_length += full_len;
2004 } else {
2005 for (i, word) in words.iter().enumerate() {
2006 let is_first = i == 0;
2007 let is_last = i == n - 1;
2008 let word_str: String = match (is_first, is_last) {
2009 (true, true) => format!("{marker}{word}{marker}"),
2010 (true, false) => format!("{marker}{word}"),
2011 (false, true) => format!("{word}{marker}"),
2012 (false, false) => word.to_string(),
2013 };
2014 let word_len = display_len(&word_str, length_mode);
2015
2016 let needs_space = if is_first {
2017 !is_adjacent_to_prev && current_length > 0
2018 } else {
2019 current_length > 0
2020 };
2021
2022 if needs_space && current_length + 1 + word_len > options.line_length {
2023 lines.push(current_line.trim_end().to_string());
2024 current_line = word_str;
2025 current_length = word_len;
2026 current_line_element_spans.clear();
2027 } else {
2028 if needs_space {
2029 current_line.push(' ');
2030 current_length += 1;
2031 }
2032 current_line.push_str(&word_str);
2033 current_length += word_len;
2034 }
2035 }
2036 }
2037 } else {
2038 if is_adjacent_to_prev {
2042 if current_length + element_len > options.line_length {
2044 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
2047 let before = current_line[..last_space].trim_end().to_string();
2048 let after = current_line[last_space + 1..].to_string();
2049 lines.push(before);
2050 current_line = format!("{after}{element_str}");
2051 current_length = display_len(¤t_line, length_mode);
2052 current_line_element_spans.clear();
2053 let start = after.len();
2055 current_line_element_spans.push((start, start + element_str.len()));
2056 } else {
2057 let start = current_line.len();
2059 current_line.push_str(&element_str);
2060 current_length += element_len;
2061 current_line_element_spans.push((start, current_line.len()));
2062 }
2063 } else {
2064 let start = current_line.len();
2065 current_line.push_str(&element_str);
2066 current_length += element_len;
2067 current_line_element_spans.push((start, current_line.len()));
2068 }
2069 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2070 lines.push(current_line.trim().to_string());
2072 current_line = element_str.clone();
2073 current_length = element_len;
2074 current_line_element_spans.clear();
2075 current_line_element_spans.push((0, element_str.len()));
2076 } else {
2077 let ends_with_opener =
2079 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2080 if current_length > 0 && !ends_with_opener {
2081 current_line.push(' ');
2082 current_length += 1;
2083 }
2084 let start = current_line.len();
2085 current_line.push_str(&element_str);
2086 current_length += element_len;
2087 current_line_element_spans.push((start, current_line.len()));
2088 }
2089 }
2090 }
2091
2092 if !current_line.is_empty() {
2094 lines.push(current_line.trim_end().to_string());
2095 }
2096
2097 lines
2098}
2099
2100pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2102 let lines: Vec<&str> = content.lines().collect();
2103 let mut result = Vec::new();
2104 let mut i = 0;
2105
2106 while i < lines.len() {
2107 let line = lines[i];
2108 let trimmed = line.trim();
2109
2110 if trimmed.is_empty() {
2112 result.push(String::new());
2113 i += 1;
2114 continue;
2115 }
2116
2117 if trimmed.starts_with('#') {
2119 result.push(line.to_string());
2120 i += 1;
2121 continue;
2122 }
2123
2124 if trimmed.starts_with(":::") {
2126 result.push(line.to_string());
2127 i += 1;
2128 continue;
2129 }
2130
2131 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2133 result.push(line.to_string());
2134 i += 1;
2135 while i < lines.len() {
2137 result.push(lines[i].to_string());
2138 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2139 i += 1;
2140 break;
2141 }
2142 i += 1;
2143 }
2144 continue;
2145 }
2146
2147 if calculate_indentation_width_default(line) >= 4 {
2149 result.push(line.to_string());
2151 i += 1;
2152 while i < lines.len() {
2153 let next_line = lines[i];
2154 if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2156 result.push(next_line.to_string());
2157 i += 1;
2158 } else {
2159 break;
2160 }
2161 }
2162 continue;
2163 }
2164
2165 if trimmed.starts_with('>') {
2167 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2170 let quote_prefix = line[0..gt_pos + 1].to_string();
2171 let quote_content = &line[quote_prefix.len()..].trim_start();
2172
2173 let reflowed = reflow_line(quote_content, options);
2174 for reflowed_line in reflowed.iter() {
2175 result.push(format!("{quote_prefix} {reflowed_line}"));
2176 }
2177 i += 1;
2178 continue;
2179 }
2180
2181 if is_horizontal_rule(trimmed) {
2183 result.push(line.to_string());
2184 i += 1;
2185 continue;
2186 }
2187
2188 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2190 let indent = line.len() - line.trim_start().len();
2192 let indent_str = " ".repeat(indent);
2193
2194 let mut marker_end = indent;
2197 let mut content_start = indent;
2198
2199 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2200 if let Some(period_pos) = line[indent..].find('.') {
2202 marker_end = indent + period_pos + 1; content_start = marker_end;
2204 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2208 content_start += 1;
2209 }
2210 }
2211 } else {
2212 marker_end = indent + 1; content_start = marker_end;
2215 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2219 content_start += 1;
2220 }
2221 }
2222
2223 let min_continuation_indent = content_start;
2225
2226 let rest = &line[content_start..];
2229 if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2230 marker_end = content_start + 3; content_start += 4; }
2233
2234 let marker = &line[indent..marker_end];
2235
2236 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2239 i += 1;
2240
2241 while i < lines.len() {
2245 let next_line = lines[i];
2246 let next_trimmed = next_line.trim();
2247
2248 if is_block_boundary(next_trimmed) {
2250 break;
2251 }
2252
2253 let next_indent = next_line.len() - next_line.trim_start().len();
2255 if next_indent >= min_continuation_indent {
2256 let trimmed_start = next_line.trim_start();
2259 list_content.push(trim_preserving_hard_break(trimmed_start));
2260 i += 1;
2261 } else {
2262 break;
2264 }
2265 }
2266
2267 let combined_content = if options.preserve_breaks {
2270 list_content[0].clone()
2271 } else {
2272 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2274 if has_hard_breaks {
2275 list_content.join("\n")
2277 } else {
2278 list_content.join(" ")
2280 }
2281 };
2282
2283 let trimmed_marker = marker;
2285 let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2286 indent + (content_start - indent).min(max_indent)
2289 } else {
2290 content_start
2291 };
2292
2293 let prefix_length = indent + trimmed_marker.len() + 1;
2295
2296 let adjusted_options = ReflowOptions {
2298 line_length: options.line_length.saturating_sub(prefix_length),
2299 ..options.clone()
2300 };
2301
2302 let reflowed = reflow_line(&combined_content, &adjusted_options);
2303 for (j, reflowed_line) in reflowed.iter().enumerate() {
2304 if j == 0 {
2305 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2306 } else {
2307 let continuation_indent = " ".repeat(continuation_spaces);
2309 result.push(format!("{continuation_indent}{reflowed_line}"));
2310 }
2311 }
2312 continue;
2313 }
2314
2315 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2317 result.push(line.to_string());
2318 i += 1;
2319 continue;
2320 }
2321
2322 if trimmed.starts_with('[') && line.contains("]:") {
2324 result.push(line.to_string());
2325 i += 1;
2326 continue;
2327 }
2328
2329 if is_definition_list_item(trimmed) {
2331 result.push(line.to_string());
2332 i += 1;
2333 continue;
2334 }
2335
2336 let mut is_single_line_paragraph = true;
2338 if i + 1 < lines.len() {
2339 let next_trimmed = lines[i + 1].trim();
2340 if !is_block_boundary(next_trimmed) {
2342 is_single_line_paragraph = false;
2343 }
2344 }
2345
2346 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2348 result.push(line.to_string());
2349 i += 1;
2350 continue;
2351 }
2352
2353 let mut paragraph_parts = Vec::new();
2355 let mut current_part = vec![line];
2356 i += 1;
2357
2358 if options.preserve_breaks {
2360 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2362 Some("\\")
2363 } else if line.ends_with(" ") {
2364 Some(" ")
2365 } else {
2366 None
2367 };
2368 let reflowed = reflow_line(line, options);
2369
2370 if let Some(break_marker) = hard_break_type {
2372 if !reflowed.is_empty() {
2373 let mut reflowed_with_break = reflowed;
2374 let last_idx = reflowed_with_break.len() - 1;
2375 if !has_hard_break(&reflowed_with_break[last_idx]) {
2376 reflowed_with_break[last_idx].push_str(break_marker);
2377 }
2378 result.extend(reflowed_with_break);
2379 }
2380 } else {
2381 result.extend(reflowed);
2382 }
2383 } else {
2384 while i < lines.len() {
2386 let prev_line = if !current_part.is_empty() {
2387 current_part.last().unwrap()
2388 } else {
2389 ""
2390 };
2391 let next_line = lines[i];
2392 let next_trimmed = next_line.trim();
2393
2394 if is_block_boundary(next_trimmed) {
2396 break;
2397 }
2398
2399 let prev_trimmed = prev_line.trim();
2402 let abbreviations = get_abbreviations(&options.abbreviations);
2403 let ends_with_sentence = (prev_trimmed.ends_with('.')
2404 || prev_trimmed.ends_with('!')
2405 || prev_trimmed.ends_with('?')
2406 || prev_trimmed.ends_with(".*")
2407 || prev_trimmed.ends_with("!*")
2408 || prev_trimmed.ends_with("?*")
2409 || prev_trimmed.ends_with("._")
2410 || prev_trimmed.ends_with("!_")
2411 || prev_trimmed.ends_with("?_")
2412 || prev_trimmed.ends_with(".\"")
2414 || prev_trimmed.ends_with("!\"")
2415 || prev_trimmed.ends_with("?\"")
2416 || prev_trimmed.ends_with(".'")
2417 || prev_trimmed.ends_with("!'")
2418 || prev_trimmed.ends_with("?'")
2419 || prev_trimmed.ends_with(".\u{201D}")
2420 || prev_trimmed.ends_with("!\u{201D}")
2421 || prev_trimmed.ends_with("?\u{201D}")
2422 || prev_trimmed.ends_with(".\u{2019}")
2423 || prev_trimmed.ends_with("!\u{2019}")
2424 || prev_trimmed.ends_with("?\u{2019}"))
2425 && !text_ends_with_abbreviation(
2426 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2427 &abbreviations,
2428 );
2429
2430 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2431 paragraph_parts.push(current_part.join(" "));
2433 current_part = vec![next_line];
2434 } else {
2435 current_part.push(next_line);
2436 }
2437 i += 1;
2438 }
2439
2440 if !current_part.is_empty() {
2442 if current_part.len() == 1 {
2443 paragraph_parts.push(current_part[0].to_string());
2445 } else {
2446 paragraph_parts.push(current_part.join(" "));
2447 }
2448 }
2449
2450 for (j, part) in paragraph_parts.iter().enumerate() {
2452 let reflowed = reflow_line(part, options);
2453 result.extend(reflowed);
2454
2455 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2459 let last_idx = result.len() - 1;
2460 if !has_hard_break(&result[last_idx]) {
2461 result[last_idx].push_str(" ");
2462 }
2463 }
2464 }
2465 }
2466 }
2467
2468 let result_text = result.join("\n");
2470 if content.ends_with('\n') && !result_text.ends_with('\n') {
2471 format!("{result_text}\n")
2472 } else {
2473 result_text
2474 }
2475}
2476
/// Result of reflowing a single paragraph: a byte range plus the text that
/// belongs there.
// NOTE(review): the producer of this struct is not visible here; the field
// descriptions below follow the field names and should be confirmed.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Start of the paragraph as a byte offset (presumably into the original
    /// document, inclusive — confirm against the producer).
    pub start_byte: usize,
    /// End of the paragraph as a byte offset (presumably exclusive — confirm
    /// against the producer).
    pub end_byte: usize,
    /// The reflowed replacement text for that range.
    pub reflowed_text: String,
}
2487
/// One line of blockquote content, together with how it was marked in the
/// source. Build values with `BlockquoteLineData::explicit` or
/// `BlockquoteLineData::lazy`.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// The line's content (the text handed to reflow).
    pub(crate) content: String,
    /// `true` for lines built with `explicit` (they carry a prefix), `false`
    /// for lines built with `lazy`.
    pub(crate) is_explicit: bool,
    /// The blockquote prefix of an explicit line; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}
2502
2503impl BlockquoteLineData {
2504 pub fn explicit(content: String, prefix: String) -> Self {
2506 Self {
2507 content,
2508 is_explicit: true,
2509 prefix: Some(prefix),
2510 }
2511 }
2512
2513 pub fn lazy(content: String) -> Self {
2515 Self {
2516 content,
2517 is_explicit: false,
2518 prefix: None,
2519 }
2520 }
2521}
2522
/// How continuation lines of a reflowed blockquote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every output line is emitted with the blockquote prefix.
    Explicit,
    /// Lines after the first are emitted without the prefix, unless the line's
    /// content forces one.
    Lazy,
}
2529
2530pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2538 let mut explicit_count = 0usize;
2539 let mut lazy_count = 0usize;
2540
2541 for line in lines.iter().skip(1) {
2542 if line.is_explicit {
2543 explicit_count += 1;
2544 } else {
2545 lazy_count += 1;
2546 }
2547 }
2548
2549 if explicit_count > 0 && lazy_count == 0 {
2550 BlockquoteContinuationStyle::Explicit
2551 } else if lazy_count > 0 && explicit_count == 0 {
2552 BlockquoteContinuationStyle::Lazy
2553 } else if explicit_count >= lazy_count {
2554 BlockquoteContinuationStyle::Explicit
2555 } else {
2556 BlockquoteContinuationStyle::Lazy
2557 }
2558}
2559
2560pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2565 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2566
2567 for (idx, line) in lines.iter().enumerate() {
2568 let Some(prefix) = line.prefix.as_ref() else {
2569 continue;
2570 };
2571 counts
2572 .entry(prefix.clone())
2573 .and_modify(|entry| entry.0 += 1)
2574 .or_insert((1, idx));
2575 }
2576
2577 counts
2578 .into_iter()
2579 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2580 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2581 })
2582 .map(|(prefix, _)| prefix)
2583 .unwrap_or_else(|| fallback.to_string())
2584}
2585
2586pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2591 let trimmed = content_line.trim_start();
2592 trimmed.starts_with('>')
2593 || trimmed.starts_with('#')
2594 || trimmed.starts_with("```")
2595 || trimmed.starts_with("~~~")
2596 || is_unordered_list_marker(trimmed)
2597 || is_numbered_list_item(trimmed)
2598 || is_horizontal_rule(trimmed)
2599 || is_definition_list_item(trimmed)
2600 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2601 || trimmed.starts_with(":::")
2602 || (trimmed.starts_with('<')
2603 && !trimmed.starts_with("<http")
2604 && !trimmed.starts_with("<https")
2605 && !trimmed.starts_with("<mailto:"))
2606}
2607
2608pub fn reflow_blockquote_content(
2617 lines: &[BlockquoteLineData],
2618 explicit_prefix: &str,
2619 continuation_style: BlockquoteContinuationStyle,
2620 options: &ReflowOptions,
2621) -> Vec<String> {
2622 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2623 let segments = split_into_segments_strs(&content_strs);
2624 let mut reflowed_content_lines: Vec<String> = Vec::new();
2625
2626 for segment in segments {
2627 let hard_break_type = segment.last().and_then(|&line| {
2628 let line = line.strip_suffix('\r').unwrap_or(line);
2629 if line.ends_with('\\') {
2630 Some("\\")
2631 } else if line.ends_with(" ") {
2632 Some(" ")
2633 } else {
2634 None
2635 }
2636 });
2637
2638 let pieces: Vec<&str> = segment
2639 .iter()
2640 .map(|&line| {
2641 if let Some(l) = line.strip_suffix('\\') {
2642 l.trim_end()
2643 } else if let Some(l) = line.strip_suffix(" ") {
2644 l.trim_end()
2645 } else {
2646 line.trim_end()
2647 }
2648 })
2649 .collect();
2650
2651 let segment_text = pieces.join(" ");
2652 let segment_text = segment_text.trim();
2653 if segment_text.is_empty() {
2654 continue;
2655 }
2656
2657 let mut reflowed = reflow_line(segment_text, options);
2658 if let Some(break_marker) = hard_break_type
2659 && !reflowed.is_empty()
2660 {
2661 let last_idx = reflowed.len() - 1;
2662 if !has_hard_break(&reflowed[last_idx]) {
2663 reflowed[last_idx].push_str(break_marker);
2664 }
2665 }
2666 reflowed_content_lines.extend(reflowed);
2667 }
2668
2669 let mut styled_lines: Vec<String> = Vec::new();
2670 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2671 let force_explicit = idx == 0
2672 || continuation_style == BlockquoteContinuationStyle::Explicit
2673 || should_force_explicit_blockquote_line(line);
2674 if force_explicit {
2675 styled_lines.push(format!("{explicit_prefix}{line}"));
2676 } else {
2677 styled_lines.push(line.clone());
2678 }
2679 }
2680
2681 styled_lines
2682}
2683
2684fn is_blockquote_content_boundary(content: &str) -> bool {
2685 let trimmed = content.trim();
2686 trimmed.is_empty()
2687 || is_block_boundary(trimmed)
2688 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2689 || trimmed.starts_with(":::")
2690 || crate::utils::is_template_directive_only(content)
2691 || is_standalone_attr_list(content)
2692 || is_snippet_block_delimiter(content)
2693}
2694
2695fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2696 let mut segments = Vec::new();
2697 let mut current = Vec::new();
2698
2699 for &line in lines {
2700 current.push(line);
2701 if has_hard_break(line) {
2702 segments.push(current);
2703 current = Vec::new();
2704 }
2705 }
2706
2707 if !current.is_empty() {
2708 segments.push(current);
2709 }
2710
2711 segments
2712}
2713
2714fn reflow_blockquote_paragraph_at_line(
2715 content: &str,
2716 lines: &[&str],
2717 target_idx: usize,
2718 options: &ReflowOptions,
2719) -> Option<ParagraphReflow> {
2720 let mut anchor_idx = target_idx;
2721 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2722 parsed.nesting_level
2723 } else {
2724 let mut found = None;
2725 let mut idx = target_idx;
2726 loop {
2727 if lines[idx].trim().is_empty() {
2728 break;
2729 }
2730 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2731 found = Some((idx, parsed.nesting_level));
2732 break;
2733 }
2734 if idx == 0 {
2735 break;
2736 }
2737 idx -= 1;
2738 }
2739 let (idx, level) = found?;
2740 anchor_idx = idx;
2741 level
2742 };
2743
2744 let mut para_start = anchor_idx;
2746 while para_start > 0 {
2747 let prev_idx = para_start - 1;
2748 let prev_line = lines[prev_idx];
2749
2750 if prev_line.trim().is_empty() {
2751 break;
2752 }
2753
2754 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2755 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2756 break;
2757 }
2758 para_start = prev_idx;
2759 continue;
2760 }
2761
2762 let prev_lazy = prev_line.trim_start();
2763 if is_blockquote_content_boundary(prev_lazy) {
2764 break;
2765 }
2766 para_start = prev_idx;
2767 }
2768
2769 while para_start < lines.len() {
2771 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2772 para_start += 1;
2773 continue;
2774 };
2775 target_level = parsed.nesting_level;
2776 break;
2777 }
2778
2779 if para_start >= lines.len() || para_start > target_idx {
2780 return None;
2781 }
2782
2783 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2786 let mut idx = para_start;
2787 while idx < lines.len() {
2788 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2789 break;
2790 }
2791
2792 let line = lines[idx];
2793 if line.trim().is_empty() {
2794 break;
2795 }
2796
2797 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2798 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2799 break;
2800 }
2801 collected.push((
2802 idx,
2803 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2804 ));
2805 idx += 1;
2806 continue;
2807 }
2808
2809 let lazy_content = line.trim_start();
2810 if is_blockquote_content_boundary(lazy_content) {
2811 break;
2812 }
2813
2814 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2815 idx += 1;
2816 }
2817
2818 if collected.is_empty() {
2819 return None;
2820 }
2821
2822 let para_end = collected[collected.len() - 1].0;
2823 if target_idx < para_start || target_idx > para_end {
2824 return None;
2825 }
2826
2827 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2828
2829 let fallback_prefix = line_data
2830 .iter()
2831 .find_map(|d| d.prefix.clone())
2832 .unwrap_or_else(|| "> ".to_string());
2833 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2834 let continuation_style = blockquote_continuation_style(&line_data);
2835
2836 let adjusted_line_length = options
2837 .line_length
2838 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2839 .max(1);
2840
2841 let adjusted_options = ReflowOptions {
2842 line_length: adjusted_line_length,
2843 ..options.clone()
2844 };
2845
2846 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2847
2848 if styled_lines.is_empty() {
2849 return None;
2850 }
2851
2852 let mut start_byte = 0;
2854 for line in lines.iter().take(para_start) {
2855 start_byte += line.len() + 1;
2856 }
2857
2858 let mut end_byte = start_byte;
2859 for line in lines.iter().take(para_end + 1).skip(para_start) {
2860 end_byte += line.len() + 1;
2861 }
2862
2863 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2864 if !includes_trailing_newline {
2865 end_byte -= 1;
2866 }
2867
2868 let reflowed_joined = styled_lines.join("\n");
2869 let reflowed_text = if includes_trailing_newline {
2870 if reflowed_joined.ends_with('\n') {
2871 reflowed_joined
2872 } else {
2873 format!("{reflowed_joined}\n")
2874 }
2875 } else if reflowed_joined.ends_with('\n') {
2876 reflowed_joined.trim_end_matches('\n').to_string()
2877 } else {
2878 reflowed_joined
2879 };
2880
2881 Some(ParagraphReflow {
2882 start_byte,
2883 end_byte,
2884 reflowed_text,
2885 })
2886}
2887
2888pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2906 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2907}
2908
2909pub fn reflow_paragraph_at_line_with_mode(
2911 content: &str,
2912 line_number: usize,
2913 line_length: usize,
2914 length_mode: ReflowLengthMode,
2915) -> Option<ParagraphReflow> {
2916 let options = ReflowOptions {
2917 line_length,
2918 length_mode,
2919 ..Default::default()
2920 };
2921 reflow_paragraph_at_line_with_options(content, line_number, &options)
2922}
2923
2924pub fn reflow_paragraph_at_line_with_options(
2935 content: &str,
2936 line_number: usize,
2937 options: &ReflowOptions,
2938) -> Option<ParagraphReflow> {
2939 if line_number == 0 {
2940 return None;
2941 }
2942
2943 let lines: Vec<&str> = content.lines().collect();
2944
2945 if line_number > lines.len() {
2947 return None;
2948 }
2949
2950 let target_idx = line_number - 1; let target_line = lines[target_idx];
2952 let trimmed = target_line.trim();
2953
2954 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2957 return Some(blockquote_reflow);
2958 }
2959
2960 if is_paragraph_boundary(trimmed, target_line) {
2962 return None;
2963 }
2964
2965 let mut para_start = target_idx;
2967 while para_start > 0 {
2968 let prev_idx = para_start - 1;
2969 let prev_line = lines[prev_idx];
2970 let prev_trimmed = prev_line.trim();
2971
2972 if is_paragraph_boundary(prev_trimmed, prev_line) {
2974 break;
2975 }
2976
2977 para_start = prev_idx;
2978 }
2979
2980 let mut para_end = target_idx;
2982 while para_end + 1 < lines.len() {
2983 let next_idx = para_end + 1;
2984 let next_line = lines[next_idx];
2985 let next_trimmed = next_line.trim();
2986
2987 if is_paragraph_boundary(next_trimmed, next_line) {
2989 break;
2990 }
2991
2992 para_end = next_idx;
2993 }
2994
2995 let paragraph_lines = &lines[para_start..=para_end];
2997
2998 let mut start_byte = 0;
3000 for line in lines.iter().take(para_start) {
3001 start_byte += line.len() + 1; }
3003
3004 let mut end_byte = start_byte;
3005 for line in paragraph_lines.iter() {
3006 end_byte += line.len() + 1; }
3008
3009 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3012
3013 if !includes_trailing_newline {
3015 end_byte -= 1;
3016 }
3017
3018 let paragraph_text = paragraph_lines.join("\n");
3020
3021 let reflowed = reflow_markdown(¶graph_text, options);
3023
3024 let reflowed_text = if includes_trailing_newline {
3028 if reflowed.ends_with('\n') {
3030 reflowed
3031 } else {
3032 format!("{reflowed}\n")
3033 }
3034 } else {
3035 if reflowed.ends_with('\n') {
3037 reflowed.trim_end_matches('\n').to_string()
3038 } else {
3039 reflowed
3040 }
3041 };
3042
3043 Some(ParagraphReflow {
3044 start_byte,
3045 end_byte,
3046 reflowed_text,
3047 })
3048}
3049
#[cfg(test)]
mod tests {
    use super::*;

    /// Only a period directly after a known abbreviation counts.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Ordinary words, other punctuation, and empty input must not match.
        for text in [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ] {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    /// A list marker is `-`, `*` or `+`, alone or followed by a space.
    #[test]
    fn test_is_unordered_list_marker() {
        for text in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(text), "expected {text:?} to be a list marker");
        }

        // Horizontal rules, emphasis and plain text are not list markers.
        for text in [
            "---",
            "***",
            "- - -",
            "* * *",
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ] {
            assert!(
                !is_unordered_list_marker(text),
                "expected {text:?} not to be a list marker"
            );
        }
    }

    #[test]
    fn test_is_block_boundary() {
        for text in [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ] {
            assert!(is_block_boundary(text), "expected {text:?} to be a block boundary");
        }

        for text in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(text), "expected {text:?} not to be a block boundary");
        }
    }

    /// A definition-list item must stay on its own line after reflow.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Block-level markers.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code: four leading spaces or a tab.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows, with and without outer pipes.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including text indented by less than a code block.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", "  text"));
    }

    /// Targeting a `:::` div marker line must not trigger a reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}