1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
13 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
14 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// How the length of a line is measured when deciding whether it fits
/// within the configured line length.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Count terminal display columns as reported by `unicode-width`.
    /// This is the default mode.
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
45#[derive(Clone)]
47pub struct ReflowOptions {
48 pub line_length: usize,
50 pub break_on_sentences: bool,
52 pub preserve_breaks: bool,
54 pub sentence_per_line: bool,
56 pub semantic_line_breaks: bool,
58 pub abbreviations: Option<Vec<String>>,
62 pub length_mode: ReflowLengthMode,
64 pub attr_lists: bool,
67}
68
69impl Default for ReflowOptions {
70 fn default() -> Self {
71 Self {
72 line_length: 80,
73 break_on_sentences: true,
74 preserve_breaks: false,
75 sentence_per_line: false,
76 semantic_line_breaks: false,
77 abbreviations: None,
78 length_mode: ReflowLengthMode::default(),
79 attr_lists: false,
80 }
81 }
82}
83
84fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
88 let chars: Vec<char> = text.chars().collect();
89
90 if pos + 1 >= chars.len() {
91 return false;
92 }
93
94 let c = chars[pos];
95 let next_char = chars[pos + 1];
96
97 if is_cjk_sentence_ending(c) {
100 let mut after_punct_pos = pos + 1;
102 while after_punct_pos < chars.len()
103 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
104 {
105 after_punct_pos += 1;
106 }
107
108 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
110 after_punct_pos += 1;
111 }
112
113 if after_punct_pos >= chars.len() {
115 return false;
116 }
117
118 while after_punct_pos < chars.len()
120 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
121 {
122 after_punct_pos += 1;
123 }
124
125 if after_punct_pos >= chars.len() {
126 return false;
127 }
128
129 return true;
132 }
133
134 if c != '.' && c != '!' && c != '?' {
136 return false;
137 }
138
139 let (_space_pos, after_space_pos) = if next_char == ' ' {
141 (pos + 1, pos + 2)
143 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
144 if chars[pos + 2] == ' ' {
146 (pos + 2, pos + 3)
148 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
149 (pos + 3, pos + 4)
151 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
152 && pos + 4 < chars.len()
153 && chars[pos + 3] == chars[pos + 2]
154 && chars[pos + 4] == ' '
155 {
156 (pos + 4, pos + 5)
158 } else {
159 return false;
160 }
161 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
162 (pos + 2, pos + 3)
164 } else if (next_char == '*' || next_char == '_')
165 && pos + 3 < chars.len()
166 && chars[pos + 2] == next_char
167 && chars[pos + 3] == ' '
168 {
169 (pos + 3, pos + 4)
171 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
172 (pos + 3, pos + 4)
174 } else {
175 return false;
176 };
177
178 let mut next_char_pos = after_space_pos;
180 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
181 next_char_pos += 1;
182 }
183
184 if next_char_pos >= chars.len() {
186 return false;
187 }
188
189 let mut first_letter_pos = next_char_pos;
191 while first_letter_pos < chars.len()
192 && (chars[first_letter_pos] == '*'
193 || chars[first_letter_pos] == '_'
194 || chars[first_letter_pos] == '~'
195 || is_opening_quote(chars[first_letter_pos]))
196 {
197 first_letter_pos += 1;
198 }
199
200 if first_letter_pos >= chars.len() {
202 return false;
203 }
204
205 let first_char = chars[first_letter_pos];
207 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
208 return false;
209 }
210
211 if pos > 0 && c == '.' {
213 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
215 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
216 return false;
217 }
218
219 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
222 return false;
223 }
224 }
225 true
226}
227
228pub fn split_into_sentences(text: &str) -> Vec<String> {
230 split_into_sentences_custom(text, &None)
231}
232
233pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
235 let abbreviations = get_abbreviations(custom_abbreviations);
236 split_into_sentences_with_set(text, &abbreviations)
237}
238
239fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
242 let mut sentences = Vec::new();
243 let mut current_sentence = String::new();
244 let mut chars = text.chars().peekable();
245 let mut pos = 0;
246
247 while let Some(c) = chars.next() {
248 current_sentence.push(c);
249
250 if is_sentence_boundary(text, pos, abbreviations) {
251 while let Some(&next) = chars.peek() {
253 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
254 current_sentence.push(chars.next().unwrap());
255 pos += 1;
256 } else {
257 break;
258 }
259 }
260
261 if chars.peek() == Some(&' ') {
263 chars.next();
264 pos += 1;
265 }
266
267 sentences.push(current_sentence.trim().to_string());
268 current_sentence.clear();
269 }
270
271 pos += 1;
272 }
273
274 if !current_sentence.trim().is_empty() {
276 sentences.push(current_sentence.trim().to_string());
277 }
278 sentences
279}
280
/// Returns `true` when `line` is a thematic break: it starts with `-`,
/// `_` or `*`, contains only that character and spaces, and has at
/// least three of the marker character.
fn is_horizontal_rule(line: &str) -> bool {
    if line.len() < 3 {
        return false;
    }

    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
309
/// Returns `true` when `line` starts with one or more numeric characters
/// immediately followed by `". "` (for example `"12. item"`).
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first character that is not numeric.
    let digits_end = match line.char_indices().find(|(_, ch)| !ch.is_numeric()) {
        Some((offset, _)) => offset,
        // Empty, or numeric characters all the way to the end.
        None => return false,
    };

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
333
334fn is_unordered_list_marker(s: &str) -> bool {
336 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
337 && !is_horizontal_rule(s)
338 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
339}
340
341fn is_block_boundary_core(trimmed: &str) -> bool {
344 trimmed.is_empty()
345 || trimmed.starts_with('#')
346 || trimmed.starts_with("```")
347 || trimmed.starts_with("~~~")
348 || trimmed.starts_with('>')
349 || (trimmed.starts_with('[') && trimmed.contains("]:"))
350 || is_horizontal_rule(trimmed)
351 || is_unordered_list_marker(trimmed)
352 || is_numbered_list_item(trimmed)
353 || is_definition_list_item(trimmed)
354 || trimmed.starts_with(":::")
355}
356
357fn is_block_boundary(trimmed: &str) -> bool {
360 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
361}
362
363fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
367 is_block_boundary_core(trimmed)
368 || ElementCache::calculate_indentation_width_default(line) >= 4
369 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
370}
371
/// Returns `true` when `line` ends with a Markdown hard line break:
/// two or more trailing spaces, or a trailing backslash.
///
/// A trailing `\r` (from CRLF input) is ignored before the check. A single
/// trailing space is *not* a hard break under CommonMark, so it must not
/// be reported as one.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
381
/// Returns `true` when `text` ends with `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
386
/// Trims trailing whitespace from `s` while keeping a hard line break.
///
/// * A trailing `\r` is dropped first.
/// * A line ending in a backslash is returned unchanged.
/// * A line ending in two or more spaces (a CommonMark hard break) is
///   normalised to its content followed by exactly two spaces; a
///   whitespace-only line becomes empty.
/// * Anything else, including a single trailing space, is right-trimmed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Re-attach the minimal hard-break marker.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
417
418fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
420 if options.attr_lists {
421 parse_markdown_elements_with_attr_lists(text)
422 } else {
423 parse_markdown_elements(text)
424 }
425}
426
427pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
428 if options.sentence_per_line {
430 let elements = parse_elements(line, options);
431 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
432 }
433
434 if options.semantic_line_breaks {
436 let elements = parse_elements(line, options);
437 return reflow_elements_semantic(&elements, options);
438 }
439
440 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
443 return vec![line.to_string()];
444 }
445
446 let elements = parse_elements(line, options);
448
449 reflow_elements(&elements, options)
451}
452
/// Where the image inside a linked image (an image wrapped in a link)
/// comes from.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LinkedImageSource {
    /// Inline image URL.
    Inline(String),
    /// Reference label for the image.
    Reference(String),
}
461
/// Where the link wrapped around a linked image points.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LinkedImageTarget {
    /// Inline link URL.
    Inline(String),
    /// Reference label for the link.
    Reference(String),
}
470
471#[derive(Debug, Clone)]
473enum Element {
474 Text(String),
476 Link { text: String, url: String },
478 ReferenceLink { text: String, reference: String },
480 EmptyReferenceLink { text: String },
482 ShortcutReference { reference: String },
484 InlineImage { alt: String, url: String },
486 ReferenceImage { alt: String, reference: String },
488 EmptyReferenceImage { alt: String },
490 LinkedImage {
496 alt: String,
497 img_source: LinkedImageSource,
498 link_target: LinkedImageTarget,
499 },
500 FootnoteReference { note: String },
502 Strikethrough(String),
504 WikiLink(String),
506 InlineMath(String),
508 DisplayMath(String),
510 EmojiShortcode(String),
512 Autolink(String),
514 HtmlTag(String),
516 HtmlEntity(String),
518 HugoShortcode(String),
520 AttrList(String),
522 Code(String),
524 Bold {
526 content: String,
527 underscore: bool,
529 },
530 Italic {
532 content: String,
533 underscore: bool,
535 },
536}
537
538impl std::fmt::Display for Element {
539 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
540 match self {
541 Element::Text(s) => write!(f, "{s}"),
542 Element::Link { text, url } => write!(f, "[{text}]({url})"),
543 Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
544 Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
545 Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
546 Element::InlineImage { alt, url } => write!(f, ""),
547 Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
548 Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
549 Element::LinkedImage {
550 alt,
551 img_source,
552 link_target,
553 } => {
554 let img_part = match img_source {
556 LinkedImageSource::Inline(url) => format!(""),
557 LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
558 };
559 match link_target {
561 LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
562 LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
563 }
564 }
565 Element::FootnoteReference { note } => write!(f, "[^{note}]"),
566 Element::Strikethrough(s) => write!(f, "~~{s}~~"),
567 Element::WikiLink(s) => write!(f, "[[{s}]]"),
568 Element::InlineMath(s) => write!(f, "${s}$"),
569 Element::DisplayMath(s) => write!(f, "$${s}$$"),
570 Element::EmojiShortcode(s) => write!(f, ":{s}:"),
571 Element::Autolink(s) => write!(f, "{s}"),
572 Element::HtmlTag(s) => write!(f, "{s}"),
573 Element::HtmlEntity(s) => write!(f, "{s}"),
574 Element::HugoShortcode(s) => write!(f, "{s}"),
575 Element::AttrList(s) => write!(f, "{s}"),
576 Element::Code(s) => write!(f, "`{s}`"),
577 Element::Bold { content, underscore } => {
578 if *underscore {
579 write!(f, "__{content}__")
580 } else {
581 write!(f, "**{content}**")
582 }
583 }
584 Element::Italic { content, underscore } => {
585 if *underscore {
586 write!(f, "_{content}_")
587 } else {
588 write!(f, "*{content}*")
589 }
590 }
591 }
592 }
593}
594
595impl Element {
596 fn display_width(&self, mode: ReflowLengthMode) -> usize {
600 let formatted = format!("{self}");
601 display_len(&formatted, mode)
602 }
603}
604
/// An emphasis, strong or strikethrough region located in the source text.
#[derive(Clone, Debug)]
struct EmphasisSpan {
    /// Byte offset where the span (including its opening delimiter) starts.
    start: usize,
    /// Byte offset just past the span's closing delimiter.
    end: usize,
    /// Text between the delimiters.
    content: String,
    /// `true` for strong emphasis.
    is_strong: bool,
    /// `true` for strikethrough.
    is_strikethrough: bool,
    /// `true` when the delimiter is `_` / `__` rather than `*` / `**`.
    uses_underscore: bool,
}
621
622fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
632 let mut spans = Vec::new();
633 let mut options = Options::empty();
634 options.insert(Options::ENABLE_STRIKETHROUGH);
635
636 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
639 let mut strikethrough_stack: Vec<usize> = Vec::new();
640
641 let parser = Parser::new_ext(text, options).into_offset_iter();
642
643 for (event, range) in parser {
644 match event {
645 Event::Start(Tag::Emphasis) => {
646 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
648 emphasis_stack.push((range.start, uses_underscore));
649 }
650 Event::End(TagEnd::Emphasis) => {
651 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
652 let content_start = start_byte + 1;
654 let content_end = range.end - 1;
655 if content_end > content_start
656 && let Some(content) = text.get(content_start..content_end)
657 {
658 spans.push(EmphasisSpan {
659 start: start_byte,
660 end: range.end,
661 content: content.to_string(),
662 is_strong: false,
663 is_strikethrough: false,
664 uses_underscore,
665 });
666 }
667 }
668 }
669 Event::Start(Tag::Strong) => {
670 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
672 strong_stack.push((range.start, uses_underscore));
673 }
674 Event::End(TagEnd::Strong) => {
675 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
676 let content_start = start_byte + 2;
678 let content_end = range.end - 2;
679 if content_end > content_start
680 && let Some(content) = text.get(content_start..content_end)
681 {
682 spans.push(EmphasisSpan {
683 start: start_byte,
684 end: range.end,
685 content: content.to_string(),
686 is_strong: true,
687 is_strikethrough: false,
688 uses_underscore,
689 });
690 }
691 }
692 }
693 Event::Start(Tag::Strikethrough) => {
694 strikethrough_stack.push(range.start);
695 }
696 Event::End(TagEnd::Strikethrough) => {
697 if let Some(start_byte) = strikethrough_stack.pop() {
698 let content_start = start_byte + 2;
700 let content_end = range.end - 2;
701 if content_end > content_start
702 && let Some(content) = text.get(content_start..content_end)
703 {
704 spans.push(EmphasisSpan {
705 start: start_byte,
706 end: range.end,
707 content: content.to_string(),
708 is_strong: false,
709 is_strikethrough: true,
710 uses_underscore: false,
711 });
712 }
713 }
714 }
715 _ => {}
716 }
717 }
718
719 spans.sort_by_key(|s| s.start);
721 spans
722}
723
724fn parse_markdown_elements(text: &str) -> Vec<Element> {
735 parse_markdown_elements_inner(text, false)
736}
737
738fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
739 parse_markdown_elements_inner(text, true)
740}
741
742fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
743 let mut elements = Vec::new();
744 let mut remaining = text;
745
746 let emphasis_spans = extract_emphasis_spans(text);
748
749 while !remaining.is_empty() {
750 let current_offset = text.len() - remaining.len();
752 let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
754
755 if remaining.contains("[!") {
759 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
761 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
762 {
763 earliest_match = Some((m.start(), "linked_image_ii", m));
764 }
765
766 if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
768 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
769 {
770 earliest_match = Some((m.start(), "linked_image_ri", m));
771 }
772
773 if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
775 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
776 {
777 earliest_match = Some((m.start(), "linked_image_ir", m));
778 }
779
780 if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
782 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
783 {
784 earliest_match = Some((m.start(), "linked_image_rr", m));
785 }
786 }
787
788 if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
791 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
792 {
793 earliest_match = Some((m.start(), "inline_image", m));
794 }
795
796 if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
798 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
799 {
800 earliest_match = Some((m.start(), "ref_image", m));
801 }
802
803 if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
805 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
806 {
807 earliest_match = Some((m.start(), "footnote_ref", m));
808 }
809
810 if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
812 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
813 {
814 earliest_match = Some((m.start(), "inline_link", m));
815 }
816
817 if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
819 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
820 {
821 earliest_match = Some((m.start(), "ref_link", m));
822 }
823
824 if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
827 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
828 {
829 earliest_match = Some((m.start(), "shortcut_ref", m));
830 }
831
832 if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
834 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
835 {
836 earliest_match = Some((m.start(), "wiki_link", m));
837 }
838
839 if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
841 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
842 {
843 earliest_match = Some((m.start(), "display_math", m));
844 }
845
846 if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
848 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
849 {
850 earliest_match = Some((m.start(), "inline_math", m));
851 }
852
853 if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
857 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
858 {
859 earliest_match = Some((m.start(), "emoji", m));
860 }
861
862 if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
864 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
865 {
866 earliest_match = Some((m.start(), "html_entity", m));
867 }
868
869 if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
872 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
873 {
874 earliest_match = Some((m.start(), "hugo_shortcode", m));
875 }
876
877 if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
880 && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
881 {
882 let matched_text = &remaining[m.start()..m.end()];
884 let is_url_autolink = matched_text.starts_with("<http://")
885 || matched_text.starts_with("<https://")
886 || matched_text.starts_with("<mailto:")
887 || matched_text.starts_with("<ftp://")
888 || matched_text.starts_with("<ftps://");
889
890 let is_email_autolink = {
893 let content = matched_text.trim_start_matches('<').trim_end_matches('>');
894 EMAIL_PATTERN.is_match(content)
895 };
896
897 if is_url_autolink || is_email_autolink {
898 earliest_match = Some((m.start(), "autolink", m));
899 } else {
900 earliest_match = Some((m.start(), "html_tag", m));
901 }
902 }
903
904 let mut next_special = remaining.len();
906 let mut special_type = "";
907 let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
908 let mut attr_list_len: usize = 0;
909
910 if let Some(pos) = remaining.find('`')
912 && pos < next_special
913 {
914 next_special = pos;
915 special_type = "code";
916 }
917
918 if attr_lists
920 && let Some(pos) = remaining.find('{')
921 && pos < next_special
922 {
923 if let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..]) {
924 if m.start() == 0 {
925 next_special = pos;
926 special_type = "attr_list";
927 attr_list_len = m.end();
928 }
929 }
930 }
931
932 for span in &emphasis_spans {
935 if span.start >= current_offset && span.start < current_offset + remaining.len() {
936 let pos_in_remaining = span.start - current_offset;
937 if pos_in_remaining < next_special {
938 next_special = pos_in_remaining;
939 special_type = "pulldown_emphasis";
940 pulldown_emphasis = Some(span);
941 }
942 break; }
944 }
945
946 let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
948 pos < next_special
949 } else {
950 false
951 };
952
953 if should_process_markdown_link {
954 let (pos, pattern_type, match_obj) = earliest_match.unwrap();
955
956 if pos > 0 {
958 elements.push(Element::Text(remaining[..pos].to_string()));
959 }
960
961 match pattern_type {
963 "linked_image_ii" => {
965 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
966 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
967 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
968 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
969 elements.push(Element::LinkedImage {
970 alt: alt.to_string(),
971 img_source: LinkedImageSource::Inline(img_url.to_string()),
972 link_target: LinkedImageTarget::Inline(link_url.to_string()),
973 });
974 remaining = &remaining[match_obj.end()..];
975 } else {
976 elements.push(Element::Text("[".to_string()));
977 remaining = &remaining[1..];
978 }
979 }
980 "linked_image_ri" => {
982 if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
983 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
984 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
985 let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
986 elements.push(Element::LinkedImage {
987 alt: alt.to_string(),
988 img_source: LinkedImageSource::Reference(img_ref.to_string()),
989 link_target: LinkedImageTarget::Inline(link_url.to_string()),
990 });
991 remaining = &remaining[match_obj.end()..];
992 } else {
993 elements.push(Element::Text("[".to_string()));
994 remaining = &remaining[1..];
995 }
996 }
997 "linked_image_ir" => {
999 if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1000 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1001 let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1002 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1003 elements.push(Element::LinkedImage {
1004 alt: alt.to_string(),
1005 img_source: LinkedImageSource::Inline(img_url.to_string()),
1006 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1007 });
1008 remaining = &remaining[match_obj.end()..];
1009 } else {
1010 elements.push(Element::Text("[".to_string()));
1011 remaining = &remaining[1..];
1012 }
1013 }
1014 "linked_image_rr" => {
1016 if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
1017 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1018 let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1019 let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1020 elements.push(Element::LinkedImage {
1021 alt: alt.to_string(),
1022 img_source: LinkedImageSource::Reference(img_ref.to_string()),
1023 link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1024 });
1025 remaining = &remaining[match_obj.end()..];
1026 } else {
1027 elements.push(Element::Text("[".to_string()));
1028 remaining = &remaining[1..];
1029 }
1030 }
1031 "inline_image" => {
1032 if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
1033 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1034 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1035 elements.push(Element::InlineImage {
1036 alt: alt.to_string(),
1037 url: url.to_string(),
1038 });
1039 remaining = &remaining[match_obj.end()..];
1040 } else {
1041 elements.push(Element::Text("!".to_string()));
1042 remaining = &remaining[1..];
1043 }
1044 }
1045 "ref_image" => {
1046 if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1047 let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1048 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1049
1050 if reference.is_empty() {
1051 elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1052 } else {
1053 elements.push(Element::ReferenceImage {
1054 alt: alt.to_string(),
1055 reference: reference.to_string(),
1056 });
1057 }
1058 remaining = &remaining[match_obj.end()..];
1059 } else {
1060 elements.push(Element::Text("!".to_string()));
1061 remaining = &remaining[1..];
1062 }
1063 }
1064 "footnote_ref" => {
1065 if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1066 let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1067 elements.push(Element::FootnoteReference { note: note.to_string() });
1068 remaining = &remaining[match_obj.end()..];
1069 } else {
1070 elements.push(Element::Text("[".to_string()));
1071 remaining = &remaining[1..];
1072 }
1073 }
1074 "inline_link" => {
1075 if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1076 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1077 let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1078 elements.push(Element::Link {
1079 text: text.to_string(),
1080 url: url.to_string(),
1081 });
1082 remaining = &remaining[match_obj.end()..];
1083 } else {
1084 elements.push(Element::Text("[".to_string()));
1086 remaining = &remaining[1..];
1087 }
1088 }
1089 "ref_link" => {
1090 if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1091 let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1092 let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1093
1094 if reference.is_empty() {
1095 elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1097 } else {
1098 elements.push(Element::ReferenceLink {
1100 text: text.to_string(),
1101 reference: reference.to_string(),
1102 });
1103 }
1104 remaining = &remaining[match_obj.end()..];
1105 } else {
1106 elements.push(Element::Text("[".to_string()));
1108 remaining = &remaining[1..];
1109 }
1110 }
1111 "shortcut_ref" => {
1112 if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1113 let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1114 elements.push(Element::ShortcutReference {
1115 reference: reference.to_string(),
1116 });
1117 remaining = &remaining[match_obj.end()..];
1118 } else {
1119 elements.push(Element::Text("[".to_string()));
1121 remaining = &remaining[1..];
1122 }
1123 }
1124 "wiki_link" => {
1125 if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1126 let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1127 elements.push(Element::WikiLink(content.to_string()));
1128 remaining = &remaining[match_obj.end()..];
1129 } else {
1130 elements.push(Element::Text("[[".to_string()));
1131 remaining = &remaining[2..];
1132 }
1133 }
1134 "display_math" => {
1135 if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1136 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1137 elements.push(Element::DisplayMath(math.to_string()));
1138 remaining = &remaining[match_obj.end()..];
1139 } else {
1140 elements.push(Element::Text("$$".to_string()));
1141 remaining = &remaining[2..];
1142 }
1143 }
1144 "inline_math" => {
1145 if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1146 let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1147 elements.push(Element::InlineMath(math.to_string()));
1148 remaining = &remaining[match_obj.end()..];
1149 } else {
1150 elements.push(Element::Text("$".to_string()));
1151 remaining = &remaining[1..];
1152 }
1153 }
1154 "emoji" => {
1156 if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1157 let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1158 elements.push(Element::EmojiShortcode(emoji.to_string()));
1159 remaining = &remaining[match_obj.end()..];
1160 } else {
1161 elements.push(Element::Text(":".to_string()));
1162 remaining = &remaining[1..];
1163 }
1164 }
1165 "html_entity" => {
1166 elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
1168 remaining = &remaining[match_obj.end()..];
1169 }
1170 "hugo_shortcode" => {
1171 elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
1173 remaining = &remaining[match_obj.end()..];
1174 }
1175 "autolink" => {
1176 elements.push(Element::Autolink(match_obj.as_str().to_string()));
1178 remaining = &remaining[match_obj.end()..];
1179 }
1180 "html_tag" => {
1181 elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
1183 remaining = &remaining[match_obj.end()..];
1184 }
1185 _ => {
1186 elements.push(Element::Text("[".to_string()));
1188 remaining = &remaining[1..];
1189 }
1190 }
1191 } else {
1192 if next_special > 0 && next_special < remaining.len() {
1196 elements.push(Element::Text(remaining[..next_special].to_string()));
1197 remaining = &remaining[next_special..];
1198 }
1199
1200 match special_type {
1202 "code" => {
1203 if let Some(code_end) = remaining[1..].find('`') {
1205 let code = &remaining[1..1 + code_end];
1206 elements.push(Element::Code(code.to_string()));
1207 remaining = &remaining[1 + code_end + 1..];
1208 } else {
1209 elements.push(Element::Text(remaining.to_string()));
1211 break;
1212 }
1213 }
1214 "attr_list" => {
1215 elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1216 remaining = &remaining[attr_list_len..];
1217 }
1218 "pulldown_emphasis" => {
1219 if let Some(span) = pulldown_emphasis {
1221 let span_len = span.end - span.start;
1222 if span.is_strikethrough {
1223 elements.push(Element::Strikethrough(span.content.clone()));
1224 } else if span.is_strong {
1225 elements.push(Element::Bold {
1226 content: span.content.clone(),
1227 underscore: span.uses_underscore,
1228 });
1229 } else {
1230 elements.push(Element::Italic {
1231 content: span.content.clone(),
1232 underscore: span.uses_underscore,
1233 });
1234 }
1235 remaining = &remaining[span_len..];
1236 } else {
1237 elements.push(Element::Text(remaining[..1].to_string()));
1239 remaining = &remaining[1..];
1240 }
1241 }
1242 _ => {
1243 elements.push(Element::Text(remaining.to_string()));
1245 break;
1246 }
1247 }
1248 }
1249 }
1250
1251 elements
1252}
1253
1254fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1256 let abbreviations = get_abbreviations(custom_abbreviations);
1257 let mut lines = Vec::new();
1258 let mut current_line = String::new();
1259
1260 for (idx, element) in elements.iter().enumerate() {
1261 let element_str = format!("{element}");
1262
1263 if let Element::Text(text) = element {
1265 let combined = format!("{current_line}{text}");
1267 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1269
1270 if sentences.len() > 1 {
1271 for (i, sentence) in sentences.iter().enumerate() {
1273 if i == 0 {
1274 let trimmed = sentence.trim();
1277
1278 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1279 current_line = sentence.to_string();
1281 } else {
1282 lines.push(sentence.to_string());
1284 current_line.clear();
1285 }
1286 } else if i == sentences.len() - 1 {
1287 let trimmed = sentence.trim();
1289 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1290
1291 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1292 lines.push(sentence.to_string());
1294 current_line.clear();
1295 } else {
1296 current_line = sentence.to_string();
1298 }
1299 } else {
1300 lines.push(sentence.to_string());
1302 }
1303 }
1304 } else {
1305 let trimmed = combined.trim();
1307
1308 if trimmed.is_empty() {
1312 continue;
1313 }
1314
1315 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1316
1317 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1318 lines.push(trimmed.to_string());
1320 current_line.clear();
1321 } else {
1322 current_line = combined;
1324 }
1325 }
1326 } else if let Element::Italic { content, underscore } = element {
1327 let marker = if *underscore { "_" } else { "*" };
1329 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1330 } else if let Element::Bold { content, underscore } = element {
1331 let marker = if *underscore { "__" } else { "**" };
1333 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1334 } else if let Element::Strikethrough(content) = element {
1335 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1337 } else {
1338 let is_adjacent = if idx > 0 {
1341 match &elements[idx - 1] {
1342 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1343 _ => true,
1344 }
1345 } else {
1346 false
1347 };
1348
1349 if !is_adjacent
1351 && !current_line.is_empty()
1352 && !current_line.ends_with(' ')
1353 && !current_line.ends_with('(')
1354 && !current_line.ends_with('[')
1355 {
1356 current_line.push(' ');
1357 }
1358 current_line.push_str(&element_str);
1359 }
1360 }
1361
1362 if !current_line.is_empty() {
1364 lines.push(current_line.trim().to_string());
1365 }
1366 lines
1367}
1368
1369fn handle_emphasis_sentence_split(
1371 content: &str,
1372 marker: &str,
1373 abbreviations: &HashSet<String>,
1374 current_line: &mut String,
1375 lines: &mut Vec<String>,
1376) {
1377 let sentences = split_into_sentences_with_set(content, abbreviations);
1379
1380 if sentences.len() <= 1 {
1381 if !current_line.is_empty()
1383 && !current_line.ends_with(' ')
1384 && !current_line.ends_with('(')
1385 && !current_line.ends_with('[')
1386 {
1387 current_line.push(' ');
1388 }
1389 current_line.push_str(marker);
1390 current_line.push_str(content);
1391 current_line.push_str(marker);
1392
1393 let trimmed = content.trim();
1395 let ends_with_punct = ends_with_sentence_punct(trimmed);
1396 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1397 lines.push(current_line.clone());
1398 current_line.clear();
1399 }
1400 } else {
1401 for (i, sentence) in sentences.iter().enumerate() {
1403 let trimmed = sentence.trim();
1404 if trimmed.is_empty() {
1405 continue;
1406 }
1407
1408 if i == 0 {
1409 if !current_line.is_empty()
1411 && !current_line.ends_with(' ')
1412 && !current_line.ends_with('(')
1413 && !current_line.ends_with('[')
1414 {
1415 current_line.push(' ');
1416 }
1417 current_line.push_str(marker);
1418 current_line.push_str(trimmed);
1419 current_line.push_str(marker);
1420
1421 let ends_with_punct = ends_with_sentence_punct(trimmed);
1423 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1424 lines.push(current_line.clone());
1425 current_line.clear();
1426 }
1427 } else if i == sentences.len() - 1 {
1428 let ends_with_punct = ends_with_sentence_punct(trimmed);
1430
1431 let mut line = String::new();
1432 line.push_str(marker);
1433 line.push_str(trimmed);
1434 line.push_str(marker);
1435
1436 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1437 lines.push(line);
1438 } else {
1439 *current_line = line;
1441 }
1442 } else {
1443 let mut line = String::new();
1445 line.push_str(marker);
1446 line.push_str(trimmed);
1447 line.push_str(marker);
1448 lines.push(line);
1449 }
1450 }
1451 }
1452}
1453
/// Lowercase words that `split_at_break_word` searches for as split points.
///
/// A match only counts when the word is at the start of the text or preceded by a
/// space, and is followed by a space; the line is then broken immediately before
/// the word, so the word starts the following line.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1489
/// Returns `true` for the characters treated as clause separators:
/// comma, semicolon, colon and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1494
1495fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1499 let mut spans = Vec::new();
1500 let mut offset = 0;
1501 for element in elements {
1502 let rendered = format!("{element}");
1503 let len = rendered.len();
1504 if !matches!(element, Element::Text(_)) {
1505 spans.push((offset, offset + len));
1506 }
1507 offset += len;
1508 }
1509 spans
1510}
1511
/// Returns `true` when `pos` lies strictly inside any of `spans`.
///
/// Both ends are exclusive: a position equal to a span's start or end is
/// considered outside it.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1516
/// Fraction of the configured line length used as a lower bound on fragment size.
///
/// `split_at_clause_punctuation` and `split_at_break_word` refuse a split whose
/// first part is shorter than `line_length * MIN_SPLIT_RATIO`, and
/// `reflow_elements_semantic` uses the same threshold to decide which lines are
/// short enough to try merging back into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1520
1521fn split_at_clause_punctuation(
1525 text: &str,
1526 line_length: usize,
1527 element_spans: &[(usize, usize)],
1528 length_mode: ReflowLengthMode,
1529) -> Option<(String, String)> {
1530 let chars: Vec<char> = text.chars().collect();
1531 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1532
1533 let mut width_acc = 0;
1535 let mut search_end_char = 0;
1536 for (idx, &c) in chars.iter().enumerate() {
1537 let c_width = display_len(&c.to_string(), length_mode);
1538 if width_acc + c_width > line_length {
1539 break;
1540 }
1541 width_acc += c_width;
1542 search_end_char = idx + 1;
1543 }
1544
1545 let mut best_pos = None;
1546 for i in (0..search_end_char).rev() {
1547 if is_clause_punctuation(chars[i]) {
1548 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1550 if !is_inside_element(byte_pos, element_spans) {
1551 best_pos = Some(i);
1552 break;
1553 }
1554 }
1555 }
1556
1557 let pos = best_pos?;
1558
1559 let first: String = chars[..=pos].iter().collect();
1561 let first_display_len = display_len(&first, length_mode);
1562 if first_display_len < min_first_len {
1563 return None;
1564 }
1565
1566 let rest: String = chars[pos + 1..].iter().collect();
1568 let rest = rest.trim_start().to_string();
1569
1570 if rest.is_empty() {
1571 return None;
1572 }
1573
1574 Some((first, rest))
1575}
1576
1577fn split_at_break_word(
1581 text: &str,
1582 line_length: usize,
1583 element_spans: &[(usize, usize)],
1584 length_mode: ReflowLengthMode,
1585) -> Option<(String, String)> {
1586 let lower = text.to_lowercase();
1587 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1588 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1591 let mut search_start = 0;
1592 while let Some(pos) = lower[search_start..].find(word) {
1593 let abs_pos = search_start + pos;
1594
1595 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1597 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1598
1599 if preceded_by_space && followed_by_space {
1600 let first_part = text[..abs_pos].trim_end();
1602 let first_part_len = display_len(first_part, length_mode);
1603
1604 if first_part_len >= min_first_len
1605 && first_part_len <= line_length
1606 && !is_inside_element(abs_pos, element_spans)
1607 {
1608 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1610 best_split = Some((abs_pos, word.len()));
1611 }
1612 }
1613 }
1614
1615 search_start = abs_pos + word.len();
1616 }
1617 }
1618
1619 let (byte_start, _word_len) = best_split?;
1620
1621 let first = text[..byte_start].trim_end().to_string();
1622 let rest = text[byte_start..].to_string();
1623
1624 if first.is_empty() || rest.trim().is_empty() {
1625 return None;
1626 }
1627
1628 Some((first, rest))
1629}
1630
1631fn cascade_split_line(
1634 text: &str,
1635 line_length: usize,
1636 abbreviations: &Option<Vec<String>>,
1637 length_mode: ReflowLengthMode,
1638 attr_lists: bool,
1639) -> Vec<String> {
1640 if line_length == 0 || display_len(text, length_mode) <= line_length {
1641 return vec![text.to_string()];
1642 }
1643
1644 let elements = parse_markdown_elements_inner(text, attr_lists);
1645 let element_spans = compute_element_spans(&elements);
1646
1647 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1649 let mut result = vec![first];
1650 result.extend(cascade_split_line(
1651 &rest,
1652 line_length,
1653 abbreviations,
1654 length_mode,
1655 attr_lists,
1656 ));
1657 return result;
1658 }
1659
1660 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1662 let mut result = vec![first];
1663 result.extend(cascade_split_line(
1664 &rest,
1665 line_length,
1666 abbreviations,
1667 length_mode,
1668 attr_lists,
1669 ));
1670 return result;
1671 }
1672
1673 let options = ReflowOptions {
1675 line_length,
1676 break_on_sentences: false,
1677 preserve_breaks: false,
1678 sentence_per_line: false,
1679 semantic_line_breaks: false,
1680 abbreviations: abbreviations.clone(),
1681 length_mode,
1682 attr_lists,
1683 };
1684 reflow_elements(&elements, &options)
1685}
1686
1687fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1691 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1693
1694 if options.line_length == 0 {
1697 return sentence_lines;
1698 }
1699
1700 let length_mode = options.length_mode;
1701 let mut result = Vec::new();
1702 for line in sentence_lines {
1703 if display_len(&line, length_mode) <= options.line_length {
1704 result.push(line);
1705 } else {
1706 result.extend(cascade_split_line(
1707 &line,
1708 options.line_length,
1709 &options.abbreviations,
1710 length_mode,
1711 options.attr_lists,
1712 ));
1713 }
1714 }
1715
1716 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1719 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1720 for line in result {
1721 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1722 let prev_ends_at_sentence = {
1724 let trimmed = merged.last().unwrap().trim_end();
1725 trimmed
1726 .chars()
1727 .rev()
1728 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1729 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1730 };
1731
1732 if !prev_ends_at_sentence {
1733 let prev = merged.last_mut().unwrap();
1734 let combined = format!("{prev} {line}");
1735 if display_len(&combined, length_mode) <= options.line_length {
1737 *prev = combined;
1738 continue;
1739 }
1740 }
1741 }
1742 merged.push(line);
1743 }
1744 merged
1745}
1746
/// Find the byte offset of the last ASCII space in `line` that is not strictly
/// inside any of `element_spans`.
///
/// Returns `None` when the line has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let inside_span = |pos: usize| element_spans.iter().any(|&(start, end)| start < pos && pos < end);

    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| !inside_span(pos))
}
1760
1761fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1763 let mut lines = Vec::new();
1764 let mut current_line = String::new();
1765 let mut current_length = 0;
1766 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1768 let length_mode = options.length_mode;
1769
1770 for (idx, element) in elements.iter().enumerate() {
1771 let element_str = format!("{element}");
1772 let element_len = element.display_width(length_mode);
1773
1774 let is_adjacent_to_prev = if idx > 0 {
1780 match (&elements[idx - 1], element) {
1781 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1782 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1783 _ => true,
1784 }
1785 } else {
1786 false
1787 };
1788
1789 if let Element::Text(text) = element {
1791 let has_leading_space = text.starts_with(char::is_whitespace);
1793 let words: Vec<&str> = text.split_whitespace().collect();
1795
1796 for (i, word) in words.iter().enumerate() {
1797 let word_len = display_len(word, length_mode);
1798 let is_trailing_punct = word
1800 .chars()
1801 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1802
1803 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1806
1807 if is_first_adjacent {
1808 if current_length + word_len > options.line_length && current_length > 0 {
1810 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1813 let before = current_line[..last_space].trim_end().to_string();
1814 let after = current_line[last_space + 1..].to_string();
1815 lines.push(before);
1816 current_line = format!("{after}{word}");
1817 current_length = display_len(¤t_line, length_mode);
1818 current_line_element_spans.clear();
1819 } else {
1820 current_line.push_str(word);
1821 current_length += word_len;
1822 }
1823 } else {
1824 current_line.push_str(word);
1825 current_length += word_len;
1826 }
1827 } else if current_length > 0
1828 && current_length + 1 + word_len > options.line_length
1829 && !is_trailing_punct
1830 {
1831 lines.push(current_line.trim().to_string());
1833 current_line = word.to_string();
1834 current_length = word_len;
1835 current_line_element_spans.clear();
1836 } else {
1837 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1841 current_line.push(' ');
1842 current_length += 1;
1843 }
1844 current_line.push_str(word);
1845 current_length += word_len;
1846 }
1847 }
1848 } else if matches!(
1849 element,
1850 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1851 ) && element_len > options.line_length
1852 {
1853 let (content, marker): (&str, &str) = match element {
1857 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1858 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1859 Element::Strikethrough(content) => (content.as_str(), "~~"),
1860 _ => unreachable!(),
1861 };
1862
1863 let words: Vec<&str> = content.split_whitespace().collect();
1864 let n = words.len();
1865
1866 if n == 0 {
1867 let full = format!("{marker}{marker}");
1869 let full_len = display_len(&full, length_mode);
1870 if !is_adjacent_to_prev && current_length > 0 {
1871 current_line.push(' ');
1872 current_length += 1;
1873 }
1874 current_line.push_str(&full);
1875 current_length += full_len;
1876 } else {
1877 for (i, word) in words.iter().enumerate() {
1878 let is_first = i == 0;
1879 let is_last = i == n - 1;
1880 let word_str: String = match (is_first, is_last) {
1881 (true, true) => format!("{marker}{word}{marker}"),
1882 (true, false) => format!("{marker}{word}"),
1883 (false, true) => format!("{word}{marker}"),
1884 (false, false) => word.to_string(),
1885 };
1886 let word_len = display_len(&word_str, length_mode);
1887
1888 let needs_space = if is_first {
1889 !is_adjacent_to_prev && current_length > 0
1890 } else {
1891 current_length > 0
1892 };
1893
1894 if needs_space && current_length + 1 + word_len > options.line_length {
1895 lines.push(current_line.trim_end().to_string());
1896 current_line = word_str;
1897 current_length = word_len;
1898 current_line_element_spans.clear();
1899 } else {
1900 if needs_space {
1901 current_line.push(' ');
1902 current_length += 1;
1903 }
1904 current_line.push_str(&word_str);
1905 current_length += word_len;
1906 }
1907 }
1908 }
1909 } else {
1910 if is_adjacent_to_prev {
1914 if current_length + element_len > options.line_length {
1916 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1919 let before = current_line[..last_space].trim_end().to_string();
1920 let after = current_line[last_space + 1..].to_string();
1921 lines.push(before);
1922 current_line = format!("{after}{element_str}");
1923 current_length = display_len(¤t_line, length_mode);
1924 current_line_element_spans.clear();
1925 let start = after.len();
1927 current_line_element_spans.push((start, start + element_str.len()));
1928 } else {
1929 let start = current_line.len();
1931 current_line.push_str(&element_str);
1932 current_length += element_len;
1933 current_line_element_spans.push((start, current_line.len()));
1934 }
1935 } else {
1936 let start = current_line.len();
1937 current_line.push_str(&element_str);
1938 current_length += element_len;
1939 current_line_element_spans.push((start, current_line.len()));
1940 }
1941 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1942 lines.push(current_line.trim().to_string());
1944 current_line = element_str.clone();
1945 current_length = element_len;
1946 current_line_element_spans.clear();
1947 current_line_element_spans.push((0, element_str.len()));
1948 } else {
1949 let ends_with_opener =
1951 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1952 if current_length > 0 && !ends_with_opener {
1953 current_line.push(' ');
1954 current_length += 1;
1955 }
1956 let start = current_line.len();
1957 current_line.push_str(&element_str);
1958 current_length += element_len;
1959 current_line_element_spans.push((start, current_line.len()));
1960 }
1961 }
1962 }
1963
1964 if !current_line.is_empty() {
1966 lines.push(current_line.trim_end().to_string());
1967 }
1968
1969 lines
1970}
1971
1972pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1974 let lines: Vec<&str> = content.lines().collect();
1975 let mut result = Vec::new();
1976 let mut i = 0;
1977
1978 while i < lines.len() {
1979 let line = lines[i];
1980 let trimmed = line.trim();
1981
1982 if trimmed.is_empty() {
1984 result.push(String::new());
1985 i += 1;
1986 continue;
1987 }
1988
1989 if trimmed.starts_with('#') {
1991 result.push(line.to_string());
1992 i += 1;
1993 continue;
1994 }
1995
1996 if trimmed.starts_with(":::") {
1998 result.push(line.to_string());
1999 i += 1;
2000 continue;
2001 }
2002
2003 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2005 result.push(line.to_string());
2006 i += 1;
2007 while i < lines.len() {
2009 result.push(lines[i].to_string());
2010 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2011 i += 1;
2012 break;
2013 }
2014 i += 1;
2015 }
2016 continue;
2017 }
2018
2019 if ElementCache::calculate_indentation_width_default(line) >= 4 {
2021 result.push(line.to_string());
2023 i += 1;
2024 while i < lines.len() {
2025 let next_line = lines[i];
2026 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2028 result.push(next_line.to_string());
2029 i += 1;
2030 } else {
2031 break;
2032 }
2033 }
2034 continue;
2035 }
2036
2037 if trimmed.starts_with('>') {
2039 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2042 let quote_prefix = line[0..gt_pos + 1].to_string();
2043 let quote_content = &line[quote_prefix.len()..].trim_start();
2044
2045 let reflowed = reflow_line(quote_content, options);
2046 for reflowed_line in reflowed.iter() {
2047 result.push(format!("{quote_prefix} {reflowed_line}"));
2048 }
2049 i += 1;
2050 continue;
2051 }
2052
2053 if is_horizontal_rule(trimmed) {
2055 result.push(line.to_string());
2056 i += 1;
2057 continue;
2058 }
2059
2060 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2062 let indent = line.len() - line.trim_start().len();
2064 let indent_str = " ".repeat(indent);
2065
2066 let mut marker_end = indent;
2069 let mut content_start = indent;
2070
2071 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2072 if let Some(period_pos) = line[indent..].find('.') {
2074 marker_end = indent + period_pos + 1; content_start = marker_end;
2076 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2080 content_start += 1;
2081 }
2082 }
2083 } else {
2084 marker_end = indent + 1; content_start = marker_end;
2087 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2091 content_start += 1;
2092 }
2093 }
2094
2095 let marker = &line[indent..marker_end];
2096
2097 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2100 i += 1;
2101
2102 while i < lines.len() {
2104 let next_line = lines[i];
2105 let next_trimmed = next_line.trim();
2106
2107 if is_block_boundary(next_trimmed) {
2109 break;
2110 }
2111
2112 let next_indent = next_line.len() - next_line.trim_start().len();
2114 if next_indent >= content_start {
2115 let trimmed_start = next_line.trim_start();
2118 list_content.push(trim_preserving_hard_break(trimmed_start));
2119 i += 1;
2120 } else {
2121 break;
2123 }
2124 }
2125
2126 let combined_content = if options.preserve_breaks {
2129 list_content[0].clone()
2130 } else {
2131 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2133 if has_hard_breaks {
2134 list_content.join("\n")
2136 } else {
2137 list_content.join(" ")
2139 }
2140 };
2141
2142 let trimmed_marker = marker;
2144 let continuation_spaces = content_start;
2145
2146 let prefix_length = indent + trimmed_marker.len() + 1;
2148
2149 let adjusted_options = ReflowOptions {
2151 line_length: options.line_length.saturating_sub(prefix_length),
2152 ..options.clone()
2153 };
2154
2155 let reflowed = reflow_line(&combined_content, &adjusted_options);
2156 for (j, reflowed_line) in reflowed.iter().enumerate() {
2157 if j == 0 {
2158 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2159 } else {
2160 let continuation_indent = " ".repeat(continuation_spaces);
2162 result.push(format!("{continuation_indent}{reflowed_line}"));
2163 }
2164 }
2165 continue;
2166 }
2167
2168 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2170 result.push(line.to_string());
2171 i += 1;
2172 continue;
2173 }
2174
2175 if trimmed.starts_with('[') && line.contains("]:") {
2177 result.push(line.to_string());
2178 i += 1;
2179 continue;
2180 }
2181
2182 if is_definition_list_item(trimmed) {
2184 result.push(line.to_string());
2185 i += 1;
2186 continue;
2187 }
2188
2189 let mut is_single_line_paragraph = true;
2191 if i + 1 < lines.len() {
2192 let next_trimmed = lines[i + 1].trim();
2193 if !is_block_boundary(next_trimmed) {
2195 is_single_line_paragraph = false;
2196 }
2197 }
2198
2199 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2201 result.push(line.to_string());
2202 i += 1;
2203 continue;
2204 }
2205
2206 let mut paragraph_parts = Vec::new();
2208 let mut current_part = vec![line];
2209 i += 1;
2210
2211 if options.preserve_breaks {
2213 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2215 Some("\\")
2216 } else if line.ends_with(" ") {
2217 Some(" ")
2218 } else {
2219 None
2220 };
2221 let reflowed = reflow_line(line, options);
2222
2223 if let Some(break_marker) = hard_break_type {
2225 if !reflowed.is_empty() {
2226 let mut reflowed_with_break = reflowed;
2227 let last_idx = reflowed_with_break.len() - 1;
2228 if !has_hard_break(&reflowed_with_break[last_idx]) {
2229 reflowed_with_break[last_idx].push_str(break_marker);
2230 }
2231 result.extend(reflowed_with_break);
2232 }
2233 } else {
2234 result.extend(reflowed);
2235 }
2236 } else {
2237 while i < lines.len() {
2239 let prev_line = if !current_part.is_empty() {
2240 current_part.last().unwrap()
2241 } else {
2242 ""
2243 };
2244 let next_line = lines[i];
2245 let next_trimmed = next_line.trim();
2246
2247 if is_block_boundary(next_trimmed) {
2249 break;
2250 }
2251
2252 let prev_trimmed = prev_line.trim();
2255 let abbreviations = get_abbreviations(&options.abbreviations);
2256 let ends_with_sentence = (prev_trimmed.ends_with('.')
2257 || prev_trimmed.ends_with('!')
2258 || prev_trimmed.ends_with('?')
2259 || prev_trimmed.ends_with(".*")
2260 || prev_trimmed.ends_with("!*")
2261 || prev_trimmed.ends_with("?*")
2262 || prev_trimmed.ends_with("._")
2263 || prev_trimmed.ends_with("!_")
2264 || prev_trimmed.ends_with("?_")
2265 || prev_trimmed.ends_with(".\"")
2267 || prev_trimmed.ends_with("!\"")
2268 || prev_trimmed.ends_with("?\"")
2269 || prev_trimmed.ends_with(".'")
2270 || prev_trimmed.ends_with("!'")
2271 || prev_trimmed.ends_with("?'")
2272 || prev_trimmed.ends_with(".\u{201D}")
2273 || prev_trimmed.ends_with("!\u{201D}")
2274 || prev_trimmed.ends_with("?\u{201D}")
2275 || prev_trimmed.ends_with(".\u{2019}")
2276 || prev_trimmed.ends_with("!\u{2019}")
2277 || prev_trimmed.ends_with("?\u{2019}"))
2278 && !text_ends_with_abbreviation(
2279 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2280 &abbreviations,
2281 );
2282
2283 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2284 paragraph_parts.push(current_part.join(" "));
2286 current_part = vec![next_line];
2287 } else {
2288 current_part.push(next_line);
2289 }
2290 i += 1;
2291 }
2292
2293 if !current_part.is_empty() {
2295 if current_part.len() == 1 {
2296 paragraph_parts.push(current_part[0].to_string());
2298 } else {
2299 paragraph_parts.push(current_part.join(" "));
2300 }
2301 }
2302
2303 for (j, part) in paragraph_parts.iter().enumerate() {
2305 let reflowed = reflow_line(part, options);
2306 result.extend(reflowed);
2307
2308 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2312 let last_idx = result.len() - 1;
2313 if !has_hard_break(&result[last_idx]) {
2314 result[last_idx].push_str(" ");
2315 }
2316 }
2317 }
2318 }
2319 }
2320
2321 let result_text = result.join("\n");
2323 if content.ends_with('\n') && !result_text.ends_with('\n') {
2324 format!("{result_text}\n")
2325 } else {
2326 result_text
2327 }
2328}
2329
/// Outcome of reflowing a single paragraph.
///
/// NOTE(review): the field names suggest a byte range in the source document plus
/// its replacement text; the producer is not visible in this section, so confirm
/// whether `end_byte` is inclusive or exclusive before relying on it.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Start of the affected range, in bytes.
    pub start_byte: usize,
    /// End of the affected range, in bytes.
    pub end_byte: usize,
    /// Reflowed text for the range.
    pub reflowed_text: String,
}
2340
/// One line of block-quote content, together with how it was written.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// The line's content.
    pub(crate) content: String,
    /// `true` for lines built with [`BlockquoteLineData::explicit`], `false` for
    /// lines built with [`BlockquoteLineData::lazy`].
    pub(crate) is_explicit: bool,
    /// The prefix supplied for an explicit line; `None` for a lazy line.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Build a line that carries an explicit block-quote prefix.
    pub fn explicit(content: String, prefix: String) -> Self {
        let prefix = Some(prefix);
        BlockquoteLineData {
            content,
            is_explicit: true,
            prefix,
        }
    }

    /// Build a lazy continuation line, which has no prefix of its own.
    pub fn lazy(content: String) -> Self {
        BlockquoteLineData {
            content,
            is_explicit: false,
            prefix: None,
        }
    }
}
2375
/// How continuation lines of a reflowed block quote are written.
///
/// Consumed by `reflow_blockquote_content`, which always prefixes the first line
/// and any line for which `should_force_explicit_blockquote_line` returns `true`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every output line receives the explicit prefix.
    Explicit,
    /// Continuation lines are emitted without a prefix unless one is forced.
    Lazy,
}
2382
2383pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2391 let mut explicit_count = 0usize;
2392 let mut lazy_count = 0usize;
2393
2394 for line in lines.iter().skip(1) {
2395 if line.is_explicit {
2396 explicit_count += 1;
2397 } else {
2398 lazy_count += 1;
2399 }
2400 }
2401
2402 if explicit_count > 0 && lazy_count == 0 {
2403 BlockquoteContinuationStyle::Explicit
2404 } else if lazy_count > 0 && explicit_count == 0 {
2405 BlockquoteContinuationStyle::Lazy
2406 } else if explicit_count >= lazy_count {
2407 BlockquoteContinuationStyle::Explicit
2408 } else {
2409 BlockquoteContinuationStyle::Lazy
2410 }
2411}
2412
2413pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2418 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2419
2420 for (idx, line) in lines.iter().enumerate() {
2421 let Some(prefix) = line.prefix.as_ref() else {
2422 continue;
2423 };
2424 counts
2425 .entry(prefix.clone())
2426 .and_modify(|entry| entry.0 += 1)
2427 .or_insert((1, idx));
2428 }
2429
2430 counts
2431 .into_iter()
2432 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2433 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2434 })
2435 .map(|(prefix, _)| prefix)
2436 .unwrap_or_else(|| fallback.to_string())
2437}
2438
2439pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2444 let trimmed = content_line.trim_start();
2445 trimmed.starts_with('>')
2446 || trimmed.starts_with('#')
2447 || trimmed.starts_with("```")
2448 || trimmed.starts_with("~~~")
2449 || is_unordered_list_marker(trimmed)
2450 || is_numbered_list_item(trimmed)
2451 || is_horizontal_rule(trimmed)
2452 || is_definition_list_item(trimmed)
2453 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2454 || trimmed.starts_with(":::")
2455 || (trimmed.starts_with('<')
2456 && !trimmed.starts_with("<http")
2457 && !trimmed.starts_with("<https")
2458 && !trimmed.starts_with("<mailto:"))
2459}
2460
2461pub fn reflow_blockquote_content(
2470 lines: &[BlockquoteLineData],
2471 explicit_prefix: &str,
2472 continuation_style: BlockquoteContinuationStyle,
2473 options: &ReflowOptions,
2474) -> Vec<String> {
2475 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2476 let segments = split_into_segments_strs(&content_strs);
2477 let mut reflowed_content_lines: Vec<String> = Vec::new();
2478
2479 for segment in segments {
2480 let hard_break_type = segment.last().and_then(|&line| {
2481 let line = line.strip_suffix('\r').unwrap_or(line);
2482 if line.ends_with('\\') {
2483 Some("\\")
2484 } else if line.ends_with(" ") {
2485 Some(" ")
2486 } else {
2487 None
2488 }
2489 });
2490
2491 let pieces: Vec<&str> = segment
2492 .iter()
2493 .map(|&line| {
2494 if let Some(l) = line.strip_suffix('\\') {
2495 l.trim_end()
2496 } else if let Some(l) = line.strip_suffix(" ") {
2497 l.trim_end()
2498 } else {
2499 line.trim_end()
2500 }
2501 })
2502 .collect();
2503
2504 let segment_text = pieces.join(" ");
2505 let segment_text = segment_text.trim();
2506 if segment_text.is_empty() {
2507 continue;
2508 }
2509
2510 let mut reflowed = reflow_line(segment_text, options);
2511 if let Some(break_marker) = hard_break_type
2512 && !reflowed.is_empty()
2513 {
2514 let last_idx = reflowed.len() - 1;
2515 if !has_hard_break(&reflowed[last_idx]) {
2516 reflowed[last_idx].push_str(break_marker);
2517 }
2518 }
2519 reflowed_content_lines.extend(reflowed);
2520 }
2521
2522 let mut styled_lines: Vec<String> = Vec::new();
2523 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2524 let force_explicit = idx == 0
2525 || continuation_style == BlockquoteContinuationStyle::Explicit
2526 || should_force_explicit_blockquote_line(line);
2527 if force_explicit {
2528 styled_lines.push(format!("{explicit_prefix}{line}"));
2529 } else {
2530 styled_lines.push(line.clone());
2531 }
2532 }
2533
2534 styled_lines
2535}
2536
2537fn is_blockquote_content_boundary(content: &str) -> bool {
2538 let trimmed = content.trim();
2539 trimmed.is_empty()
2540 || is_block_boundary(trimmed)
2541 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2542 || trimmed.starts_with(":::")
2543 || crate::utils::is_template_directive_only(content)
2544 || is_standalone_attr_list(content)
2545 || is_snippet_block_delimiter(content)
2546}
2547
2548fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2549 let mut segments = Vec::new();
2550 let mut current = Vec::new();
2551
2552 for &line in lines {
2553 current.push(line);
2554 if has_hard_break(line) {
2555 segments.push(current);
2556 current = Vec::new();
2557 }
2558 }
2559
2560 if !current.is_empty() {
2561 segments.push(current);
2562 }
2563
2564 segments
2565}
2566
2567fn reflow_blockquote_paragraph_at_line(
2568 content: &str,
2569 lines: &[&str],
2570 target_idx: usize,
2571 options: &ReflowOptions,
2572) -> Option<ParagraphReflow> {
2573 let mut anchor_idx = target_idx;
2574 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2575 parsed.nesting_level
2576 } else {
2577 let mut found = None;
2578 let mut idx = target_idx;
2579 loop {
2580 if lines[idx].trim().is_empty() {
2581 break;
2582 }
2583 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2584 found = Some((idx, parsed.nesting_level));
2585 break;
2586 }
2587 if idx == 0 {
2588 break;
2589 }
2590 idx -= 1;
2591 }
2592 let (idx, level) = found?;
2593 anchor_idx = idx;
2594 level
2595 };
2596
2597 let mut para_start = anchor_idx;
2599 while para_start > 0 {
2600 let prev_idx = para_start - 1;
2601 let prev_line = lines[prev_idx];
2602
2603 if prev_line.trim().is_empty() {
2604 break;
2605 }
2606
2607 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2608 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2609 break;
2610 }
2611 para_start = prev_idx;
2612 continue;
2613 }
2614
2615 let prev_lazy = prev_line.trim_start();
2616 if is_blockquote_content_boundary(prev_lazy) {
2617 break;
2618 }
2619 para_start = prev_idx;
2620 }
2621
2622 while para_start < lines.len() {
2624 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2625 para_start += 1;
2626 continue;
2627 };
2628 target_level = parsed.nesting_level;
2629 break;
2630 }
2631
2632 if para_start >= lines.len() || para_start > target_idx {
2633 return None;
2634 }
2635
2636 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2639 let mut idx = para_start;
2640 while idx < lines.len() {
2641 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2642 break;
2643 }
2644
2645 let line = lines[idx];
2646 if line.trim().is_empty() {
2647 break;
2648 }
2649
2650 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2651 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2652 break;
2653 }
2654 collected.push((
2655 idx,
2656 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2657 ));
2658 idx += 1;
2659 continue;
2660 }
2661
2662 let lazy_content = line.trim_start();
2663 if is_blockquote_content_boundary(lazy_content) {
2664 break;
2665 }
2666
2667 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2668 idx += 1;
2669 }
2670
2671 if collected.is_empty() {
2672 return None;
2673 }
2674
2675 let para_end = collected[collected.len() - 1].0;
2676 if target_idx < para_start || target_idx > para_end {
2677 return None;
2678 }
2679
2680 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2681
2682 let fallback_prefix = line_data
2683 .iter()
2684 .find_map(|d| d.prefix.clone())
2685 .unwrap_or_else(|| "> ".to_string());
2686 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2687 let continuation_style = blockquote_continuation_style(&line_data);
2688
2689 let adjusted_line_length = options
2690 .line_length
2691 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2692 .max(1);
2693
2694 let adjusted_options = ReflowOptions {
2695 line_length: adjusted_line_length,
2696 ..options.clone()
2697 };
2698
2699 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2700
2701 if styled_lines.is_empty() {
2702 return None;
2703 }
2704
2705 let mut start_byte = 0;
2707 for line in lines.iter().take(para_start) {
2708 start_byte += line.len() + 1;
2709 }
2710
2711 let mut end_byte = start_byte;
2712 for line in lines.iter().take(para_end + 1).skip(para_start) {
2713 end_byte += line.len() + 1;
2714 }
2715
2716 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2717 if !includes_trailing_newline {
2718 end_byte -= 1;
2719 }
2720
2721 let reflowed_joined = styled_lines.join("\n");
2722 let reflowed_text = if includes_trailing_newline {
2723 if reflowed_joined.ends_with('\n') {
2724 reflowed_joined
2725 } else {
2726 format!("{reflowed_joined}\n")
2727 }
2728 } else if reflowed_joined.ends_with('\n') {
2729 reflowed_joined.trim_end_matches('\n').to_string()
2730 } else {
2731 reflowed_joined
2732 };
2733
2734 Some(ParagraphReflow {
2735 start_byte,
2736 end_byte,
2737 reflowed_text,
2738 })
2739}
2740
2741pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2759 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2760}
2761
2762pub fn reflow_paragraph_at_line_with_mode(
2764 content: &str,
2765 line_number: usize,
2766 line_length: usize,
2767 length_mode: ReflowLengthMode,
2768) -> Option<ParagraphReflow> {
2769 let options = ReflowOptions {
2770 line_length,
2771 length_mode,
2772 ..Default::default()
2773 };
2774 reflow_paragraph_at_line_with_options(content, line_number, &options)
2775}
2776
2777pub fn reflow_paragraph_at_line_with_options(
2788 content: &str,
2789 line_number: usize,
2790 options: &ReflowOptions,
2791) -> Option<ParagraphReflow> {
2792 if line_number == 0 {
2793 return None;
2794 }
2795
2796 let lines: Vec<&str> = content.lines().collect();
2797
2798 if line_number > lines.len() {
2800 return None;
2801 }
2802
2803 let target_idx = line_number - 1; let target_line = lines[target_idx];
2805 let trimmed = target_line.trim();
2806
2807 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2810 return Some(blockquote_reflow);
2811 }
2812
2813 if is_paragraph_boundary(trimmed, target_line) {
2815 return None;
2816 }
2817
2818 let mut para_start = target_idx;
2820 while para_start > 0 {
2821 let prev_idx = para_start - 1;
2822 let prev_line = lines[prev_idx];
2823 let prev_trimmed = prev_line.trim();
2824
2825 if is_paragraph_boundary(prev_trimmed, prev_line) {
2827 break;
2828 }
2829
2830 para_start = prev_idx;
2831 }
2832
2833 let mut para_end = target_idx;
2835 while para_end + 1 < lines.len() {
2836 let next_idx = para_end + 1;
2837 let next_line = lines[next_idx];
2838 let next_trimmed = next_line.trim();
2839
2840 if is_paragraph_boundary(next_trimmed, next_line) {
2842 break;
2843 }
2844
2845 para_end = next_idx;
2846 }
2847
2848 let paragraph_lines = &lines[para_start..=para_end];
2850
2851 let mut start_byte = 0;
2853 for line in lines.iter().take(para_start) {
2854 start_byte += line.len() + 1; }
2856
2857 let mut end_byte = start_byte;
2858 for line in paragraph_lines.iter() {
2859 end_byte += line.len() + 1; }
2861
2862 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2865
2866 if !includes_trailing_newline {
2868 end_byte -= 1;
2869 }
2870
2871 let paragraph_text = paragraph_lines.join("\n");
2873
2874 let reflowed = reflow_markdown(¶graph_text, options);
2876
2877 let reflowed_text = if includes_trailing_newline {
2881 if reflowed.ends_with('\n') {
2883 reflowed
2884 } else {
2885 format!("{reflowed}\n")
2886 }
2887 } else {
2888 if reflowed.ends_with('\n') {
2890 reflowed.trim_end_matches('\n').to_string()
2891 } else {
2892 reflowed
2893 }
2894 };
2895
2896 Some(ParagraphReflow {
2897 start_byte,
2898 end_byte,
2899 reflowed_text,
2900 })
2901}
2902
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `text_ends_with_abbreviation` against the default abbreviation set.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Inputs expected to be reported as ending with an abbreviation.
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // Inputs expected NOT to be reported as ending with an abbreviation.
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations));
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations));
        assert!(!text_ends_with_abbreviation("word", &abbreviations));
        assert!(!text_ends_with_abbreviation("", &abbreviations));
    }

    /// Exercises `is_unordered_list_marker` with accepted and rejected inputs.
    #[test]
    fn test_is_unordered_list_marker() {
        // Accepted: a marker followed by text, and a bare marker.
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));
        assert!(is_unordered_list_marker("-"));
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Rejected inputs.
        assert!(!is_unordered_list_marker("---"));
        assert!(!is_unordered_list_marker("***"));
        assert!(!is_unordered_list_marker("- - -"));
        assert!(!is_unordered_list_marker("* * *"));
        assert!(!is_unordered_list_marker("*emphasis*"));
        assert!(!is_unordered_list_marker("-word"));
        assert!(!is_unordered_list_marker(""));
        assert!(!is_unordered_list_marker("text"));
        assert!(!is_unordered_list_marker("# heading"));
    }

    /// Exercises `is_block_boundary` with accepted and rejected inputs.
    #[test]
    fn test_is_block_boundary() {
        // Inputs expected to be block boundaries.
        assert!(is_block_boundary(""));
        assert!(is_block_boundary("# Heading"));
        assert!(is_block_boundary("## Level 2"));
        assert!(is_block_boundary("```rust"));
        assert!(is_block_boundary("~~~"));
        assert!(is_block_boundary("> quote"));
        assert!(is_block_boundary("| cell |"));
        assert!(is_block_boundary("[link]: http://example.com"));
        assert!(is_block_boundary("---"));
        assert!(is_block_boundary("***"));
        assert!(is_block_boundary("- item"));
        assert!(is_block_boundary("* item"));
        assert!(is_block_boundary("+ item"));
        assert!(is_block_boundary("1. item"));
        assert!(is_block_boundary("10. item"));
        assert!(is_block_boundary(": definition"));
        assert!(is_block_boundary(":::"));
        assert!(is_block_boundary("::::: {.callout-note}"));

        // Inputs expected NOT to be block boundaries.
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*"));
        assert!(!is_block_boundary("[link](url)"));
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list line following a term must stay on its own line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// Exercises `is_paragraph_boundary(trimmed, line)` with accepted and rejected inputs.
    #[test]
    fn test_is_paragraph_boundary() {
        // Block-level constructs are expected to be boundaries.
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Lines indented like code blocks (four spaces or a tab) are expected to be
        // boundaries. The space-indented case uses four spaces so that it differs
        // from the shallow-indent negative case below; with the same indent in both
        // the two assertions would contradict each other.
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table-like rows are expected to be boundaries.
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b"));

        // Plain text, including shallowly indented text, is expected NOT to be a boundary.
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", " text"));
    }

    /// Requesting a reflow at a `:::` div marker line must yield no edit.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening `::: {.callout-note}` marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}