1use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11 DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12 HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
13 LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
14 REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17 get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18 text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Unit in which line length is measured when reflowing text.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`str::chars().count()`).
    Chars,
    /// Use the display width reported by `unicode_width` (this is the default).
    #[default]
    Visual,
    /// Count UTF-8 bytes (`str::len()`).
    Bytes,
}
35
36fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38 match mode {
39 ReflowLengthMode::Chars => s.chars().count(),
40 ReflowLengthMode::Visual => s.width(),
41 ReflowLengthMode::Bytes => s.len(),
42 }
43}
44
/// Configuration for the reflow routines in this module.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Maximum line length, measured in the unit given by `length_mode`.
    /// In the default wrapping path `reflow_line` treats `0` as "no limit"
    /// and returns the input line unchanged.
    pub line_length: usize,
    /// NOTE(review): not consulted in the visible part of this file; presumably
    /// makes wrapping prefer sentence boundaries - confirm against the wrapping code.
    pub break_on_sentences: bool,
    /// NOTE(review): not consulted in the visible part of this file; presumably
    /// keeps existing line breaks - confirm against the paragraph-level code.
    pub preserve_breaks: bool,
    /// When set, `reflow_line` emits sentence-per-line output. This is checked
    /// first, before `semantic_line_breaks` and before any length test.
    pub sentence_per_line: bool,
    /// When set (and `sentence_per_line` is not), `reflow_line` uses the
    /// semantic-line-break reflow path.
    pub semantic_line_breaks: bool,
    /// Custom abbreviations handed to `get_abbreviations`; `None` supplies no
    /// custom entries.
    pub abbreviations: Option<Vec<String>>,
    /// Unit used when measuring text against `line_length`.
    pub length_mode: ReflowLengthMode,
    /// When set, inline parsing additionally recognizes `{...}` attribute lists
    /// (matched by `ATTR_LIST_PATTERN`) as their own elements.
    pub attr_lists: bool,
}
68
69impl Default for ReflowOptions {
70 fn default() -> Self {
71 Self {
72 line_length: 80,
73 break_on_sentences: true,
74 preserve_breaks: false,
75 sentence_per_line: false,
76 semantic_line_breaks: false,
77 abbreviations: None,
78 length_mode: ReflowLengthMode::default(),
79 attr_lists: false,
80 }
81 }
82}
83
84fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
88 let chars: Vec<char> = text.chars().collect();
89
90 if pos + 1 >= chars.len() {
91 return false;
92 }
93
94 let c = chars[pos];
95 let next_char = chars[pos + 1];
96
97 if is_cjk_sentence_ending(c) {
100 let mut after_punct_pos = pos + 1;
102 while after_punct_pos < chars.len()
103 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
104 {
105 after_punct_pos += 1;
106 }
107
108 while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
110 after_punct_pos += 1;
111 }
112
113 if after_punct_pos >= chars.len() {
115 return false;
116 }
117
118 while after_punct_pos < chars.len()
120 && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
121 {
122 after_punct_pos += 1;
123 }
124
125 if after_punct_pos >= chars.len() {
126 return false;
127 }
128
129 return true;
132 }
133
134 if c != '.' && c != '!' && c != '?' {
136 return false;
137 }
138
139 let (_space_pos, after_space_pos) = if next_char == ' ' {
141 (pos + 1, pos + 2)
143 } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
144 if chars[pos + 2] == ' ' {
146 (pos + 2, pos + 3)
148 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
149 (pos + 3, pos + 4)
151 } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
152 && pos + 4 < chars.len()
153 && chars[pos + 3] == chars[pos + 2]
154 && chars[pos + 4] == ' '
155 {
156 (pos + 4, pos + 5)
158 } else {
159 return false;
160 }
161 } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
162 (pos + 2, pos + 3)
164 } else if (next_char == '*' || next_char == '_')
165 && pos + 3 < chars.len()
166 && chars[pos + 2] == next_char
167 && chars[pos + 3] == ' '
168 {
169 (pos + 3, pos + 4)
171 } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
172 (pos + 3, pos + 4)
174 } else {
175 return false;
176 };
177
178 let mut next_char_pos = after_space_pos;
180 while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
181 next_char_pos += 1;
182 }
183
184 if next_char_pos >= chars.len() {
186 return false;
187 }
188
189 let mut first_letter_pos = next_char_pos;
191 while first_letter_pos < chars.len()
192 && (chars[first_letter_pos] == '*'
193 || chars[first_letter_pos] == '_'
194 || chars[first_letter_pos] == '~'
195 || is_opening_quote(chars[first_letter_pos]))
196 {
197 first_letter_pos += 1;
198 }
199
200 if first_letter_pos >= chars.len() {
202 return false;
203 }
204
205 let first_char = chars[first_letter_pos];
207 if !first_char.is_uppercase() && !is_cjk_char(first_char) {
208 return false;
209 }
210
211 if pos > 0 && c == '.' {
213 let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
215 if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
216 return false;
217 }
218
219 if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
222 return false;
223 }
224 }
225 true
226}
227
228pub fn split_into_sentences(text: &str) -> Vec<String> {
230 split_into_sentences_custom(text, &None)
231}
232
233pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
235 let abbreviations = get_abbreviations(custom_abbreviations);
236 split_into_sentences_with_set(text, &abbreviations)
237}
238
239fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
242 let mut sentences = Vec::new();
243 let mut current_sentence = String::new();
244 let mut chars = text.chars().peekable();
245 let mut pos = 0;
246
247 while let Some(c) = chars.next() {
248 current_sentence.push(c);
249
250 if is_sentence_boundary(text, pos, abbreviations) {
251 while let Some(&next) = chars.peek() {
253 if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
254 current_sentence.push(chars.next().unwrap());
255 pos += 1;
256 } else {
257 break;
258 }
259 }
260
261 if chars.peek() == Some(&' ') {
263 chars.next();
264 pos += 1;
265 }
266
267 sentences.push(current_sentence.trim().to_string());
268 current_sentence.clear();
269 }
270
271 pos += 1;
272 }
273
274 if !current_sentence.trim().is_empty() {
276 sentences.push(current_sentence.trim().to_string());
277 }
278 sentences
279}
280
/// Returns `true` when `line` is a thematic break: it starts with `-`, `_`
/// or `*`, contains only that character and spaces, and has at least three
/// marker characters. Leading whitespace is not tolerated.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count: usize = 0;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
309
/// Returns `true` when `line` begins with one or more numeric characters
/// immediately followed by `". "` (for example `"12. item"`).
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset of the first non-numeric character (or the end of the line).
    let digits_end = line
        .find(|ch: char| !ch.is_numeric())
        .unwrap_or(line.len());

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
333
334fn is_unordered_list_marker(s: &str) -> bool {
336 matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
337 && !is_horizontal_rule(s)
338 && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
339}
340
341fn is_block_boundary_core(trimmed: &str) -> bool {
344 trimmed.is_empty()
345 || trimmed.starts_with('#')
346 || trimmed.starts_with("```")
347 || trimmed.starts_with("~~~")
348 || trimmed.starts_with('>')
349 || (trimmed.starts_with('[') && trimmed.contains("]:"))
350 || is_horizontal_rule(trimmed)
351 || is_unordered_list_marker(trimmed)
352 || is_numbered_list_item(trimmed)
353 || is_definition_list_item(trimmed)
354 || trimmed.starts_with(":::")
355}
356
357fn is_block_boundary(trimmed: &str) -> bool {
360 is_block_boundary_core(trimmed) || trimmed.starts_with('|')
361}
362
363fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
367 is_block_boundary_core(trimmed)
368 || ElementCache::calculate_indentation_width_default(line) >= 4
369 || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
370}
371
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either two or more trailing spaces or a trailing
/// backslash. A single trailing space is ordinary trailing whitespace and is
/// NOT a hard break. One trailing `\r` (from CRLF input) is ignored before
/// the check.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
381
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    text.ends_with(['.', '!', '?'])
}
386
/// Trims trailing whitespace from `s` while keeping a Markdown hard break.
///
/// * One trailing `\r` (from CRLF input) is removed first.
/// * A line ending in a backslash is returned untouched.
/// * A line ending in two or more spaces is normalized to its content
///   followed by exactly two spaces, so the hard break survives.
/// * Anything else, including a single trailing space, is trimmed.
/// * A whitespace-only line yields an empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: nothing to trim.
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Re-attach the canonical two-space hard break marker.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
417
418fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
420 if options.attr_lists {
421 parse_markdown_elements_with_attr_lists(text)
422 } else {
423 parse_markdown_elements(text)
424 }
425}
426
427pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
428 if options.sentence_per_line {
430 let elements = parse_elements(line, options);
431 return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
432 }
433
434 if options.semantic_line_breaks {
436 let elements = parse_elements(line, options);
437 return reflow_elements_semantic(&elements, options);
438 }
439
440 if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
443 return vec![line.to_string()];
444 }
445
446 let elements = parse_elements(line, options);
448
449 reflow_elements(&elements, options)
451}
452
/// Where the image of a linked image (`[![alt](src)](target)`) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: rendered as `![alt](url)`.
    Inline(String),
    /// Reference-style image: rendered as `![alt][reference]`.
    Reference(String),
}

/// Where a linked image points to.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: rendered as `[image](url)`.
    Inline(String),
    /// Reference-style link: rendered as `[image][reference]`.
    Reference(String),
}

/// One inline Markdown construct.
///
/// The `Display` implementation renders each variant back to Markdown source,
/// which is also what `display_width` measures.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// Inline link: `[text](url)`.
    Link { text: String, url: String },
    /// Reference link: `[text][reference]`.
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`.
    EmptyReferenceLink { text: String },
    /// Shortcut reference: `[reference]`.
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`.
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][reference]`.
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`.
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link; both parts may be inline or reference style.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference: `[^note]`.
    FootnoteReference { note: String },
    /// Strikethrough: `~~content~~`.
    Strikethrough(String),
    /// Wiki link: `[[content]]`.
    WikiLink(String),
    /// Inline math: `$content$`.
    InlineMath(String),
    /// Display math: `$$content$$`.
    DisplayMath(String),
    /// Emoji shortcode, stored without colons: `:name:`.
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim.
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Attribute list, stored and rendered verbatim.
    AttrList(String),
    /// Inline code, stored without delimiters and rendered with single backticks.
    Code(String),
    /// Strong emphasis: `__content__` when `underscore`, else `**content**`.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis: `_content_` when `underscore`, else `*content*`.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Writes the Markdown source form of the element.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Must emit the full image syntax; an empty string here would make
            // inline images disappear from reflowed output.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
594
595impl Element {
596 fn display_width(&self, mode: ReflowLengthMode) -> usize {
600 let formatted = format!("{self}");
601 display_len(&formatted, mode)
602 }
603}
604
/// An emphasis, strong or strikethrough span located by `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset in the parsed text where the span starts (taken from the
    /// start of the parser's range for the opening tag, i.e. at the delimiter).
    start: usize,
    /// Byte offset one past the end of the span (end of the closing tag's range).
    end: usize,
    /// Text between the opening and closing delimiters.
    content: String,
    /// `true` for strong emphasis (two-character delimiters).
    is_strong: bool,
    /// `true` for strikethrough.
    is_strikethrough: bool,
    /// `true` when the span's text starts with `_` (emphasis) or `__` (strong);
    /// always `false` for strikethrough.
    uses_underscore: bool,
}
621
622fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
632 let mut spans = Vec::new();
633 let mut options = Options::empty();
634 options.insert(Options::ENABLE_STRIKETHROUGH);
635
636 let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); let mut strong_stack: Vec<(usize, bool)> = Vec::new();
639 let mut strikethrough_stack: Vec<usize> = Vec::new();
640
641 let parser = Parser::new_ext(text, options).into_offset_iter();
642
643 for (event, range) in parser {
644 match event {
645 Event::Start(Tag::Emphasis) => {
646 let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
648 emphasis_stack.push((range.start, uses_underscore));
649 }
650 Event::End(TagEnd::Emphasis) => {
651 if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
652 let content_start = start_byte + 1;
654 let content_end = range.end - 1;
655 if content_end > content_start
656 && let Some(content) = text.get(content_start..content_end)
657 {
658 spans.push(EmphasisSpan {
659 start: start_byte,
660 end: range.end,
661 content: content.to_string(),
662 is_strong: false,
663 is_strikethrough: false,
664 uses_underscore,
665 });
666 }
667 }
668 }
669 Event::Start(Tag::Strong) => {
670 let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
672 strong_stack.push((range.start, uses_underscore));
673 }
674 Event::End(TagEnd::Strong) => {
675 if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
676 let content_start = start_byte + 2;
678 let content_end = range.end - 2;
679 if content_end > content_start
680 && let Some(content) = text.get(content_start..content_end)
681 {
682 spans.push(EmphasisSpan {
683 start: start_byte,
684 end: range.end,
685 content: content.to_string(),
686 is_strong: true,
687 is_strikethrough: false,
688 uses_underscore,
689 });
690 }
691 }
692 }
693 Event::Start(Tag::Strikethrough) => {
694 strikethrough_stack.push(range.start);
695 }
696 Event::End(TagEnd::Strikethrough) => {
697 if let Some(start_byte) = strikethrough_stack.pop() {
698 let content_start = start_byte + 2;
700 let content_end = range.end - 2;
701 if content_end > content_start
702 && let Some(content) = text.get(content_start..content_end)
703 {
704 spans.push(EmphasisSpan {
705 start: start_byte,
706 end: range.end,
707 content: content.to_string(),
708 is_strong: false,
709 is_strikethrough: true,
710 uses_underscore: false,
711 });
712 }
713 }
714 }
715 _ => {}
716 }
717 }
718
719 spans.sort_by_key(|s| s.start);
721 spans
722}
723
724fn parse_markdown_elements(text: &str) -> Vec<Element> {
735 parse_markdown_elements_inner(text, false)
736}
737
738fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
739 parse_markdown_elements_inner(text, true)
740}
741
/// Tokenizes `text` into a flat sequence of inline [`Element`]s.
///
/// The scanner works on `remaining`, the not-yet-consumed suffix of `text`.
/// On every iteration it determines:
///
/// 1. the earliest regex-recognized construct (linked images, images, links,
///    footnotes, wiki links, math, emoji, HTML entities, Hugo shortcodes,
///    HTML tags / autolinks), and
/// 2. the earliest "special" construct (a backtick code span, an attribute
///    list when `attr_lists` is set, or an emphasis span reported by
///    `extract_emphasis_spans`).
///
/// Whichever starts first is consumed; a regex construct wins only when it
/// starts strictly before the special one. Plain text in front of the chosen
/// construct is emitted as `Element::Text`. When nothing is found, the rest
/// of the input is emitted as a single `Element::Text`.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Emphasis spans are computed once for the whole text; their offsets are
    // byte offsets into `text`, not into `remaining`.
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Byte offset of `remaining` within `text`.
        let current_offset = text.len() - remaining.len();
        // (start offset in `remaining`, pattern tag, match). Every probe below
        // replaces the current candidate only when it starts strictly earlier,
        // so on ties the pattern probed first wins.
        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;

        // Linked images all contain "[!", so the four regexes are skipped
        // when that substring is absent.
        if remaining.contains("[!") {
            // [![alt](img)](link)
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ii", m));
            }

            // [![alt][img-ref]](link)
            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ri", m));
            }

            // [![alt](img)][link-ref]
            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_ir", m));
            }

            // [![alt][img-ref]][link-ref]
            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), "linked_image_rr", m));
            }
        }

        // Inline image.
        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_image", m));
        }

        // Reference image.
        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_image", m));
        }

        // Footnote reference.
        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "footnote_ref", m));
        }

        // Inline link.
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_link", m));
        }

        // Reference link.
        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "ref_link", m));
        }

        // Shortcut reference.
        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "shortcut_ref", m));
        }

        // Wiki link.
        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "wiki_link", m));
        }

        // Display math is probed before inline math, so it wins a tie at the
        // same start offset.
        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "display_math", m));
        }

        // Inline math.
        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "inline_math", m));
        }

        // Emoji shortcode.
        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "emoji", m));
        }

        // HTML entity.
        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "html_entity", m));
        }

        // Hugo shortcode.
        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), "hugo_shortcode", m));
        }

        // HTML tag pattern; the matched text is then classified as either an
        // autolink (URL scheme prefix or e-mail address) or a generic HTML tag.
        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            let matched_text = &remaining[m.start()..m.end()];
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            let is_email_autolink = {
                // Strip the angle brackets before testing for an e-mail address.
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), "autolink", m));
            } else {
                earliest_match = Some((m.start(), "html_tag", m));
            }
        }

        // Earliest "special" construct. `next_special == remaining.len()`
        // together with an empty `special_type` means none was found.
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        // Code span candidate: first backtick.
        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Attribute list candidate: only when enabled, and only when the
        // pattern matches exactly at the first `{`.
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Emphasis candidate: the first span (in start order) that begins
        // inside `remaining`. Spans starting before `current_offset` were
        // already consumed as part of an earlier element and are skipped.
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                // Only the first in-range span is considered.
                break;
            }
        }

        // A regex construct is processed only when it starts strictly before
        // the earliest special construct.
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, pattern_type, match_obj) = earliest_match.unwrap();

            // Plain text in front of the match.
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Each arm re-runs the regex with `captures` to extract groups and
            // then advances past the end of the match.
            // NOTE(review): the `else` fallbacks advance from the start of
            // `remaining` rather than from `pos`; they look unreachable when
            // `find` has already matched, but would re-emit text if `pos > 0`
            // - confirm.
            match pattern_type {
                "linked_image_ii" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ri" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_ir" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "linked_image_rr" => {
                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty reference is the collapsed form `![alt][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        // An empty reference is the collapsed form `[text][]`.
                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "emoji" => {
                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_obj.end()..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // The remaining kinds keep the matched text verbatim.
                "html_entity" => {
                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "hugo_shortcode" => {
                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "autolink" => {
                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                "html_tag" => {
                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
                    remaining = &remaining[match_obj.end()..];
                }
                // Unknown tag: no probe above produces one.
                _ => {
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Emit the plain text in front of the special construct and move
            // `remaining` so the construct sits at offset 0.
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            match special_type {
                "code" => {
                    // `remaining` starts with a backtick; the span runs to the
                    // next single backtick.
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // No closing backtick: the rest is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    if let Some(span) = pulldown_emphasis {
                        // The whole span, delimiters included, is consumed.
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // `special_type` is only set to "pulldown_emphasis"
                        // together with `pulldown_emphasis`, so this is a
                        // defensive fallback.
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Nothing special left: everything remaining is plain text.
                _ => {
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}
1251
1252fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1254 let abbreviations = get_abbreviations(custom_abbreviations);
1255 let mut lines = Vec::new();
1256 let mut current_line = String::new();
1257
1258 for (idx, element) in elements.iter().enumerate() {
1259 let element_str = format!("{element}");
1260
1261 if let Element::Text(text) = element {
1263 let combined = format!("{current_line}{text}");
1265 let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1267
1268 if sentences.len() > 1 {
1269 for (i, sentence) in sentences.iter().enumerate() {
1271 if i == 0 {
1272 let trimmed = sentence.trim();
1275
1276 if text_ends_with_abbreviation(trimmed, &abbreviations) {
1277 current_line = sentence.to_string();
1279 } else {
1280 lines.push(sentence.to_string());
1282 current_line.clear();
1283 }
1284 } else if i == sentences.len() - 1 {
1285 let trimmed = sentence.trim();
1287 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1288
1289 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1290 lines.push(sentence.to_string());
1292 current_line.clear();
1293 } else {
1294 current_line = sentence.to_string();
1296 }
1297 } else {
1298 lines.push(sentence.to_string());
1300 }
1301 }
1302 } else {
1303 let trimmed = combined.trim();
1305
1306 if trimmed.is_empty() {
1310 continue;
1311 }
1312
1313 let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1314
1315 if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1316 lines.push(trimmed.to_string());
1318 current_line.clear();
1319 } else {
1320 current_line = combined;
1322 }
1323 }
1324 } else if let Element::Italic { content, underscore } = element {
1325 let marker = if *underscore { "_" } else { "*" };
1327 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1328 } else if let Element::Bold { content, underscore } = element {
1329 let marker = if *underscore { "__" } else { "**" };
1331 handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1332 } else if let Element::Strikethrough(content) = element {
1333 handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1335 } else {
1336 let is_adjacent = if idx > 0 {
1339 match &elements[idx - 1] {
1340 Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1341 _ => true,
1342 }
1343 } else {
1344 false
1345 };
1346
1347 if !is_adjacent
1349 && !current_line.is_empty()
1350 && !current_line.ends_with(' ')
1351 && !current_line.ends_with('(')
1352 && !current_line.ends_with('[')
1353 {
1354 current_line.push(' ');
1355 }
1356 current_line.push_str(&element_str);
1357 }
1358 }
1359
1360 if !current_line.is_empty() {
1362 lines.push(current_line.trim().to_string());
1363 }
1364 lines
1365}
1366
1367fn handle_emphasis_sentence_split(
1369 content: &str,
1370 marker: &str,
1371 abbreviations: &HashSet<String>,
1372 current_line: &mut String,
1373 lines: &mut Vec<String>,
1374) {
1375 let sentences = split_into_sentences_with_set(content, abbreviations);
1377
1378 if sentences.len() <= 1 {
1379 if !current_line.is_empty()
1381 && !current_line.ends_with(' ')
1382 && !current_line.ends_with('(')
1383 && !current_line.ends_with('[')
1384 {
1385 current_line.push(' ');
1386 }
1387 current_line.push_str(marker);
1388 current_line.push_str(content);
1389 current_line.push_str(marker);
1390
1391 let trimmed = content.trim();
1393 let ends_with_punct = ends_with_sentence_punct(trimmed);
1394 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1395 lines.push(current_line.clone());
1396 current_line.clear();
1397 }
1398 } else {
1399 for (i, sentence) in sentences.iter().enumerate() {
1401 let trimmed = sentence.trim();
1402 if trimmed.is_empty() {
1403 continue;
1404 }
1405
1406 if i == 0 {
1407 if !current_line.is_empty()
1409 && !current_line.ends_with(' ')
1410 && !current_line.ends_with('(')
1411 && !current_line.ends_with('[')
1412 {
1413 current_line.push(' ');
1414 }
1415 current_line.push_str(marker);
1416 current_line.push_str(trimmed);
1417 current_line.push_str(marker);
1418
1419 let ends_with_punct = ends_with_sentence_punct(trimmed);
1421 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1422 lines.push(current_line.clone());
1423 current_line.clear();
1424 }
1425 } else if i == sentences.len() - 1 {
1426 let ends_with_punct = ends_with_sentence_punct(trimmed);
1428
1429 let mut line = String::new();
1430 line.push_str(marker);
1431 line.push_str(trimmed);
1432 line.push_str(marker);
1433
1434 if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1435 lines.push(line);
1436 } else {
1437 *current_line = line;
1439 }
1440 } else {
1441 let mut line = String::new();
1443 line.push_str(marker);
1444 line.push_str(trimmed);
1445 line.push_str(marker);
1446 lines.push(line);
1447 }
1448 }
1449 }
1450}
1451
/// Words before which `split_at_break_word` may break an over-long line.
///
/// Entries are matched against a lowercased copy of the text and must be
/// delimited by ASCII spaces, so every entry is written in lowercase.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1487
/// Returns `true` for the characters treated as clause separators: comma,
/// semicolon, colon and the em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_SEPARATORS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_SEPARATORS.contains(&c)
}
1492
1493fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1497 let mut spans = Vec::new();
1498 let mut offset = 0;
1499 for element in elements {
1500 let rendered = format!("{element}");
1501 let len = rendered.len();
1502 if !matches!(element, Element::Text(_)) {
1503 spans.push((offset, offset + len));
1504 }
1505 offset += len;
1506 }
1507 spans
1508}
1509
/// Returns `true` when `pos` lies strictly inside one of `spans`.
///
/// Both ends are exclusive: a position equal to a span's start or end counts as
/// outside, so splitting exactly at an element edge is allowed.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1514
/// Fraction of the target line length below which a line is considered too short:
/// splits whose first part would be shorter than `line_length * MIN_SPLIT_RATIO`
/// are rejected, and lines shorter than that are candidates for merging back
/// into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1518
1519fn split_at_clause_punctuation(
1523 text: &str,
1524 line_length: usize,
1525 element_spans: &[(usize, usize)],
1526 length_mode: ReflowLengthMode,
1527) -> Option<(String, String)> {
1528 let chars: Vec<char> = text.chars().collect();
1529 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1530
1531 let mut width_acc = 0;
1533 let mut search_end_char = 0;
1534 for (idx, &c) in chars.iter().enumerate() {
1535 let c_width = display_len(&c.to_string(), length_mode);
1536 if width_acc + c_width > line_length {
1537 break;
1538 }
1539 width_acc += c_width;
1540 search_end_char = idx + 1;
1541 }
1542
1543 let mut best_pos = None;
1544 for i in (0..search_end_char).rev() {
1545 if is_clause_punctuation(chars[i]) {
1546 let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1548 if !is_inside_element(byte_pos, element_spans) {
1549 best_pos = Some(i);
1550 break;
1551 }
1552 }
1553 }
1554
1555 let pos = best_pos?;
1556
1557 let first: String = chars[..=pos].iter().collect();
1559 let first_display_len = display_len(&first, length_mode);
1560 if first_display_len < min_first_len {
1561 return None;
1562 }
1563
1564 let rest: String = chars[pos + 1..].iter().collect();
1566 let rest = rest.trim_start().to_string();
1567
1568 if rest.is_empty() {
1569 return None;
1570 }
1571
1572 Some((first, rest))
1573}
1574
1575fn split_at_break_word(
1579 text: &str,
1580 line_length: usize,
1581 element_spans: &[(usize, usize)],
1582 length_mode: ReflowLengthMode,
1583) -> Option<(String, String)> {
1584 let lower = text.to_lowercase();
1585 let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1586 let mut best_split: Option<(usize, usize)> = None; for &word in BREAK_WORDS {
1589 let mut search_start = 0;
1590 while let Some(pos) = lower[search_start..].find(word) {
1591 let abs_pos = search_start + pos;
1592
1593 let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1595 let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1596
1597 if preceded_by_space && followed_by_space {
1598 let first_part = text[..abs_pos].trim_end();
1600 let first_part_len = display_len(first_part, length_mode);
1601
1602 if first_part_len >= min_first_len
1603 && first_part_len <= line_length
1604 && !is_inside_element(abs_pos, element_spans)
1605 {
1606 if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1608 best_split = Some((abs_pos, word.len()));
1609 }
1610 }
1611 }
1612
1613 search_start = abs_pos + word.len();
1614 }
1615 }
1616
1617 let (byte_start, _word_len) = best_split?;
1618
1619 let first = text[..byte_start].trim_end().to_string();
1620 let rest = text[byte_start..].to_string();
1621
1622 if first.is_empty() || rest.trim().is_empty() {
1623 return None;
1624 }
1625
1626 Some((first, rest))
1627}
1628
1629fn cascade_split_line(
1632 text: &str,
1633 line_length: usize,
1634 abbreviations: &Option<Vec<String>>,
1635 length_mode: ReflowLengthMode,
1636 attr_lists: bool,
1637) -> Vec<String> {
1638 if line_length == 0 || display_len(text, length_mode) <= line_length {
1639 return vec![text.to_string()];
1640 }
1641
1642 let elements = parse_markdown_elements_inner(text, attr_lists);
1643 let element_spans = compute_element_spans(&elements);
1644
1645 if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1647 let mut result = vec![first];
1648 result.extend(cascade_split_line(
1649 &rest,
1650 line_length,
1651 abbreviations,
1652 length_mode,
1653 attr_lists,
1654 ));
1655 return result;
1656 }
1657
1658 if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1660 let mut result = vec![first];
1661 result.extend(cascade_split_line(
1662 &rest,
1663 line_length,
1664 abbreviations,
1665 length_mode,
1666 attr_lists,
1667 ));
1668 return result;
1669 }
1670
1671 let options = ReflowOptions {
1673 line_length,
1674 break_on_sentences: false,
1675 preserve_breaks: false,
1676 sentence_per_line: false,
1677 semantic_line_breaks: false,
1678 abbreviations: abbreviations.clone(),
1679 length_mode,
1680 attr_lists,
1681 };
1682 reflow_elements(&elements, &options)
1683}
1684
1685fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1689 let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1691
1692 if options.line_length == 0 {
1695 return sentence_lines;
1696 }
1697
1698 let length_mode = options.length_mode;
1699 let mut result = Vec::new();
1700 for line in sentence_lines {
1701 if display_len(&line, length_mode) <= options.line_length {
1702 result.push(line);
1703 } else {
1704 result.extend(cascade_split_line(
1705 &line,
1706 options.line_length,
1707 &options.abbreviations,
1708 length_mode,
1709 options.attr_lists,
1710 ));
1711 }
1712 }
1713
1714 let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1717 let mut merged: Vec<String> = Vec::with_capacity(result.len());
1718 for line in result {
1719 if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1720 let prev_ends_at_sentence = {
1722 let trimmed = merged.last().unwrap().trim_end();
1723 trimmed
1724 .chars()
1725 .rev()
1726 .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1727 .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1728 };
1729
1730 if !prev_ends_at_sentence {
1731 let prev = merged.last_mut().unwrap();
1732 let combined = format!("{prev} {line}");
1733 if display_len(&combined, length_mode) <= options.line_length {
1735 *prev = combined;
1736 continue;
1737 }
1738 }
1739 }
1740 merged.push(line);
1741 }
1742 merged
1743}
1744
/// Returns the byte index of the right-most ASCII space in `line` that is not
/// strictly inside any of `element_spans`, or `None` when there is no such
/// space. A space sitting exactly on a span's start or end offset is allowed.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let is_protected = |pos: usize| element_spans.iter().any(|&(start, end)| start < pos && pos < end);

    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| !is_protected(pos))
}
1758
1759fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1761 let mut lines = Vec::new();
1762 let mut current_line = String::new();
1763 let mut current_length = 0;
1764 let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1766 let length_mode = options.length_mode;
1767
1768 for (idx, element) in elements.iter().enumerate() {
1769 let element_str = format!("{element}");
1770 let element_len = element.display_width(length_mode);
1771
1772 let is_adjacent_to_prev = if idx > 0 {
1778 match (&elements[idx - 1], element) {
1779 (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1780 (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1781 _ => true,
1782 }
1783 } else {
1784 false
1785 };
1786
1787 if let Element::Text(text) = element {
1789 let has_leading_space = text.starts_with(char::is_whitespace);
1791 let words: Vec<&str> = text.split_whitespace().collect();
1793
1794 for (i, word) in words.iter().enumerate() {
1795 let word_len = display_len(word, length_mode);
1796 let is_trailing_punct = word
1798 .chars()
1799 .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1800
1801 let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1804
1805 if is_first_adjacent {
1806 if current_length + word_len > options.line_length && current_length > 0 {
1808 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1811 let before = current_line[..last_space].trim_end().to_string();
1812 let after = current_line[last_space + 1..].to_string();
1813 lines.push(before);
1814 current_line = format!("{after}{word}");
1815 current_length = display_len(¤t_line, length_mode);
1816 current_line_element_spans.clear();
1817 } else {
1818 current_line.push_str(word);
1819 current_length += word_len;
1820 }
1821 } else {
1822 current_line.push_str(word);
1823 current_length += word_len;
1824 }
1825 } else if current_length > 0
1826 && current_length + 1 + word_len > options.line_length
1827 && !is_trailing_punct
1828 {
1829 lines.push(current_line.trim().to_string());
1831 current_line = word.to_string();
1832 current_length = word_len;
1833 current_line_element_spans.clear();
1834 } else {
1835 if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1839 current_line.push(' ');
1840 current_length += 1;
1841 }
1842 current_line.push_str(word);
1843 current_length += word_len;
1844 }
1845 }
1846 } else if matches!(
1847 element,
1848 Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1849 ) && element_len > options.line_length
1850 {
1851 let (content, marker): (&str, &str) = match element {
1855 Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1856 Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1857 Element::Strikethrough(content) => (content.as_str(), "~~"),
1858 _ => unreachable!(),
1859 };
1860
1861 let words: Vec<&str> = content.split_whitespace().collect();
1862 let n = words.len();
1863
1864 if n == 0 {
1865 let full = format!("{marker}{marker}");
1867 let full_len = display_len(&full, length_mode);
1868 if !is_adjacent_to_prev && current_length > 0 {
1869 current_line.push(' ');
1870 current_length += 1;
1871 }
1872 current_line.push_str(&full);
1873 current_length += full_len;
1874 } else {
1875 for (i, word) in words.iter().enumerate() {
1876 let is_first = i == 0;
1877 let is_last = i == n - 1;
1878 let word_str: String = match (is_first, is_last) {
1879 (true, true) => format!("{marker}{word}{marker}"),
1880 (true, false) => format!("{marker}{word}"),
1881 (false, true) => format!("{word}{marker}"),
1882 (false, false) => word.to_string(),
1883 };
1884 let word_len = display_len(&word_str, length_mode);
1885
1886 let needs_space = if is_first {
1887 !is_adjacent_to_prev && current_length > 0
1888 } else {
1889 current_length > 0
1890 };
1891
1892 if needs_space && current_length + 1 + word_len > options.line_length {
1893 lines.push(current_line.trim_end().to_string());
1894 current_line = word_str;
1895 current_length = word_len;
1896 current_line_element_spans.clear();
1897 } else {
1898 if needs_space {
1899 current_line.push(' ');
1900 current_length += 1;
1901 }
1902 current_line.push_str(&word_str);
1903 current_length += word_len;
1904 }
1905 }
1906 }
1907 } else {
1908 if is_adjacent_to_prev {
1912 if current_length + element_len > options.line_length {
1914 if let Some(last_space) = rfind_safe_space(¤t_line, ¤t_line_element_spans) {
1917 let before = current_line[..last_space].trim_end().to_string();
1918 let after = current_line[last_space + 1..].to_string();
1919 lines.push(before);
1920 current_line = format!("{after}{element_str}");
1921 current_length = display_len(¤t_line, length_mode);
1922 current_line_element_spans.clear();
1923 let start = after.len();
1925 current_line_element_spans.push((start, start + element_str.len()));
1926 } else {
1927 let start = current_line.len();
1929 current_line.push_str(&element_str);
1930 current_length += element_len;
1931 current_line_element_spans.push((start, current_line.len()));
1932 }
1933 } else {
1934 let start = current_line.len();
1935 current_line.push_str(&element_str);
1936 current_length += element_len;
1937 current_line_element_spans.push((start, current_line.len()));
1938 }
1939 } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1940 lines.push(current_line.trim().to_string());
1942 current_line = element_str.clone();
1943 current_length = element_len;
1944 current_line_element_spans.clear();
1945 current_line_element_spans.push((0, element_str.len()));
1946 } else {
1947 let ends_with_opener =
1949 current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1950 if current_length > 0 && !ends_with_opener {
1951 current_line.push(' ');
1952 current_length += 1;
1953 }
1954 let start = current_line.len();
1955 current_line.push_str(&element_str);
1956 current_length += element_len;
1957 current_line_element_spans.push((start, current_line.len()));
1958 }
1959 }
1960 }
1961
1962 if !current_line.is_empty() {
1964 lines.push(current_line.trim_end().to_string());
1965 }
1966
1967 lines
1968}
1969
1970pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1972 let lines: Vec<&str> = content.lines().collect();
1973 let mut result = Vec::new();
1974 let mut i = 0;
1975
1976 while i < lines.len() {
1977 let line = lines[i];
1978 let trimmed = line.trim();
1979
1980 if trimmed.is_empty() {
1982 result.push(String::new());
1983 i += 1;
1984 continue;
1985 }
1986
1987 if trimmed.starts_with('#') {
1989 result.push(line.to_string());
1990 i += 1;
1991 continue;
1992 }
1993
1994 if trimmed.starts_with(":::") {
1996 result.push(line.to_string());
1997 i += 1;
1998 continue;
1999 }
2000
2001 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2003 result.push(line.to_string());
2004 i += 1;
2005 while i < lines.len() {
2007 result.push(lines[i].to_string());
2008 if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2009 i += 1;
2010 break;
2011 }
2012 i += 1;
2013 }
2014 continue;
2015 }
2016
2017 if ElementCache::calculate_indentation_width_default(line) >= 4 {
2019 result.push(line.to_string());
2021 i += 1;
2022 while i < lines.len() {
2023 let next_line = lines[i];
2024 if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2026 result.push(next_line.to_string());
2027 i += 1;
2028 } else {
2029 break;
2030 }
2031 }
2032 continue;
2033 }
2034
2035 if trimmed.starts_with('>') {
2037 let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2040 let quote_prefix = line[0..gt_pos + 1].to_string();
2041 let quote_content = &line[quote_prefix.len()..].trim_start();
2042
2043 let reflowed = reflow_line(quote_content, options);
2044 for reflowed_line in reflowed.iter() {
2045 result.push(format!("{quote_prefix} {reflowed_line}"));
2046 }
2047 i += 1;
2048 continue;
2049 }
2050
2051 if is_horizontal_rule(trimmed) {
2053 result.push(line.to_string());
2054 i += 1;
2055 continue;
2056 }
2057
2058 if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2060 let indent = line.len() - line.trim_start().len();
2062 let indent_str = " ".repeat(indent);
2063
2064 let mut marker_end = indent;
2067 let mut content_start = indent;
2068
2069 if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2070 if let Some(period_pos) = line[indent..].find('.') {
2072 marker_end = indent + period_pos + 1; content_start = marker_end;
2074 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2078 content_start += 1;
2079 }
2080 }
2081 } else {
2082 marker_end = indent + 1; content_start = marker_end;
2085 while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2089 content_start += 1;
2090 }
2091 }
2092
2093 let marker = &line[indent..marker_end];
2094
2095 let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2098 i += 1;
2099
2100 while i < lines.len() {
2102 let next_line = lines[i];
2103 let next_trimmed = next_line.trim();
2104
2105 if is_block_boundary(next_trimmed) {
2107 break;
2108 }
2109
2110 let next_indent = next_line.len() - next_line.trim_start().len();
2112 if next_indent >= content_start {
2113 let trimmed_start = next_line.trim_start();
2116 list_content.push(trim_preserving_hard_break(trimmed_start));
2117 i += 1;
2118 } else {
2119 break;
2121 }
2122 }
2123
2124 let combined_content = if options.preserve_breaks {
2127 list_content[0].clone()
2128 } else {
2129 let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2131 if has_hard_breaks {
2132 list_content.join("\n")
2134 } else {
2135 list_content.join(" ")
2137 }
2138 };
2139
2140 let trimmed_marker = marker;
2142 let continuation_spaces = content_start;
2143
2144 let prefix_length = indent + trimmed_marker.len() + 1;
2146
2147 let adjusted_options = ReflowOptions {
2149 line_length: options.line_length.saturating_sub(prefix_length),
2150 ..options.clone()
2151 };
2152
2153 let reflowed = reflow_line(&combined_content, &adjusted_options);
2154 for (j, reflowed_line) in reflowed.iter().enumerate() {
2155 if j == 0 {
2156 result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2157 } else {
2158 let continuation_indent = " ".repeat(continuation_spaces);
2160 result.push(format!("{continuation_indent}{reflowed_line}"));
2161 }
2162 }
2163 continue;
2164 }
2165
2166 if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2168 result.push(line.to_string());
2169 i += 1;
2170 continue;
2171 }
2172
2173 if trimmed.starts_with('[') && line.contains("]:") {
2175 result.push(line.to_string());
2176 i += 1;
2177 continue;
2178 }
2179
2180 if is_definition_list_item(trimmed) {
2182 result.push(line.to_string());
2183 i += 1;
2184 continue;
2185 }
2186
2187 let mut is_single_line_paragraph = true;
2189 if i + 1 < lines.len() {
2190 let next_trimmed = lines[i + 1].trim();
2191 if !is_block_boundary(next_trimmed) {
2193 is_single_line_paragraph = false;
2194 }
2195 }
2196
2197 if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2199 result.push(line.to_string());
2200 i += 1;
2201 continue;
2202 }
2203
2204 let mut paragraph_parts = Vec::new();
2206 let mut current_part = vec![line];
2207 i += 1;
2208
2209 if options.preserve_breaks {
2211 let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2213 Some("\\")
2214 } else if line.ends_with(" ") {
2215 Some(" ")
2216 } else {
2217 None
2218 };
2219 let reflowed = reflow_line(line, options);
2220
2221 if let Some(break_marker) = hard_break_type {
2223 if !reflowed.is_empty() {
2224 let mut reflowed_with_break = reflowed;
2225 let last_idx = reflowed_with_break.len() - 1;
2226 if !has_hard_break(&reflowed_with_break[last_idx]) {
2227 reflowed_with_break[last_idx].push_str(break_marker);
2228 }
2229 result.extend(reflowed_with_break);
2230 }
2231 } else {
2232 result.extend(reflowed);
2233 }
2234 } else {
2235 while i < lines.len() {
2237 let prev_line = if !current_part.is_empty() {
2238 current_part.last().unwrap()
2239 } else {
2240 ""
2241 };
2242 let next_line = lines[i];
2243 let next_trimmed = next_line.trim();
2244
2245 if is_block_boundary(next_trimmed) {
2247 break;
2248 }
2249
2250 let prev_trimmed = prev_line.trim();
2253 let abbreviations = get_abbreviations(&options.abbreviations);
2254 let ends_with_sentence = (prev_trimmed.ends_with('.')
2255 || prev_trimmed.ends_with('!')
2256 || prev_trimmed.ends_with('?')
2257 || prev_trimmed.ends_with(".*")
2258 || prev_trimmed.ends_with("!*")
2259 || prev_trimmed.ends_with("?*")
2260 || prev_trimmed.ends_with("._")
2261 || prev_trimmed.ends_with("!_")
2262 || prev_trimmed.ends_with("?_")
2263 || prev_trimmed.ends_with(".\"")
2265 || prev_trimmed.ends_with("!\"")
2266 || prev_trimmed.ends_with("?\"")
2267 || prev_trimmed.ends_with(".'")
2268 || prev_trimmed.ends_with("!'")
2269 || prev_trimmed.ends_with("?'")
2270 || prev_trimmed.ends_with(".\u{201D}")
2271 || prev_trimmed.ends_with("!\u{201D}")
2272 || prev_trimmed.ends_with("?\u{201D}")
2273 || prev_trimmed.ends_with(".\u{2019}")
2274 || prev_trimmed.ends_with("!\u{2019}")
2275 || prev_trimmed.ends_with("?\u{2019}"))
2276 && !text_ends_with_abbreviation(
2277 prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2278 &abbreviations,
2279 );
2280
2281 if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2282 paragraph_parts.push(current_part.join(" "));
2284 current_part = vec![next_line];
2285 } else {
2286 current_part.push(next_line);
2287 }
2288 i += 1;
2289 }
2290
2291 if !current_part.is_empty() {
2293 if current_part.len() == 1 {
2294 paragraph_parts.push(current_part[0].to_string());
2296 } else {
2297 paragraph_parts.push(current_part.join(" "));
2298 }
2299 }
2300
2301 for (j, part) in paragraph_parts.iter().enumerate() {
2303 let reflowed = reflow_line(part, options);
2304 result.extend(reflowed);
2305
2306 if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2310 let last_idx = result.len() - 1;
2311 if !has_hard_break(&result[last_idx]) {
2312 result[last_idx].push_str(" ");
2313 }
2314 }
2315 }
2316 }
2317 }
2318
2319 let result_text = result.join("\n");
2321 if content.ends_with('\n') && !result_text.ends_with('\n') {
2322 format!("{result_text}\n")
2323 } else {
2324 result_text
2325 }
2326}
2327
/// Result of reflowing a single paragraph: a byte range plus its replacement text.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Start of the affected byte range.
    /// NOTE(review): presumably an offset into the original document content - confirm against the producers.
    pub start_byte: usize,
    /// End of the affected byte range.
    /// NOTE(review): whether this bound is inclusive or exclusive is not visible here - confirm.
    pub end_byte: usize,
    /// The reflowed text for that range.
    pub reflowed_text: String,
}
2338
/// One line of blockquote content together with how it was written in the source.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// The line's text; this is what gets reflowed.
    pub(crate) content: String,
    /// `true` for lines built with [`BlockquoteLineData::explicit`], `false` for
    /// lines built with [`BlockquoteLineData::lazy`].
    pub(crate) is_explicit: bool,
    /// The prefix the line was written with; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Creates an explicit line that carries `prefix`.
    pub fn explicit(content: String, prefix: String) -> Self {
        let prefix = Some(prefix);
        Self {
            is_explicit: true,
            content,
            prefix,
        }
    }

    /// Creates a lazy continuation line, which has no prefix.
    pub fn lazy(content: String) -> Self {
        Self {
            is_explicit: false,
            prefix: None,
            content,
        }
    }
}
2373
/// How the lines after the first line of a reflowed blockquote paragraph are written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every output line is written with the blockquote prefix.
    Explicit,
    /// Continuation lines are written without the prefix, unless a line's content
    /// forces it (see `should_force_explicit_blockquote_line`).
    Lazy,
}
2380
2381pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2389 let mut explicit_count = 0usize;
2390 let mut lazy_count = 0usize;
2391
2392 for line in lines.iter().skip(1) {
2393 if line.is_explicit {
2394 explicit_count += 1;
2395 } else {
2396 lazy_count += 1;
2397 }
2398 }
2399
2400 if explicit_count > 0 && lazy_count == 0 {
2401 BlockquoteContinuationStyle::Explicit
2402 } else if lazy_count > 0 && explicit_count == 0 {
2403 BlockquoteContinuationStyle::Lazy
2404 } else if explicit_count >= lazy_count {
2405 BlockquoteContinuationStyle::Explicit
2406 } else {
2407 BlockquoteContinuationStyle::Lazy
2408 }
2409}
2410
2411pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2416 let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2417
2418 for (idx, line) in lines.iter().enumerate() {
2419 let Some(prefix) = line.prefix.as_ref() else {
2420 continue;
2421 };
2422 counts
2423 .entry(prefix.clone())
2424 .and_modify(|entry| entry.0 += 1)
2425 .or_insert((1, idx));
2426 }
2427
2428 counts
2429 .into_iter()
2430 .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2431 count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2432 })
2433 .map(|(prefix, _)| prefix)
2434 .unwrap_or_else(|| fallback.to_string())
2435}
2436
2437pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2442 let trimmed = content_line.trim_start();
2443 trimmed.starts_with('>')
2444 || trimmed.starts_with('#')
2445 || trimmed.starts_with("```")
2446 || trimmed.starts_with("~~~")
2447 || is_unordered_list_marker(trimmed)
2448 || is_numbered_list_item(trimmed)
2449 || is_horizontal_rule(trimmed)
2450 || is_definition_list_item(trimmed)
2451 || (trimmed.starts_with('[') && trimmed.contains("]:"))
2452 || trimmed.starts_with(":::")
2453 || (trimmed.starts_with('<')
2454 && !trimmed.starts_with("<http")
2455 && !trimmed.starts_with("<https")
2456 && !trimmed.starts_with("<mailto:"))
2457}
2458
2459pub fn reflow_blockquote_content(
2468 lines: &[BlockquoteLineData],
2469 explicit_prefix: &str,
2470 continuation_style: BlockquoteContinuationStyle,
2471 options: &ReflowOptions,
2472) -> Vec<String> {
2473 let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2474 let segments = split_into_segments_strs(&content_strs);
2475 let mut reflowed_content_lines: Vec<String> = Vec::new();
2476
2477 for segment in segments {
2478 let hard_break_type = segment.last().and_then(|&line| {
2479 let line = line.strip_suffix('\r').unwrap_or(line);
2480 if line.ends_with('\\') {
2481 Some("\\")
2482 } else if line.ends_with(" ") {
2483 Some(" ")
2484 } else {
2485 None
2486 }
2487 });
2488
2489 let pieces: Vec<&str> = segment
2490 .iter()
2491 .map(|&line| {
2492 if let Some(l) = line.strip_suffix('\\') {
2493 l.trim_end()
2494 } else if let Some(l) = line.strip_suffix(" ") {
2495 l.trim_end()
2496 } else {
2497 line.trim_end()
2498 }
2499 })
2500 .collect();
2501
2502 let segment_text = pieces.join(" ");
2503 let segment_text = segment_text.trim();
2504 if segment_text.is_empty() {
2505 continue;
2506 }
2507
2508 let mut reflowed = reflow_line(segment_text, options);
2509 if let Some(break_marker) = hard_break_type
2510 && !reflowed.is_empty()
2511 {
2512 let last_idx = reflowed.len() - 1;
2513 if !has_hard_break(&reflowed[last_idx]) {
2514 reflowed[last_idx].push_str(break_marker);
2515 }
2516 }
2517 reflowed_content_lines.extend(reflowed);
2518 }
2519
2520 let mut styled_lines: Vec<String> = Vec::new();
2521 for (idx, line) in reflowed_content_lines.iter().enumerate() {
2522 let force_explicit = idx == 0
2523 || continuation_style == BlockquoteContinuationStyle::Explicit
2524 || should_force_explicit_blockquote_line(line);
2525 if force_explicit {
2526 styled_lines.push(format!("{explicit_prefix}{line}"));
2527 } else {
2528 styled_lines.push(line.clone());
2529 }
2530 }
2531
2532 styled_lines
2533}
2534
2535fn is_blockquote_content_boundary(content: &str) -> bool {
2536 let trimmed = content.trim();
2537 trimmed.is_empty()
2538 || is_block_boundary(trimmed)
2539 || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2540 || trimmed.starts_with(":::")
2541 || crate::utils::is_template_directive_only(content)
2542 || is_standalone_attr_list(content)
2543 || is_snippet_block_delimiter(content)
2544}
2545
2546fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2547 let mut segments = Vec::new();
2548 let mut current = Vec::new();
2549
2550 for &line in lines {
2551 current.push(line);
2552 if has_hard_break(line) {
2553 segments.push(current);
2554 current = Vec::new();
2555 }
2556 }
2557
2558 if !current.is_empty() {
2559 segments.push(current);
2560 }
2561
2562 segments
2563}
2564
2565fn reflow_blockquote_paragraph_at_line(
2566 content: &str,
2567 lines: &[&str],
2568 target_idx: usize,
2569 options: &ReflowOptions,
2570) -> Option<ParagraphReflow> {
2571 let mut anchor_idx = target_idx;
2572 let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2573 parsed.nesting_level
2574 } else {
2575 let mut found = None;
2576 let mut idx = target_idx;
2577 loop {
2578 if lines[idx].trim().is_empty() {
2579 break;
2580 }
2581 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2582 found = Some((idx, parsed.nesting_level));
2583 break;
2584 }
2585 if idx == 0 {
2586 break;
2587 }
2588 idx -= 1;
2589 }
2590 let (idx, level) = found?;
2591 anchor_idx = idx;
2592 level
2593 };
2594
2595 let mut para_start = anchor_idx;
2597 while para_start > 0 {
2598 let prev_idx = para_start - 1;
2599 let prev_line = lines[prev_idx];
2600
2601 if prev_line.trim().is_empty() {
2602 break;
2603 }
2604
2605 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2606 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2607 break;
2608 }
2609 para_start = prev_idx;
2610 continue;
2611 }
2612
2613 let prev_lazy = prev_line.trim_start();
2614 if is_blockquote_content_boundary(prev_lazy) {
2615 break;
2616 }
2617 para_start = prev_idx;
2618 }
2619
2620 while para_start < lines.len() {
2622 let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
2623 para_start += 1;
2624 continue;
2625 };
2626 target_level = parsed.nesting_level;
2627 break;
2628 }
2629
2630 if para_start >= lines.len() || para_start > target_idx {
2631 return None;
2632 }
2633
2634 let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
2637 let mut idx = para_start;
2638 while idx < lines.len() {
2639 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
2640 break;
2641 }
2642
2643 let line = lines[idx];
2644 if line.trim().is_empty() {
2645 break;
2646 }
2647
2648 if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
2649 if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2650 break;
2651 }
2652 collected.push((
2653 idx,
2654 BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
2655 ));
2656 idx += 1;
2657 continue;
2658 }
2659
2660 let lazy_content = line.trim_start();
2661 if is_blockquote_content_boundary(lazy_content) {
2662 break;
2663 }
2664
2665 collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
2666 idx += 1;
2667 }
2668
2669 if collected.is_empty() {
2670 return None;
2671 }
2672
2673 let para_end = collected[collected.len() - 1].0;
2674 if target_idx < para_start || target_idx > para_end {
2675 return None;
2676 }
2677
2678 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
2679
2680 let fallback_prefix = line_data
2681 .iter()
2682 .find_map(|d| d.prefix.clone())
2683 .unwrap_or_else(|| "> ".to_string());
2684 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
2685 let continuation_style = blockquote_continuation_style(&line_data);
2686
2687 let adjusted_line_length = options
2688 .line_length
2689 .saturating_sub(display_len(&explicit_prefix, options.length_mode))
2690 .max(1);
2691
2692 let adjusted_options = ReflowOptions {
2693 line_length: adjusted_line_length,
2694 ..options.clone()
2695 };
2696
2697 let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
2698
2699 if styled_lines.is_empty() {
2700 return None;
2701 }
2702
2703 let mut start_byte = 0;
2705 for line in lines.iter().take(para_start) {
2706 start_byte += line.len() + 1;
2707 }
2708
2709 let mut end_byte = start_byte;
2710 for line in lines.iter().take(para_end + 1).skip(para_start) {
2711 end_byte += line.len() + 1;
2712 }
2713
2714 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2715 if !includes_trailing_newline {
2716 end_byte -= 1;
2717 }
2718
2719 let reflowed_joined = styled_lines.join("\n");
2720 let reflowed_text = if includes_trailing_newline {
2721 if reflowed_joined.ends_with('\n') {
2722 reflowed_joined
2723 } else {
2724 format!("{reflowed_joined}\n")
2725 }
2726 } else if reflowed_joined.ends_with('\n') {
2727 reflowed_joined.trim_end_matches('\n').to_string()
2728 } else {
2729 reflowed_joined
2730 };
2731
2732 Some(ParagraphReflow {
2733 start_byte,
2734 end_byte,
2735 reflowed_text,
2736 })
2737}
2738
2739pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2757 reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2758}
2759
2760pub fn reflow_paragraph_at_line_with_mode(
2762 content: &str,
2763 line_number: usize,
2764 line_length: usize,
2765 length_mode: ReflowLengthMode,
2766) -> Option<ParagraphReflow> {
2767 let options = ReflowOptions {
2768 line_length,
2769 length_mode,
2770 ..Default::default()
2771 };
2772 reflow_paragraph_at_line_with_options(content, line_number, &options)
2773}
2774
2775pub fn reflow_paragraph_at_line_with_options(
2786 content: &str,
2787 line_number: usize,
2788 options: &ReflowOptions,
2789) -> Option<ParagraphReflow> {
2790 if line_number == 0 {
2791 return None;
2792 }
2793
2794 let lines: Vec<&str> = content.lines().collect();
2795
2796 if line_number > lines.len() {
2798 return None;
2799 }
2800
2801 let target_idx = line_number - 1; let target_line = lines[target_idx];
2803 let trimmed = target_line.trim();
2804
2805 if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2808 return Some(blockquote_reflow);
2809 }
2810
2811 if is_paragraph_boundary(trimmed, target_line) {
2813 return None;
2814 }
2815
2816 let mut para_start = target_idx;
2818 while para_start > 0 {
2819 let prev_idx = para_start - 1;
2820 let prev_line = lines[prev_idx];
2821 let prev_trimmed = prev_line.trim();
2822
2823 if is_paragraph_boundary(prev_trimmed, prev_line) {
2825 break;
2826 }
2827
2828 para_start = prev_idx;
2829 }
2830
2831 let mut para_end = target_idx;
2833 while para_end + 1 < lines.len() {
2834 let next_idx = para_end + 1;
2835 let next_line = lines[next_idx];
2836 let next_trimmed = next_line.trim();
2837
2838 if is_paragraph_boundary(next_trimmed, next_line) {
2840 break;
2841 }
2842
2843 para_end = next_idx;
2844 }
2845
2846 let paragraph_lines = &lines[para_start..=para_end];
2848
2849 let mut start_byte = 0;
2851 for line in lines.iter().take(para_start) {
2852 start_byte += line.len() + 1; }
2854
2855 let mut end_byte = start_byte;
2856 for line in paragraph_lines.iter() {
2857 end_byte += line.len() + 1; }
2859
2860 let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2863
2864 if !includes_trailing_newline {
2866 end_byte -= 1;
2867 }
2868
2869 let paragraph_text = paragraph_lines.join("\n");
2871
2872 let reflowed = reflow_markdown(¶graph_text, options);
2874
2875 let reflowed_text = if includes_trailing_newline {
2879 if reflowed.ends_with('\n') {
2881 reflowed
2882 } else {
2883 format!("{reflowed}\n")
2884 }
2885 } else {
2886 if reflowed.ends_with('\n') {
2888 reflowed.trim_end_matches('\n').to_string()
2889 } else {
2890 reflowed
2891 }
2892 };
2893
2894 Some(ParagraphReflow {
2895 start_byte,
2896 end_byte,
2897 reflowed_text,
2898 })
2899}
2900
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation` against the abbreviation set
    /// returned by `get_abbreviations(&None)`.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        let accepted = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in accepted {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Ordinary words, other punctuation, missing period and empty input
        // must all be rejected.
        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    /// A marker is `-`, `*` or `+`, either alone or followed by a space.
    #[test]
    fn test_is_unordered_list_marker() {
        let markers = ["- item", "* item", "+ item", "-", "*", "+"];
        for line in markers {
            assert!(is_unordered_list_marker(line), "expected {line:?} to be a list marker");
        }

        // Rules, emphasis, marker glued to a word, and plain text are not markers.
        let non_markers = [
            "---",
            "***",
            "- - -",
            "* * *",
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ];
        for line in non_markers {
            assert!(!is_unordered_list_marker(line), "expected {line:?} not to be a list marker");
        }
    }

    /// Exercises `is_block_boundary` over each kind of block-level line.
    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            "",
            "# Heading",
            "## Level 2",
            "```rust",
            "~~~",
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            "---",
            "***",
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ];
        for line in boundaries {
            assert!(is_block_boundary(line), "expected {line:?} to be a block boundary");
        }

        let inline_text = ["regular text", "*emphasis*", "[link](url)", "some words here"];
        for line in inline_text {
            assert!(!is_block_boundary(line), "expected {line:?} not to be a block boundary");
        }
    }

    /// A definition line directly below its term must stay on its own line.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` takes `(trimmed, original)`. The original line
    /// matters for indentation-sensitive cases.
    #[test]
    fn test_is_paragraph_boundary() {
        let boundaries = [
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code block: four spaces of indentation, or a tab.
            // A one-space indent here would contradict the negative case below.
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows, with and without outer pipes.
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"),
        ];
        for (trimmed, line) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, line),
                "expected {line:?} to be a paragraph boundary"
            );
        }

        let non_boundaries = [
            ("regular text", "regular text"),
            // Indentation short of a code block does not end a paragraph.
            ("text", " text"),
        ];
        for (trimmed, line) in non_boundaries {
            assert!(
                !is_paragraph_boundary(trimmed, line),
                "expected {line:?} not to be a paragraph boundary"
            );
        }
    }

    /// Targeting a `:::` div marker line must not produce a reflow.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the opening `::: {.callout-note}` marker.
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}