use crate::utils::calculate_indentation_width_default;
use crate::utils::is_definition_list_item;
use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
use crate::utils::regex_cache::{
DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
};
use crate::utils::sentence_utils::{
get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
text_ends_with_abbreviation,
};
use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
use std::collections::HashSet;
use unicode_width::UnicodeWidthStr;
/// Selects how the length of a string is measured when deciding whether it fits on a line.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
/// Count Unicode scalar values (`str::chars().count()`).
Chars,
/// Measure with `UnicodeWidthStr::width`; this is the default variant.
#[default]
Visual,
/// Count UTF-8 bytes (`str::len()`).
Bytes,
}
fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
match mode {
ReflowLengthMode::Chars => s.chars().count(),
ReflowLengthMode::Visual => s.width(),
ReflowLengthMode::Bytes => s.len(),
}
}
/// Options controlling how Markdown text is reflowed.
#[derive(Clone)]
pub struct ReflowOptions {
/// Target line length. In the default (length-based) mode `reflow_line` returns the
/// input unchanged when this is 0 or when the line already fits.
pub line_length: usize,
/// NOTE(review): not read by the code visible in this part of the file; presumably
/// prefers sentence boundaries when wrapping — confirm against its consumer.
pub break_on_sentences: bool,
/// NOTE(review): not read by the code visible in this part of the file; presumably
/// keeps existing line breaks — confirm against its consumer.
pub preserve_breaks: bool,
/// When set, `reflow_line` emits one sentence per output line. Checked before
/// `semantic_line_breaks`.
pub sentence_per_line: bool,
/// When set (and `sentence_per_line` is not), `reflow_line` uses the semantic
/// line-break reflow.
pub semantic_line_breaks: bool,
/// Optional custom abbreviations, forwarded to `get_abbreviations`.
pub abbreviations: Option<Vec<String>>,
/// How string length is measured against `line_length`.
pub length_mode: ReflowLengthMode,
/// When set, inline text is parsed with attribute-list (`{...}`) recognition enabled.
pub attr_lists: bool,
/// When set, a `.` only ends a sentence if the next sentence starts with an uppercase
/// or CJK character.
pub require_sentence_capital: bool,
/// NOTE(review): not read by the code visible in this part of the file; presumably caps
/// the indent of list continuation lines — confirm against its consumer.
pub max_list_continuation_indent: Option<usize>,
}
impl Default for ReflowOptions {
/// Returns the default configuration: 80-column length-based reflow measured with the
/// default `ReflowLengthMode`, sentence-aware breaking and capital-letter sentence
/// detection enabled, and every other mode switched off.
fn default() -> Self {
Self {
line_length: 80,
break_on_sentences: true,
preserve_breaks: false,
sentence_per_line: false,
semantic_line_breaks: false,
// `None` means no custom abbreviation list is supplied.
abbreviations: None,
length_mode: ReflowLengthMode::default(),
attr_lists: false,
require_sentence_capital: true,
max_list_continuation_indent: None,
}
}
}
/// Decides whether the character at char index `pos` of `text` ends a sentence.
///
/// `pos` is a *character* index, not a byte offset. CJK sentence-ending
/// punctuation is a boundary whenever more content follows it (optionally
/// after emphasis markers and whitespace). For `.`, `!` and `?` the
/// punctuation must be followed by a space — optionally after a closing
/// quote and/or closing emphasis markers — and then by further content.
/// A `.` is additionally rejected after a known abbreviation, between
/// digits, after a single-letter uppercase initial, and (when
/// `require_sentence_capital` is set) before a non-uppercase, non-CJK letter.
fn is_sentence_boundary(
    text: &str,
    pos: usize,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> bool {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    if pos + 1 >= total {
        return false;
    }
    let punct = chars[pos];
    let following = chars[pos + 1];

    let is_marker = |ch: char| matches!(ch, '*' | '_' | '~');
    let is_space = |ch: char| ch.is_whitespace();
    // Advances `idx` while `pred` holds, stopping at the end of the text.
    let skip = |mut idx: usize, pred: &dyn Fn(char) -> bool| -> usize {
        while idx < total && pred(chars[idx]) {
            idx += 1;
        }
        idx
    };

    if is_cjk_sentence_ending(punct) {
        // Closing emphasis, then whitespace, then opening emphasis; something
        // must remain after each of the last two steps.
        let after_closers = skip(pos + 1, &is_marker);
        let after_gap = skip(after_closers, &is_space);
        if after_gap >= total {
            return false;
        }
        return skip(after_gap, &is_marker) < total;
    }

    if !matches!(punct, '.' | '!' | '?') {
        return false;
    }

    let at = |idx: usize| chars.get(idx).copied();
    let is_star_or_underscore = |ch: Option<char>| matches!(ch, Some('*') | Some('_'));
    let space = Some(' ');

    // Index of the first character after the separating space.
    let after_space_pos = if following == ' ' {
        pos + 2
    } else if is_closing_quote(following) && pos + 2 < total {
        let third = at(pos + 2);
        if third == space {
            pos + 3
        } else if is_star_or_underscore(third) && at(pos + 3) == space {
            // quote + single emphasis marker
            pos + 4
        } else if is_star_or_underscore(third) && at(pos + 3) == third && at(pos + 4) == space {
            // quote + doubled emphasis marker
            pos + 5
        } else {
            return false;
        }
    } else if matches!(following, '*' | '_') && at(pos + 2) == space {
        pos + 3
    } else if matches!(following, '*' | '_') && at(pos + 2) == Some(following) && at(pos + 3) == space {
        pos + 4
    } else if following == '~' && at(pos + 2) == Some('~') && at(pos + 3) == space {
        pos + 4
    } else {
        return false;
    };

    let next_char_pos = skip(after_space_pos, &is_space);
    if next_char_pos >= total {
        return false;
    }

    // Skip opening emphasis and opening quotes to reach the first real letter.
    let first_letter_pos = skip(next_char_pos, &|ch: char| is_marker(ch) || is_opening_quote(ch));
    if first_letter_pos >= total {
        return false;
    }
    let first_char = chars[first_letter_pos];

    // `!` and `?` are never abbreviation or decimal punctuation.
    if punct != '.' {
        return true;
    }

    if pos > 0 {
        let prefix_len: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..prefix_len], abbreviations) {
            return false;
        }
        let before = chars[pos - 1];
        // A digit on both sides of the period.
        if before.is_numeric() && first_char.is_ascii_digit() {
            return false;
        }
        // A lone uppercase letter before the period (an initial).
        if before.is_ascii_uppercase() && (pos == 1 || chars[pos - 2].is_whitespace()) {
            return false;
        }
    }

    !require_sentence_capital || first_char.is_uppercase() || is_cjk_char(first_char)
}
/// Splits `text` into sentences without supplying any custom abbreviations.
///
/// Delegates to `split_into_sentences_custom` with `None`.
pub fn split_into_sentences(text: &str) -> Vec<String> {
split_into_sentences_custom(text, &None)
}
/// Splits `text` into sentences, passing `custom_abbreviations` to `get_abbreviations`
/// to build the abbreviation set.
///
/// Always requires the following sentence to start with a capital (or CJK) letter.
pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
let abbreviations = get_abbreviations(custom_abbreviations);
split_into_sentences_with_set(text, &abbreviations, true)
}
/// Splits `text` into trimmed sentences using a prepared abbreviation set.
///
/// After each detected boundary, any trailing emphasis markers (`*`, `_`,
/// `~`) and closing quotes are kept with the finished sentence, and a single
/// following space is dropped. Remaining non-blank text becomes the final
/// sentence.
fn split_into_sentences_with_set(
    text: &str,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> Vec<String> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut sentences = Vec::new();
    let mut buffer = String::new();
    let mut idx = 0;

    while idx < total {
        buffer.push(chars[idx]);
        let at_boundary = is_sentence_boundary(text, idx, abbreviations, require_sentence_capital);
        idx += 1;
        if !at_boundary {
            continue;
        }

        // Closing markers and quotes belong to the sentence just finished.
        while idx < total && (matches!(chars[idx], '*' | '_' | '~') || is_closing_quote(chars[idx])) {
            buffer.push(chars[idx]);
            idx += 1;
        }
        // Drop the single separating space.
        if idx < total && chars[idx] == ' ' {
            idx += 1;
        }

        sentences.push(buffer.trim().to_string());
        buffer.clear();
    }

    let tail = buffer.trim();
    if !tail.is_empty() {
        sentences.push(tail.to_string());
    }
    sentences
}
/// Returns `true` when `line` is a thematic break: at least three of the same
/// marker (`-`, `_` or `*`), starting with the marker, with only spaces
/// allowed between or after them.
fn is_horizontal_rule(line: &str) -> bool {
    if line.len() < 3 {
        return false;
    }
    let marker = match line.chars().next() {
        Some(ch) if matches!(ch, '-' | '_' | '*') => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }
    marker_count >= 3
}
/// Returns `true` when `line` starts with one or more numeric characters
/// followed immediately by `". "` (for example `"12. item"`).
fn is_numbered_list_item(line: &str) -> bool {
    // Locate the end of the leading run of numeric characters.
    match line.find(|ch: char| !ch.is_numeric()) {
        // No leading digits, or nothing but digits (including the empty string).
        Some(0) | None => false,
        Some(digits_end) => line[digits_end..].starts_with(". "),
    }
}
/// Returns `true` when `s` begins with a bullet (`-`, `*` or `+`) that is
/// either the whole string or followed by a space, and `s` is not a
/// horizontal rule.
fn is_unordered_list_marker(s: &str) -> bool {
    let bytes = s.as_bytes();
    let starts_with_bullet = match bytes.first() {
        Some(b'-') | Some(b'*') | Some(b'+') => true,
        _ => false,
    };
    if !starts_with_bullet || is_horizontal_rule(s) {
        return false;
    }
    // `bytes` is non-empty here, so a length other than 1 means index 1 exists.
    bytes.len() == 1 || bytes[1] == b' '
}
/// Returns `true` when `trimmed` is blank or starts a construct that ends the
/// current paragraph: heading, fenced code, block quote, `:::` line,
/// reference-style definition, horizontal rule, list item, or definition
/// list item.
fn is_block_boundary_core(trimmed: &str) -> bool {
    const BLOCK_PREFIXES: [&str; 5] = ["#", "```", "~~~", ">", ":::"];

    if trimmed.is_empty() || BLOCK_PREFIXES.iter().any(|prefix| trimmed.starts_with(*prefix)) {
        return true;
    }

    // e.g. `[label]: https://example.com`
    let looks_like_reference_definition = trimmed.starts_with('[') && trimmed.contains("]:");

    looks_like_reference_definition
        || is_horizontal_rule(trimmed)
        || is_unordered_list_marker(trimmed)
        || is_numbered_list_item(trimmed)
        || is_definition_list_item(trimmed)
}
/// Returns `true` when `trimmed` is a core block boundary (see `is_block_boundary_core`)
/// or starts with a pipe character (`|`).
fn is_block_boundary(trimmed: &str) -> bool {
is_block_boundary_core(trimmed) || trimmed.starts_with('|')
}
/// Returns `true` when a line ends the current paragraph.
///
/// `trimmed` is checked against the core block boundaries; the untrimmed
/// `line` is checked for an indentation width of 4 or more and for looking
/// like a table row.
fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
    if is_block_boundary_core(trimmed) {
        return true;
    }
    if calculate_indentation_width_default(line) >= 4 {
        return true;
    }
    crate::utils::table_utils::TableUtils::is_potential_table_row(line)
}
/// Returns `true` when `line` ends with a Markdown hard line break.
///
/// A hard break is either two or more trailing spaces or a trailing
/// backslash. A single trailing space is ordinary trailing whitespace, not a
/// break. A trailing `\r` (from CRLF line endings) is ignored.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
/// Returns `true` when the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    text.chars()
        .next_back()
        .is_some_and(|last| matches!(last, '.' | '!' | '?'))
}
/// Trims trailing whitespace from `s` while keeping a Markdown hard line break.
///
/// * A trailing `\r` is removed first.
/// * A line ending in a backslash is returned as-is (backslash hard break).
/// * A line ending in two or more spaces is normalised to its content followed
///   by exactly two spaces; a whitespace-only line becomes empty.
/// * Any other trailing whitespace, including a single space (which is not a
///   hard break), is removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
if options.attr_lists {
parse_markdown_elements_with_attr_lists(text)
} else {
parse_markdown_elements(text)
}
}
/// Reflows a single line of Markdown according to `options`.
///
/// Mode precedence is sentence-per-line, then semantic line breaks, then
/// length-based wrapping. In length-based mode the line is returned unchanged
/// when `line_length` is 0 or the line already fits.
pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
    let sentence_mode = options.sentence_per_line;
    let semantic_mode = options.semantic_line_breaks;

    if !sentence_mode && !semantic_mode {
        let limit = options.line_length;
        if limit == 0 || display_len(line, options.length_mode) <= limit {
            return vec![line.to_owned()];
        }
    }

    let elements = parse_elements(line, options);
    if sentence_mode {
        reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital)
    } else if semantic_mode {
        reflow_elements_semantic(&elements, options)
    } else {
        reflow_elements(&elements, options)
    }
}
/// Where the image inside a linked image (`[![alt](...)](...)`) comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: `![alt](url)`.
    Inline(String),
    /// Reference label: `![alt][ref]`.
    Reference(String),
}

/// Where the link wrapped around a linked image points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: `[...](url)`.
    Inline(String),
    /// Reference label: `[...][ref]`.
    Reference(String),
}

/// An inline piece of Markdown produced by the element parser.
///
/// Each variant stores only the variable parts of the construct. The
/// `Display` impl re-adds the surrounding Markdown syntax.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text, rendered verbatim.
    Text(String),
    /// `[text](url)`
    Link { text: String, url: String },
    /// `[text][reference]`
    ReferenceLink { text: String, reference: String },
    /// `[text][]`
    EmptyReferenceLink { text: String },
    /// `[reference]`
    ShortcutReference { reference: String },
    /// `![alt](url)`
    InlineImage { alt: String, url: String },
    /// `![alt][reference]`
    ReferenceImage { alt: String, reference: String },
    /// `![alt][]`
    EmptyReferenceImage { alt: String },
    /// An image wrapped in a link, e.g. `[![alt](img)](target)`.
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// `[^note]`
    FootnoteReference { note: String },
    /// `~~text~~`
    Strikethrough(String),
    /// `[[target]]`
    WikiLink(String),
    /// `$math$`
    InlineMath(String),
    /// `$$math$$`
    DisplayMath(String),
    /// `:name:`
    EmojiShortcode(String),
    /// Autolink, stored and rendered verbatim.
    Autolink(String),
    /// HTML tag, stored and rendered verbatim.
    HtmlTag(String),
    /// HTML entity, stored and rendered verbatim.
    HtmlEntity(String),
    /// Hugo shortcode, stored and rendered verbatim.
    HugoShortcode(String),
    /// Attribute list, stored and rendered verbatim.
    AttrList(String),
    /// Inline code span content, without the backticks.
    Code(String),
    /// Strong emphasis; `underscore` selects `__` over `**`.
    Bold {
        content: String,
        underscore: bool,
    },
    /// Emphasis; `underscore` selects `_` over `*`.
    Italic {
        content: String,
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    /// Writes the element back out as Markdown source.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            // Inline images must render their alt text and URL; an empty
            // rendering would silently delete the image from reflowed output.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Render the inner image first, then wrap it in the link.
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}
impl Element {
    /// Length of the element's rendered Markdown form, measured per `mode`.
    fn display_width(&self, mode: ReflowLengthMode) -> usize {
        display_len(&self.to_string(), mode)
    }
}
/// An emphasis, strong or strikethrough span located by `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
// Byte offset in the source text where the span (including its opening marker) starts.
start: usize,
// Byte offset in the source text just past the span's closing marker.
end: usize,
// Source text between the opening and closing markers.
content: String,
// True for strong spans (two-character `**` / `__` markers).
is_strong: bool,
// True for strikethrough spans.
is_strikethrough: bool,
// True when the span is delimited with underscores rather than asterisks.
uses_underscore: bool,
}
/// Locates emphasis (`*`/`_`), strong (`**`/`__`) and strikethrough spans in
/// `text` by walking pulldown-cmark's offset iterator.
///
/// Returns the spans sorted by start byte offset. Spans with empty content,
/// or whose content range is not a valid slice of `text`, are skipped.
fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
    let mut spans = Vec::new();

    // Start offset and whether the opener is an underscore, per open span.
    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new();
    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
    let mut strikethrough_stack: Vec<usize> = Vec::new();

    // Records a closed span after stripping `marker_len` bytes from each end.
    // NOTE(review): strikethrough is always treated as a two-byte marker; if
    // the parser also reports single-tilde strikethrough, the content would be
    // clipped — confirm against the pulldown-cmark version in use.
    let mut record =
        |start: usize, end: usize, marker_len: usize, is_strong: bool, is_strikethrough: bool, uses_underscore: bool| {
            let content_start = start + marker_len;
            let content_end = end - marker_len;
            if content_end <= content_start {
                return;
            }
            if let Some(content) = text.get(content_start..content_end) {
                spans.push(EmphasisSpan {
                    start,
                    end,
                    content: content.to_string(),
                    is_strong,
                    is_strikethrough,
                    uses_underscore,
                });
            }
        };

    let parser_options = Options::ENABLE_STRIKETHROUGH;
    for (event, range) in Parser::new_ext(text, parser_options).into_offset_iter() {
        match event {
            Event::Start(Tag::Emphasis) => {
                let underscore = text.get(range.start..range.start + 1) == Some("_");
                emphasis_stack.push((range.start, underscore));
            }
            Event::Start(Tag::Strong) => {
                let underscore = text.get(range.start..range.start + 2) == Some("__");
                strong_stack.push((range.start, underscore));
            }
            Event::Start(Tag::Strikethrough) => strikethrough_stack.push(range.start),
            Event::End(TagEnd::Emphasis) => {
                if let Some((start, underscore)) = emphasis_stack.pop() {
                    record(start, range.end, 1, false, false, underscore);
                }
            }
            Event::End(TagEnd::Strong) => {
                if let Some((start, underscore)) = strong_stack.pop() {
                    record(start, range.end, 2, true, false, underscore);
                }
            }
            Event::End(TagEnd::Strikethrough) => {
                if let Some(start) = strikethrough_stack.pop() {
                    record(start, range.end, 2, false, true, false);
                }
            }
            _ => {}
        }
    }

    spans.sort_by_key(|span| span.start);
    spans
}
/// Parses `text` into inline elements with attribute-list recognition disabled.
fn parse_markdown_elements(text: &str) -> Vec<Element> {
parse_markdown_elements_inner(text, false)
}
/// Parses `text` into inline elements with attribute-list recognition enabled.
fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
parse_markdown_elements_inner(text, true)
}
/// Splits `text` into a sequence of inline [`Element`]s.
///
/// At each step the earliest regex-recognised construct (links, images,
/// footnotes, math, emoji, HTML, shortcodes, autolinks) is compared with the
/// earliest "special" construct (a backtick code span, an attribute list when
/// `attr_lists` is set, or an emphasis span from the Markdown parser).
/// Whichever starts first is emitted, preceded by any plain text before it.
/// When several regex patterns match at the same position, the one tried
/// first wins.
fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    // Replaces `$best` with the match in `$found` when it starts strictly earlier.
    macro_rules! offer {
        ($best:ident, $found:expr, $label:expr) => {
            if let Some(m) = $found {
                if $best.is_none_or(|(start, _, _)| m.start() < start) {
                    $best = Some((m.start(), m.end(), $label));
                }
            }
        };
    }
    // Reads capture group `$idx` as an owned string, empty when absent.
    macro_rules! group {
        ($caps:expr, $idx:expr) => {
            $caps.get($idx).map(|m| m.as_str()).unwrap_or("").to_string()
        };
    }

    let emphasis_spans = extract_emphasis_spans(text);
    let mut elements = Vec::new();
    let mut remaining = text;

    while !remaining.is_empty() {
        let current_offset = text.len() - remaining.len();

        // --- Regex-recognised constructs: (start, end, label) of the earliest one. ---
        let mut earliest_match: Option<(usize, usize, &str)> = None;
        if remaining.contains("[!") {
            offer!(earliest_match, LINKED_IMAGE_INLINE_INLINE.find(remaining), "linked_image_ii");
            offer!(earliest_match, LINKED_IMAGE_REF_INLINE.find(remaining), "linked_image_ri");
            offer!(earliest_match, LINKED_IMAGE_INLINE_REF.find(remaining), "linked_image_ir");
            offer!(earliest_match, LINKED_IMAGE_REF_REF.find(remaining), "linked_image_rr");
        }
        offer!(earliest_match, INLINE_IMAGE_REGEX.find(remaining), "inline_image");
        offer!(earliest_match, REF_IMAGE_REGEX.find(remaining), "ref_image");
        offer!(earliest_match, FOOTNOTE_REF_REGEX.find(remaining), "footnote_ref");
        offer!(
            earliest_match,
            INLINE_LINK_FANCY_REGEX.find(remaining).ok().flatten(),
            "inline_link"
        );
        offer!(earliest_match, REF_LINK_REGEX.find(remaining).ok().flatten(), "ref_link");
        offer!(
            earliest_match,
            SHORTCUT_REF_REGEX.find(remaining).ok().flatten(),
            "shortcut_ref"
        );
        offer!(earliest_match, WIKI_LINK_REGEX.find(remaining), "wiki_link");
        offer!(earliest_match, DISPLAY_MATH_REGEX.find(remaining), "display_math");
        offer!(
            earliest_match,
            INLINE_MATH_REGEX.find(remaining).ok().flatten(),
            "inline_math"
        );
        offer!(earliest_match, EMOJI_SHORTCODE_REGEX.find(remaining), "emoji");
        offer!(earliest_match, HTML_ENTITY_REGEX.find(remaining), "html_entity");
        offer!(earliest_match, HUGO_SHORTCODE_REGEX.find(remaining), "hugo_shortcode");

        // Angle-bracket constructs are either autolinks or HTML tags.
        if let Some(m) = HTML_TAG_PATTERN.find(remaining) {
            if earliest_match.is_none_or(|(start, _, _)| m.start() < start) {
                let matched_text = &remaining[m.start()..m.end()];
                let is_url_autolink = ["<http://", "<https://", "<mailto:", "<ftp://", "<ftps://"]
                    .iter()
                    .any(|scheme| matched_text.starts_with(*scheme));
                let inner = matched_text.trim_start_matches('<').trim_end_matches('>');
                let label = if is_url_autolink || EMAIL_PATTERN.is_match(inner) {
                    "autolink"
                } else {
                    "html_tag"
                };
                earliest_match = Some((m.start(), m.end(), label));
            }
        }

        // --- Special constructs: code span, attribute list, emphasis span. ---
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        if let Some(pos) = remaining.find('`') {
            next_special = pos;
            special_type = "code";
        }
        if attr_lists {
            if let Some(pos) = remaining.find('{') {
                if pos < next_special {
                    // Only accept an attribute list that begins exactly at the brace.
                    let anchored = ATTR_LIST_PATTERN.find(&remaining[pos..]).filter(|m| m.start() == 0);
                    if let Some(m) = anchored {
                        next_special = pos;
                        special_type = "attr_list";
                        attr_list_len = m.end();
                    }
                }
            }
        }
        // Only the first emphasis span starting at or after the cursor is considered.
        let upcoming_span = emphasis_spans
            .iter()
            .find(|span| span.start >= current_offset && span.start < current_offset + remaining.len());
        if let Some(span) = upcoming_span {
            let pos_in_remaining = span.start - current_offset;
            if pos_in_remaining < next_special {
                next_special = pos_in_remaining;
                special_type = "pulldown_emphasis";
                pulldown_emphasis = Some(span);
            }
        }

        match earliest_match {
            Some((pos, match_end, pattern_type)) if pos < next_special => {
                if pos > 0 {
                    elements.push(Element::Text(remaining[..pos].to_string()));
                }

                // Each arm yields the parsed element (if the captures succeed) and the
                // literal emitted as plain text when they do not.
                let (parsed, fallback): (Option<Element>, &str) = match pattern_type {
                    "linked_image_ii" => (
                        LINKED_IMAGE_INLINE_INLINE
                            .captures(remaining)
                            .map(|caps| Element::LinkedImage {
                                alt: group!(caps, 1),
                                img_source: LinkedImageSource::Inline(group!(caps, 2)),
                                link_target: LinkedImageTarget::Inline(group!(caps, 3)),
                            }),
                        "[",
                    ),
                    "linked_image_ri" => (
                        LINKED_IMAGE_REF_INLINE
                            .captures(remaining)
                            .map(|caps| Element::LinkedImage {
                                alt: group!(caps, 1),
                                img_source: LinkedImageSource::Reference(group!(caps, 2)),
                                link_target: LinkedImageTarget::Inline(group!(caps, 3)),
                            }),
                        "[",
                    ),
                    "linked_image_ir" => (
                        LINKED_IMAGE_INLINE_REF
                            .captures(remaining)
                            .map(|caps| Element::LinkedImage {
                                alt: group!(caps, 1),
                                img_source: LinkedImageSource::Inline(group!(caps, 2)),
                                link_target: LinkedImageTarget::Reference(group!(caps, 3)),
                            }),
                        "[",
                    ),
                    "linked_image_rr" => (
                        LINKED_IMAGE_REF_REF
                            .captures(remaining)
                            .map(|caps| Element::LinkedImage {
                                alt: group!(caps, 1),
                                img_source: LinkedImageSource::Reference(group!(caps, 2)),
                                link_target: LinkedImageTarget::Reference(group!(caps, 3)),
                            }),
                        "[",
                    ),
                    "inline_image" => (
                        INLINE_IMAGE_REGEX.captures(remaining).map(|caps| Element::InlineImage {
                            alt: group!(caps, 1),
                            url: group!(caps, 2),
                        }),
                        "!",
                    ),
                    "ref_image" => (
                        REF_IMAGE_REGEX.captures(remaining).map(|caps| {
                            let alt = group!(caps, 1);
                            let reference = group!(caps, 2);
                            if reference.is_empty() {
                                Element::EmptyReferenceImage { alt }
                            } else {
                                Element::ReferenceImage { alt, reference }
                            }
                        }),
                        "!",
                    ),
                    "footnote_ref" => (
                        FOOTNOTE_REF_REGEX
                            .captures(remaining)
                            .map(|caps| Element::FootnoteReference { note: group!(caps, 1) }),
                        "[",
                    ),
                    "inline_link" => (
                        INLINE_LINK_FANCY_REGEX
                            .captures(remaining)
                            .ok()
                            .flatten()
                            .map(|caps| Element::Link {
                                text: group!(caps, 1),
                                url: group!(caps, 2),
                            }),
                        "[",
                    ),
                    "ref_link" => (
                        REF_LINK_REGEX.captures(remaining).ok().flatten().map(|caps| {
                            let text = group!(caps, 1);
                            let reference = group!(caps, 2);
                            if reference.is_empty() {
                                Element::EmptyReferenceLink { text }
                            } else {
                                Element::ReferenceLink { text, reference }
                            }
                        }),
                        "[",
                    ),
                    "shortcut_ref" => (
                        SHORTCUT_REF_REGEX
                            .captures(remaining)
                            .ok()
                            .flatten()
                            .map(|caps| Element::ShortcutReference {
                                reference: group!(caps, 1),
                            }),
                        "[",
                    ),
                    "wiki_link" => (
                        WIKI_LINK_REGEX
                            .captures(remaining)
                            .map(|caps| Element::WikiLink(group!(caps, 1))),
                        "[[",
                    ),
                    "display_math" => (
                        DISPLAY_MATH_REGEX
                            .captures(remaining)
                            .map(|caps| Element::DisplayMath(group!(caps, 1))),
                        "$$",
                    ),
                    "inline_math" => (
                        INLINE_MATH_REGEX
                            .captures(remaining)
                            .ok()
                            .flatten()
                            .map(|caps| Element::InlineMath(group!(caps, 1))),
                        "$",
                    ),
                    "emoji" => (
                        EMOJI_SHORTCODE_REGEX
                            .captures(remaining)
                            .map(|caps| Element::EmojiShortcode(group!(caps, 1))),
                        ":",
                    ),
                    // The remaining kinds keep the matched source text verbatim.
                    "html_entity" => (Some(Element::HtmlEntity(remaining[pos..match_end].to_string())), "["),
                    "hugo_shortcode" => (
                        Some(Element::HugoShortcode(remaining[pos..match_end].to_string())),
                        "[",
                    ),
                    "autolink" => (Some(Element::Autolink(remaining[pos..match_end].to_string())), "["),
                    "html_tag" => (Some(Element::HtmlTag(remaining[pos..match_end].to_string())), "["),
                    _ => (None, "["),
                };

                match parsed {
                    Some(element) => {
                        elements.push(element);
                        remaining = &remaining[match_end..];
                    }
                    None => {
                        elements.push(Element::Text(fallback.to_string()));
                        remaining = &remaining[fallback.len()..];
                    }
                }
            }
            _ => {
                // Emit plain text up to the special construct, if any precedes it.
                if next_special > 0 && next_special < remaining.len() {
                    elements.push(Element::Text(remaining[..next_special].to_string()));
                    remaining = &remaining[next_special..];
                }

                match special_type {
                    "code" => match remaining[1..].find('`') {
                        Some(code_end) => {
                            elements.push(Element::Code(remaining[1..1 + code_end].to_string()));
                            remaining = &remaining[1 + code_end + 1..];
                        }
                        None => {
                            // Unterminated backtick: the rest is plain text.
                            elements.push(Element::Text(remaining.to_string()));
                            break;
                        }
                    },
                    "attr_list" => {
                        elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                        remaining = &remaining[attr_list_len..];
                    }
                    "pulldown_emphasis" => match pulldown_emphasis {
                        Some(span) => {
                            let span_len = span.end - span.start;
                            let element = if span.is_strikethrough {
                                Element::Strikethrough(span.content.clone())
                            } else if span.is_strong {
                                Element::Bold {
                                    content: span.content.clone(),
                                    underscore: span.uses_underscore,
                                }
                            } else {
                                Element::Italic {
                                    content: span.content.clone(),
                                    underscore: span.uses_underscore,
                                }
                            };
                            elements.push(element);
                            remaining = &remaining[span_len..];
                        }
                        None => {
                            elements.push(Element::Text(remaining[..1].to_string()));
                            remaining = &remaining[1..];
                        }
                    },
                    _ => {
                        // Nothing special left: the rest is plain text.
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
            }
        }
    }

    elements
}
/// Reflows parsed inline elements so that each output line holds one sentence.
///
/// Text elements are appended to the pending line and split at sentence
/// boundaries. Emphasis-like elements are delegated to
/// `handle_emphasis_sentence_split`. Every other element is appended to the
/// pending line in rendered form, separated by a space unless it directly
/// follows the previous element or an opening bracket.
fn reflow_elements_sentence_per_line(
    elements: &[Element],
    custom_abbreviations: &Option<Vec<String>>,
    require_sentence_capital: bool,
) -> Vec<String> {
    let abbreviations = get_abbreviations(custom_abbreviations);
    let mut lines: Vec<String> = Vec::new();
    let mut current_line = String::new();

    // A chunk is a finished sentence when it ends in `.`/`!`/`?` and that
    // punctuation is not part of an abbreviation.
    let completes_sentence =
        |chunk: &str| ends_with_sentence_punct(chunk) && !text_ends_with_abbreviation(chunk, &abbreviations);

    for (idx, element) in elements.iter().enumerate() {
        match element {
            Element::Text(text) => {
                let combined = format!("{current_line}{text}");
                let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);

                if sentences.len() > 1 {
                    let last_idx = sentences.len() - 1;
                    for (i, sentence) in sentences.into_iter().enumerate() {
                        // Whether this sentence stays pending instead of being emitted.
                        let keep_pending = if i == 0 {
                            text_ends_with_abbreviation(sentence.trim(), &abbreviations)
                        } else if i == last_idx {
                            !completes_sentence(sentence.trim())
                        } else {
                            // Middle sentences are emitted without touching the pending line.
                            lines.push(sentence);
                            continue;
                        };

                        if keep_pending {
                            current_line = sentence;
                        } else {
                            lines.push(sentence);
                            current_line.clear();
                        }
                    }
                } else {
                    let trimmed = combined.trim();
                    if trimmed.is_empty() {
                        continue;
                    }
                    if completes_sentence(trimmed) {
                        lines.push(trimmed.to_string());
                        current_line.clear();
                    } else {
                        current_line = combined;
                    }
                }
            }
            Element::Italic { content, underscore } => {
                let marker = if *underscore { "_" } else { "*" };
                handle_emphasis_sentence_split(
                    content,
                    marker,
                    &abbreviations,
                    require_sentence_capital,
                    &mut current_line,
                    &mut lines,
                );
            }
            Element::Bold { content, underscore } => {
                let marker = if *underscore { "__" } else { "**" };
                handle_emphasis_sentence_split(
                    content,
                    marker,
                    &abbreviations,
                    require_sentence_capital,
                    &mut current_line,
                    &mut lines,
                );
            }
            Element::Strikethrough(content) => {
                handle_emphasis_sentence_split(
                    content,
                    "~~",
                    &abbreviations,
                    require_sentence_capital,
                    &mut current_line,
                    &mut lines,
                );
            }
            other => {
                // Adjacent means no whitespace separated this element from the previous one.
                let is_adjacent = idx > 0
                    && match &elements[idx - 1] {
                        Element::Text(prev) => !prev.is_empty() && !prev.ends_with(char::is_whitespace),
                        _ => true,
                    };
                let needs_space = !current_line.is_empty()
                    && !current_line.ends_with(|c: char| matches!(c, ' ' | '(' | '['));
                if !is_adjacent && needs_space {
                    current_line.push(' ');
                }
                current_line.push_str(&other.to_string());
            }
        }
    }

    if !current_line.is_empty() {
        lines.push(current_line.trim().to_string());
    }
    lines
}
/// Appends an emphasised span to the sentence-per-line output, splitting it
/// into one emphasised chunk per sentence when its content holds several.
///
/// * `content` – text between the emphasis markers.
/// * `marker` – the delimiter wrapped around every emitted chunk
///   (`*`, `_`, `**`, `__` or `~~`).
/// * `current_line` – the pending, not-yet-emitted line; it is extended,
///   flushed into `lines`, or replaced as sentences complete.
/// * `lines` – finished output lines.
///
/// A chunk is emitted as a finished line when it ends with `.`, `!` or `?`
/// and that punctuation is not part of an abbreviation. Otherwise it stays
/// pending in `current_line`.
fn handle_emphasis_sentence_split(
    content: &str,
    marker: &str,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
    current_line: &mut String,
    lines: &mut Vec<String>,
) {
    let wrap = |inner: &str| format!("{marker}{inner}{marker}");
    let completes_sentence =
        |chunk: &str| ends_with_sentence_punct(chunk) && !text_ends_with_abbreviation(chunk, abbreviations);
    // Separates the span from preceding text unless that text already ends in
    // a space or an opening bracket.
    let push_separator = |line: &mut String| {
        if !line.is_empty() && !line.ends_with(|c: char| matches!(c, ' ' | '(' | '[')) {
            line.push(' ');
        }
    };

    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);

    if sentences.len() <= 1 {
        // Single sentence (or none): keep the content exactly as written.
        push_separator(current_line);
        current_line.push_str(&wrap(content));
        if completes_sentence(content.trim()) {
            lines.push(std::mem::take(current_line));
        }
        return;
    }

    let last_idx = sentences.len() - 1;
    for (i, sentence) in sentences.iter().enumerate() {
        let trimmed = sentence.trim();
        if trimmed.is_empty() {
            continue;
        }

        if i == 0 {
            // The first sentence continues whatever is already pending.
            push_separator(current_line);
            current_line.push_str(&wrap(trimmed));
            if completes_sentence(trimmed) {
                lines.push(std::mem::take(current_line));
            }
            continue;
        }

        // If the first sentence was not flushed (e.g. it ends with a closing
        // quote rather than bare punctuation), emit it now. Otherwise it would
        // be overwritten by the last sentence or emitted out of order.
        if !current_line.is_empty() {
            lines.push(std::mem::take(current_line));
        }

        let line = wrap(trimmed);
        if i == last_idx && !completes_sentence(trimmed) {
            // An unfinished final sentence stays pending for following elements.
            *current_line = line;
        } else {
            lines.push(line);
        }
    }
}
/// Lower-case words (conjunctions, relative pronouns and connective adverbs)
/// that `split_at_break_word` searches for when looking for a place to break
/// an over-long line. The split is made immediately before the matched word.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
/// Returns `true` for the characters after which a clause-level line break
/// may be placed: comma, semicolon, colon and em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
/// Returns the byte range `(start, end)` that every non-text element occupies
/// in the string obtained by rendering all `elements` back to back.
///
/// Plain text elements advance the running offset but produce no span.
fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
    let mut cursor = 0usize;
    elements
        .iter()
        .filter_map(|element| {
            let start = cursor;
            cursor += element.to_string().len();
            (!matches!(element, Element::Text(_))).then_some((start, cursor))
        })
        .collect()
}
/// Returns `true` when `pos` lies strictly between the start and end of any
/// span; a position equal to either edge of a span counts as outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
/// Fraction of the line length used as a lower bound: the clause and
/// break-word splitters refuse a split whose first part is shorter than
/// `line_length * MIN_SPLIT_RATIO`, and the semantic reflow tries to merge
/// lines shorter than that back into the preceding line.
const MIN_SPLIT_RATIO: f64 = 0.3;
/// Splits `text` after the last clause punctuation mark that still fits
/// within `line_length`.
///
/// Punctuation located inside one of `element_spans` is skipped. Returns
/// `None` when no usable mark exists, when the first part would be shorter
/// than `line_length * MIN_SPLIT_RATIO`, or when nothing but whitespace
/// follows the mark. The returned remainder has its leading whitespace
/// removed.
fn split_at_clause_punctuation(
    text: &str,
    line_length: usize,
    element_spans: &[(usize, usize)],
    length_mode: ReflowLengthMode,
) -> Option<(String, String)> {
    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;

    // Byte offset just past the last character that fits in `line_length`.
    let mut used_width = 0;
    let mut fit_end = 0;
    for (byte_idx, ch) in text.char_indices() {
        let mut buf = [0u8; 4];
        let ch_width = display_len(ch.encode_utf8(&mut buf), length_mode);
        if used_width + ch_width > line_length {
            break;
        }
        used_width += ch_width;
        fit_end = byte_idx + ch.len_utf8();
    }

    // Walk backwards over the fitting prefix; the candidate split point is the
    // byte offset immediately after each punctuation mark.
    let split_after = text[..fit_end]
        .char_indices()
        .rev()
        .filter(|&(_, ch)| is_clause_punctuation(ch))
        .map(|(byte_idx, ch)| byte_idx + ch.len_utf8())
        .find(|&after| !is_inside_element(after, element_spans))?;

    let head = &text[..split_after];
    if display_len(head, length_mode) < min_first_len {
        return None;
    }

    let tail = text[split_after..].trim_start();
    if tail.is_empty() {
        return None;
    }

    Some((head.to_string(), tail.to_string()))
}
/// Splits `text` immediately before the right-most break word (see
/// `BREAK_WORDS`) that leaves an acceptable first part.
///
/// A candidate must be delimited by ASCII spaces (or sit at the very start),
/// must not start inside one of `element_spans`, and the text before it (with
/// trailing whitespace removed) must measure between
/// `line_length * MIN_SPLIT_RATIO` and `line_length` in `length_mode`.
///
/// Returns `(first, rest)` where `rest` begins with the break word, or `None`
/// when no candidate qualifies.
fn split_at_break_word(
    text: &str,
    line_length: usize,
    element_spans: &[(usize, usize)],
    length_mode: ReflowLengthMode,
) -> Option<(String, String)> {
    // Offsets found in `lower` are used to index `text`, so both strings must
    // have identical byte layout. Unicode lowercasing can change the encoded
    // length of a character, which would misalign (or even invalidate) those
    // offsets; ASCII-only lowercasing never does, and every break word is
    // ASCII, so no match is lost.
    let lower = text.to_ascii_lowercase();
    let bytes = text.as_bytes();
    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;

    // Byte offset of the best (right-most) qualifying break word so far.
    let mut best_split: Option<usize> = None;

    for &word in BREAK_WORDS {
        let mut search_start = 0;
        while let Some(pos) = lower[search_start..].find(word) {
            let abs_pos = search_start + pos;
            let word_end = abs_pos + word.len();

            let preceded_by_space = abs_pos == 0 || bytes[abs_pos - 1] == b' ';
            let followed_by_space = bytes.get(word_end) == Some(&b' ');

            if preceded_by_space && followed_by_space {
                let first_part_len = display_len(text[..abs_pos].trim_end(), length_mode);
                let length_ok = first_part_len >= min_first_len && first_part_len <= line_length;
                if length_ok
                    && !is_inside_element(abs_pos, element_spans)
                    && best_split.is_none_or(|prev_pos| abs_pos > prev_pos)
                {
                    best_split = Some(abs_pos);
                }
            }

            search_start = word_end;
        }
    }

    let byte_start = best_split?;
    let first = text[..byte_start].trim_end();
    let rest = &text[byte_start..];
    if first.is_empty() || rest.trim().is_empty() {
        return None;
    }
    Some((first.to_string(), rest.to_string()))
}
fn cascade_split_line(
text: &str,
line_length: usize,
abbreviations: &Option<Vec<String>>,
length_mode: ReflowLengthMode,
attr_lists: bool,
) -> Vec<String> {
if line_length == 0 || display_len(text, length_mode) <= line_length {
return vec![text.to_string()];
}
let elements = parse_markdown_elements_inner(text, attr_lists);
let element_spans = compute_element_spans(&elements);
if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
let mut result = vec![first];
result.extend(cascade_split_line(
&rest,
line_length,
abbreviations,
length_mode,
attr_lists,
));
return result;
}
if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
let mut result = vec![first];
result.extend(cascade_split_line(
&rest,
line_length,
abbreviations,
length_mode,
attr_lists,
));
return result;
}
let options = ReflowOptions {
line_length,
break_on_sentences: false,
preserve_breaks: false,
sentence_per_line: false,
semantic_line_breaks: false,
abbreviations: abbreviations.clone(),
length_mode,
attr_lists,
require_sentence_capital: true,
max_list_continuation_indent: None,
};
reflow_elements(&elements, &options)
}
/// Semantic line-break reflow: one sentence per line, with sentences longer
/// than `options.line_length` split further by `cascade_split_line`.
///
/// A final pass glues a short line (below `line_length * MIN_SPLIT_RATIO`)
/// onto the previous line when that line does not end a sentence and the
/// combination still fits. A `line_length` of 0 disables all length handling.
fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
    let sentence_lines =
        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);

    let limit = options.line_length;
    if limit == 0 {
        return sentence_lines;
    }
    let mode = options.length_mode;

    // Pass 1: break up sentences that exceed the limit.
    let mut split_lines = Vec::new();
    for sentence in sentence_lines {
        if display_len(&sentence, mode) > limit {
            split_lines.extend(cascade_split_line(
                &sentence,
                limit,
                &options.abbreviations,
                mode,
                options.attr_lists,
            ));
        } else {
            split_lines.push(sentence);
        }
    }

    // Pass 2: merge stub lines back into their predecessor where allowed.
    let min_line_len = ((limit as f64) * MIN_SPLIT_RATIO) as usize;
    let mut merged: Vec<String> = Vec::with_capacity(split_lines.len());
    for line in split_lines {
        let is_stub = display_len(&line, mode) < min_line_len && !line.trim().is_empty();
        if is_stub {
            if let Some(prev) = merged.last_mut() {
                // Look past closing quotes/brackets to find the real last character.
                let prev_ends_at_sentence = prev
                    .trim_end()
                    .chars()
                    .rev()
                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
                    .is_some_and(|c| matches!(c, '.' | '!' | '?'));
                if !prev_ends_at_sentence {
                    let combined = format!("{prev} {line}");
                    if display_len(&combined, mode) <= limit {
                        *prev = combined;
                        continue;
                    }
                }
            }
        }
        merged.push(line);
    }
    merged
}
/// Returns the byte index of the right-most ASCII space in `line` that is not
/// strictly inside any of `element_spans`, or `None` if there is no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(byte_idx, _)| byte_idx)
        .find(|&byte_idx| {
            !element_spans
                .iter()
                .any(|&(start, end)| start < byte_idx && byte_idx < end)
        })
}
/// Greedy word wrap of parsed inline elements to `options.line_length`.
///
/// * Plain text is wrapped word by word.
/// * Italic, bold and strikethrough elements wider than the limit are wrapped
///   word by word too, with the opening marker attached to the first word and
///   the closing marker to the last.
/// * Every other element is kept in one piece. The byte range each such
///   element occupies on the current line is recorded so that a later
///   back-off to the previous space (`rfind_safe_space`) never cuts through it.
///
/// Elements that touch their neighbour without whitespace are treated as
/// adjacent: no space is inserted between them, and when they overflow, the
/// whole trailing run after the last safe space moves to the next line.
///
/// Returns the wrapped lines without trailing whitespace.
fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current_line = String::new();
    let mut current_length = 0;
    // Byte ranges within `current_line` occupied by atomic (non-text) elements.
    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
    let length_mode = options.length_mode;

    for (idx, element) in elements.iter().enumerate() {
        let element_str = format!("{element}");
        let element_len = element.display_width(length_mode);

        // Adjacent means there is no whitespace between this element and the
        // previous one, so they must stay glued together.
        let is_adjacent_to_prev = if idx > 0 {
            match (&elements[idx - 1], element) {
                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
                _ => true,
            }
        } else {
            false
        };

        if let Element::Text(text) = element {
            let has_leading_space = text.starts_with(char::is_whitespace);
            let words: Vec<&str> = text.split_whitespace().collect();
            for (i, word) in words.iter().enumerate() {
                let word_len = display_len(word, length_mode);
                // Pure punctuation tokens are never pushed to a new line or
                // preceded by a space.
                let is_trailing_punct = word
                    .chars()
                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
                let is_first_adjacent = i == 0 && is_adjacent_to_prev;

                if is_first_adjacent {
                    if current_length + word_len > options.line_length && current_length > 0 {
                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
                            // Move everything after the last safe space, plus
                            // this word, to a fresh line.
                            let before = current_line[..last_space].trim_end().to_string();
                            let after = current_line[last_space + 1..].to_string();
                            lines.push(before);
                            current_line = format!("{after}{word}");
                            current_length = display_len(&current_line, length_mode);
                            current_line_element_spans.clear();
                        } else {
                            // Nowhere to break: let the line overflow.
                            current_line.push_str(word);
                            current_length += word_len;
                        }
                    } else {
                        current_line.push_str(word);
                        current_length += word_len;
                    }
                } else if current_length > 0
                    && current_length + 1 + word_len > options.line_length
                    && !is_trailing_punct
                {
                    lines.push(current_line.trim().to_string());
                    current_line = word.to_string();
                    current_length = word_len;
                    current_line_element_spans.clear();
                } else {
                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
                        current_line.push(' ');
                        current_length += 1;
                    }
                    current_line.push_str(word);
                    current_length += word_len;
                }
            }
        } else if matches!(
            element,
            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
        ) && element_len > options.line_length
        {
            // Emphasis too wide for one line: wrap its words individually.
            let (content, marker): (&str, &str) = match element {
                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
                Element::Strikethrough(content) => (content.as_str(), "~~"),
                _ => unreachable!(),
            };
            let words: Vec<&str> = content.split_whitespace().collect();
            let n = words.len();
            if n == 0 {
                // Whitespace-only content: emit the bare marker pair.
                let full = format!("{marker}{marker}");
                let full_len = display_len(&full, length_mode);
                if !is_adjacent_to_prev && current_length > 0 {
                    current_line.push(' ');
                    current_length += 1;
                }
                current_line.push_str(&full);
                current_length += full_len;
            } else {
                for (i, word) in words.iter().enumerate() {
                    let is_first = i == 0;
                    let is_last = i == n - 1;
                    let word_str: String = match (is_first, is_last) {
                        (true, true) => format!("{marker}{word}{marker}"),
                        (true, false) => format!("{marker}{word}"),
                        (false, true) => format!("{word}{marker}"),
                        (false, false) => word.to_string(),
                    };
                    let word_len = display_len(&word_str, length_mode);
                    let needs_space = if is_first {
                        !is_adjacent_to_prev && current_length > 0
                    } else {
                        current_length > 0
                    };
                    if needs_space && current_length + 1 + word_len > options.line_length {
                        lines.push(current_line.trim_end().to_string());
                        current_line = word_str;
                        current_length = word_len;
                        current_line_element_spans.clear();
                    } else {
                        if needs_space {
                            current_line.push(' ');
                            current_length += 1;
                        }
                        current_line.push_str(&word_str);
                        current_length += word_len;
                    }
                }
            }
        } else {
            // Atomic element: placed whole and its byte span recorded.
            if is_adjacent_to_prev {
                if current_length + element_len > options.line_length {
                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
                        let before = current_line[..last_space].trim_end().to_string();
                        let after = current_line[last_space + 1..].to_string();
                        lines.push(before);
                        current_line = format!("{after}{element_str}");
                        current_length = display_len(&current_line, length_mode);
                        current_line_element_spans.clear();
                        let start = after.len();
                        current_line_element_spans.push((start, start + element_str.len()));
                    } else {
                        let start = current_line.len();
                        current_line.push_str(&element_str);
                        current_length += element_len;
                        current_line_element_spans.push((start, current_line.len()));
                    }
                } else {
                    let start = current_line.len();
                    current_line.push_str(&element_str);
                    current_length += element_len;
                    current_line_element_spans.push((start, current_line.len()));
                }
            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
                lines.push(current_line.trim().to_string());
                current_line = element_str.clone();
                current_length = element_len;
                current_line_element_spans.clear();
                current_line_element_spans.push((0, element_str.len()));
            } else {
                // No separating space directly after an opening bracket.
                let ends_with_opener =
                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
                if current_length > 0 && !ends_with_opener {
                    current_line.push(' ');
                    current_length += 1;
                }
                let start = current_line.len();
                current_line.push_str(&element_str);
                current_length += element_len;
                current_line_element_spans.push((start, current_line.len()));
            }
        }
    }

    if !current_line.is_empty() {
        lines.push(current_line.trim_end().to_string());
    }
    lines
}
/// Reflows a whole Markdown document according to `options`.
///
/// The input is processed line by line. Structural lines (blank lines,
/// headings, `:::` lines, fenced and indented code, horizontal rules, table
/// rows, reference definitions, definition-list items) are copied through
/// unchanged. Blockquote lines, list items and ordinary paragraphs have their
/// text passed to `reflow_line` and are re-emitted with their prefix or
/// indentation.
///
/// The result ends with a newline whenever `content` does.
pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let mut result = Vec::new();
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim();
        // Blank line: emitted as an empty line.
        if trimmed.is_empty() {
            result.push(String::new());
            i += 1;
            continue;
        }
        // Heading: copied verbatim.
        if trimmed.starts_with('#') {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // Line starting with `:::`: copied verbatim.
        if trimmed.starts_with(":::") {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // Fenced code block: copy the opening line and everything up to and
        // including the next line that starts with a fence marker.
        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
            result.push(line.to_string());
            i += 1;
            while i < lines.len() {
                result.push(lines[i].to_string());
                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
                    i += 1;
                    break;
                }
                i += 1;
            }
            continue;
        }
        // Indented code block (indentation width >= 4): copy it together with
        // following lines that are either equally indented or blank.
        if calculate_indentation_width_default(line) >= 4 {
            result.push(line.to_string());
            i += 1;
            while i < lines.len() {
                let next_line = lines[i];
                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
                    result.push(next_line.to_string());
                    i += 1;
                } else {
                    break;
                }
            }
            continue;
        }
        // Blockquote line: reflow the text after the first `>` and re-emit
        // every resulting line with the same prefix. Each quoted line is
        // handled on its own.
        if trimmed.starts_with('>') {
            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
            let quote_prefix = line[0..gt_pos + 1].to_string();
            let quote_content = &line[quote_prefix.len()..].trim_start();
            let reflowed = reflow_line(quote_content, options);
            for reflowed_line in reflowed.iter() {
                result.push(format!("{quote_prefix} {reflowed_line}"));
            }
            i += 1;
            continue;
        }
        // Horizontal rule: copied verbatim.
        if is_horizontal_rule(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // List item: locate the marker and the content, gather continuation
        // lines, then reflow the combined text.
        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
            let indent = line.len() - line.trim_start().len();
            let indent_str = " ".repeat(indent);
            // `marker_end` is the byte offset just past the list marker;
            // `content_start` is where the item text begins.
            let mut marker_end = indent;
            let mut content_start = indent;
            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
                // Numbered item: the marker runs through the first '.'.
                if let Some(period_pos) = line[indent..].find('.') {
                    marker_end = indent + period_pos + 1; content_start = marker_end;
                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                        content_start += 1;
                    }
                }
            } else {
                // Bullet item: the marker is a single character.
                marker_end = indent + 1; content_start = marker_end;
                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                    content_start += 1;
                }
            }
            // Continuation lines must be indented at least to the item text.
            let min_continuation_indent = content_start;
            let rest = &line[content_start..];
            // Task-list checkbox: treat it as part of the marker.
            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
                marker_end = content_start + 3; content_start += 4; }
            let marker = &line[indent..marker_end];
            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
            i += 1;
            // Collect continuation lines until a block boundary or a line that
            // is not indented far enough.
            while i < lines.len() {
                let next_line = lines[i];
                let next_trimmed = next_line.trim();
                if is_block_boundary(next_trimmed) {
                    break;
                }
                let next_indent = next_line.len() - next_line.trim_start().len();
                if next_indent >= min_continuation_indent {
                    let trimmed_start = next_line.trim_start();
                    list_content.push(trim_preserving_hard_break(trimmed_start));
                    i += 1;
                } else {
                    break;
                }
            }
            // With `preserve_breaks` only the first line is reflowed here;
            // otherwise the lines are joined, keeping newlines when any line
            // carries a hard break.
            let combined_content = if options.preserve_breaks {
                list_content[0].clone()
            } else {
                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
                if has_hard_breaks {
                    list_content.join("\n")
                } else {
                    list_content.join(" ")
                }
            };
            let trimmed_marker = marker;
            // Indentation of wrapped lines: aligned with the item text,
            // optionally capped at `max_list_continuation_indent` columns past
            // the item's own indentation.
            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
                indent + (content_start - indent).min(max_indent)
            } else {
                content_start
            };
            // The marker plus one space reduce the width available for text.
            let prefix_length = indent + trimmed_marker.len() + 1;
            let adjusted_options = ReflowOptions {
                line_length: options.line_length.saturating_sub(prefix_length),
                ..options.clone()
            };
            let reflowed = reflow_line(&combined_content, &adjusted_options);
            for (j, reflowed_line) in reflowed.iter().enumerate() {
                if j == 0 {
                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
                } else {
                    let continuation_indent = " ".repeat(continuation_spaces);
                    result.push(format!("{continuation_indent}{reflowed_line}"));
                }
            }
            continue;
        }
        // Potential table row: copied verbatim.
        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // Line that starts with '[' and contains "]:" (reference-style
        // definition): copied verbatim.
        if trimmed.starts_with('[') && line.contains("]:") {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // Definition-list item: copied verbatim.
        if is_definition_list_item(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        // Ordinary paragraph text from here on. A paragraph is single-line
        // when the next line is missing or is a block boundary.
        let mut is_single_line_paragraph = true;
        if i + 1 < lines.len() {
            let next_trimmed = lines[i + 1].trim();
            if !is_block_boundary(next_trimmed) {
                is_single_line_paragraph = false;
            }
        }
        // A single-line paragraph that already fits is left untouched.
        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
            result.push(line.to_string());
            i += 1;
            continue;
        }
        let mut paragraph_parts = Vec::new();
        let mut current_part = vec![line];
        i += 1;
        if options.preserve_breaks {
            // Reflow this line alone and restore its hard-break marker
            // (backslash or trailing whitespace) on the last output line.
            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
                Some("\\")
            } else if line.ends_with(" ") {
                Some(" ")
            } else {
                None
            };
            let reflowed = reflow_line(line, options);
            if let Some(break_marker) = hard_break_type {
                if !reflowed.is_empty() {
                    let mut reflowed_with_break = reflowed;
                    let last_idx = reflowed_with_break.len() - 1;
                    if !has_hard_break(&reflowed_with_break[last_idx]) {
                        reflowed_with_break[last_idx].push_str(break_marker);
                    }
                    result.extend(reflowed_with_break);
                }
            } else {
                result.extend(reflowed);
            }
        } else {
            // Gather the remaining paragraph lines, starting a new part after
            // a hard break or, in sentence-per-line mode, after a line that
            // ends a sentence.
            while i < lines.len() {
                let prev_line = if !current_part.is_empty() {
                    current_part.last().unwrap()
                } else {
                    ""
                };
                let next_line = lines[i];
                let next_trimmed = next_line.trim();
                if is_block_boundary(next_trimmed) {
                    break;
                }
                let prev_trimmed = prev_line.trim();
                let abbreviations = get_abbreviations(&options.abbreviations);
                // Sentence end: terminal punctuation, optionally followed by a
                // closing emphasis marker or quote, and not an abbreviation.
                let ends_with_sentence = (prev_trimmed.ends_with('.')
                    || prev_trimmed.ends_with('!')
                    || prev_trimmed.ends_with('?')
                    || prev_trimmed.ends_with(".*")
                    || prev_trimmed.ends_with("!*")
                    || prev_trimmed.ends_with("?*")
                    || prev_trimmed.ends_with("._")
                    || prev_trimmed.ends_with("!_")
                    || prev_trimmed.ends_with("?_")
                    || prev_trimmed.ends_with(".\"")
                    || prev_trimmed.ends_with("!\"")
                    || prev_trimmed.ends_with("?\"")
                    || prev_trimmed.ends_with(".'")
                    || prev_trimmed.ends_with("!'")
                    || prev_trimmed.ends_with("?'")
                    || prev_trimmed.ends_with(".\u{201D}")
                    || prev_trimmed.ends_with("!\u{201D}")
                    || prev_trimmed.ends_with("?\u{201D}")
                    || prev_trimmed.ends_with(".\u{2019}")
                    || prev_trimmed.ends_with("!\u{2019}")
                    || prev_trimmed.ends_with("?\u{2019}"))
                    && !text_ends_with_abbreviation(
                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
                        &abbreviations,
                    );
                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
                    paragraph_parts.push(current_part.join(" "));
                    current_part = vec![next_line];
                } else {
                    current_part.push(next_line);
                }
                i += 1;
            }
            if !current_part.is_empty() {
                if current_part.len() == 1 {
                    paragraph_parts.push(current_part[0].to_string());
                } else {
                    paragraph_parts.push(current_part.join(" "));
                }
            }
            // Reflow each part. Outside sentence-per-line mode, every part but
            // the last gets a trailing-space marker appended unless its final
            // line already has a hard break.
            for (j, part) in paragraph_parts.iter().enumerate() {
                let reflowed = reflow_line(part, options);
                result.extend(reflowed);
                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
                    let last_idx = result.len() - 1;
                    if !has_hard_break(&result[last_idx]) {
                        result[last_idx].push_str(" ");
                    }
                }
            }
        }
    }
    let result_text = result.join("\n");
    // Keep the input's trailing newline.
    if content.ends_with('\n') && !result_text.ends_with('\n') {
        format!("{result_text}\n")
    } else {
        result_text
    }
}
/// Result of reflowing the single paragraph that contains a given line.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph starts.
    pub start_byte: usize,
    /// Byte offset just past the paragraph; the paragraph's trailing newline
    /// is included when there is one.
    pub end_byte: usize,
    /// Reflowed text for the range `start_byte..end_byte`; it ends with a
    /// newline exactly when that range includes one.
    pub reflowed_text: String,
}
/// One line of a blockquote paragraph, with its quote prefix separated from
/// its content.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Line text without the blockquote prefix.
    pub(crate) content: String,
    /// `true` when the line carried its own blockquote prefix, `false` for a
    /// lazy continuation line.
    pub(crate) is_explicit: bool,
    /// The prefix as written on the line; `None` for lazy lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Builds the record for a line written with an explicit blockquote prefix.
    pub fn explicit(content: String, prefix: String) -> Self {
        let prefix = Some(prefix);
        Self {
            prefix,
            is_explicit: true,
            content,
        }
    }

    /// Builds the record for a lazy continuation line (no prefix).
    pub fn lazy(content: String) -> Self {
        Self {
            prefix: None,
            is_explicit: false,
            content,
        }
    }
}
/// How the lines after the first are written when a blockquote paragraph is
/// re-emitted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every line carries the blockquote prefix.
    Explicit,
    /// Continuation lines are written without the prefix, unless their
    /// content forces one.
    Lazy,
}
/// Determines the continuation style used by the majority of the lines after
/// the first one.
///
/// Ties, including the case of no continuation lines at all, resolve to
/// [`BlockquoteContinuationStyle::Explicit`].
pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
    // The first line is skipped; only continuation lines vote.
    let (explicit_votes, lazy_votes) =
        lines
            .iter()
            .skip(1)
            .fold((0usize, 0usize), |(explicit, lazy), line| {
                if line.is_explicit {
                    (explicit + 1, lazy)
                } else {
                    (explicit, lazy + 1)
                }
            });

    if explicit_votes >= lazy_votes {
        BlockquoteContinuationStyle::Explicit
    } else {
        BlockquoteContinuationStyle::Lazy
    }
}
/// Returns the blockquote prefix used by the most lines.
///
/// When several prefixes are equally common, the one that appears first
/// wins. Lines without a prefix are ignored; if no line has one, `fallback`
/// is returned.
pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
    // prefix -> (number of occurrences, index of first occurrence)
    let mut tally: std::collections::HashMap<&str, (usize, usize)> = std::collections::HashMap::new();

    let prefixed = lines
        .iter()
        .enumerate()
        .filter_map(|(idx, line)| line.prefix.as_deref().map(|prefix| (idx, prefix)));
    for (idx, prefix) in prefixed {
        let entry = tally.entry(prefix).or_insert((0, idx));
        entry.0 += 1;
    }

    tally
        .into_iter()
        // Highest count first; among equal counts the smallest first index.
        .max_by_key(|&(_, (count, first_idx))| (count, std::cmp::Reverse(first_idx)))
        .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix.to_string())
}
/// Returns `true` when a blockquote content line must keep an explicit
/// prefix even in lazy continuation style, because its content begins like
/// block-level markup: a nested quote, heading, code fence, list item,
/// horizontal rule, definition-list item, reference definition, `:::` line,
/// or a `<` that does not open an `<http`, `<https` or `<mailto:` autolink.
pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
    const BLOCK_OPENERS: [&str; 3] = ["```", "~~~", ":::"];
    const AUTOLINK_OPENERS: [&str; 3] = ["<http", "<https", "<mailto:"];

    let trimmed = content_line.trim_start();
    let starts_with_any = |prefixes: &[&str]| prefixes.iter().any(|&prefix| trimmed.starts_with(prefix));

    if trimmed.starts_with(['>', '#']) || starts_with_any(&BLOCK_OPENERS) {
        return true;
    }
    if is_unordered_list_marker(trimmed)
        || is_numbered_list_item(trimmed)
        || is_horizontal_rule(trimmed)
        || is_definition_list_item(trimmed)
    {
        return true;
    }
    if trimmed.starts_with('[') && trimmed.contains("]:") {
        return true;
    }
    trimmed.starts_with('<') && !starts_with_any(&AUTOLINK_OPENERS)
}
/// Reflows the content of one blockquote paragraph and re-applies prefixes.
///
/// The content lines are cut into segments at hard breaks. Each segment is
/// joined, reflowed with `options`, and has its hard-break marker restored on
/// its last output line. The first output line always receives
/// `explicit_prefix`; later lines receive it when `continuation_style` is
/// explicit or when their content requires it.
pub fn reflow_blockquote_content(
    lines: &[BlockquoteLineData],
    explicit_prefix: &str,
    continuation_style: BlockquoteContinuationStyle,
    options: &ReflowOptions,
) -> Vec<String> {
    let contents: Vec<&str> = lines.iter().map(|data| data.content.as_str()).collect();

    let mut reflowed_content_lines: Vec<String> = Vec::new();
    for segment in split_into_segments_strs(&contents) {
        // Hard-break marker terminating this segment, if any.
        let break_marker = segment.last().and_then(|&last| {
            let last = last.strip_suffix('\r').unwrap_or(last);
            if last.ends_with('\\') {
                Some("\\")
            } else if last.ends_with(" ") {
                Some(" ")
            } else {
                None
            }
        });

        // Remove a trailing backslash marker and trailing whitespace from
        // every line before joining.
        let joined = segment
            .iter()
            .map(|&piece| piece.strip_suffix('\\').unwrap_or(piece).trim_end())
            .collect::<Vec<&str>>()
            .join(" ");
        let segment_text = joined.trim();
        if segment_text.is_empty() {
            continue;
        }

        let mut reflowed = reflow_line(segment_text, options);
        if let (Some(marker), Some(last_line)) = (break_marker, reflowed.last_mut()) {
            if !has_hard_break(last_line.as_str()) {
                last_line.push_str(marker);
            }
        }
        reflowed_content_lines.extend(reflowed);
    }

    let always_explicit = continuation_style == BlockquoteContinuationStyle::Explicit;
    reflowed_content_lines
        .iter()
        .enumerate()
        .map(|(idx, line)| {
            if idx == 0 || always_explicit || should_force_explicit_blockquote_line(line) {
                format!("{explicit_prefix}{line}")
            } else {
                line.clone()
            }
        })
        .collect()
}
/// Returns `true` when a blockquote content line cannot belong to a
/// reflowable paragraph: it is blank, a block boundary, a potential table
/// row, a `:::` line, a template directive on its own, a standalone
/// attribute list, or a snippet block delimiter.
fn is_blockquote_content_boundary(content: &str) -> bool {
    let trimmed = content.trim();
    if trimmed.is_empty() || trimmed.starts_with(":::") || is_block_boundary(trimmed) {
        return true;
    }
    // The remaining checks look at the untrimmed content.
    crate::utils::table_utils::TableUtils::is_potential_table_row(content)
        || crate::utils::is_template_directive_only(content)
        || is_standalone_attr_list(content)
        || is_snippet_block_delimiter(content)
}
/// Groups `lines` into consecutive segments, closing a segment after every
/// line that ends with a hard break. Lines after the last hard break form a
/// final segment; empty input yields no segments.
fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
    lines
        .split_inclusive(|line| has_hard_break(line))
        .map(|segment| segment.to_vec())
        .collect()
}
/// Reflows the blockquote paragraph containing `lines[target_idx]`.
///
/// `lines` must be the result of `content.lines()`, and `target_idx` must be
/// a valid index into it. Returns `None` when the target line is not part of
/// a blockquote paragraph (neither an explicitly prefixed line nor a lazy
/// continuation of one) or when reflowing yields nothing.
///
/// The returned byte range is measured on `content` itself, so it is correct
/// for both `\n` and `\r\n` line endings. The replacement text always uses
/// `\n`.
fn reflow_blockquote_paragraph_at_line(
    content: &str,
    lines: &[&str],
    target_idx: usize,
    options: &ReflowOptions,
) -> Option<ParagraphReflow> {
    // Find the explicitly prefixed line that anchors the paragraph. For a
    // lazy target line, walk upwards until a prefixed line is found; a blank
    // line in between means the target is not in a blockquote.
    let mut anchor_idx = target_idx;
    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
        parsed.nesting_level
    } else {
        let mut found = None;
        let mut idx = target_idx;
        loop {
            if lines[idx].trim().is_empty() {
                break;
            }
            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
                found = Some((idx, parsed.nesting_level));
                break;
            }
            if idx == 0 {
                break;
            }
            idx -= 1;
        }
        let (idx, level) = found?;
        anchor_idx = idx;
        level
    };

    // Extend the paragraph upwards over lines of the same nesting level and
    // over lazy lines, stopping at blank lines and content boundaries.
    let mut para_start = anchor_idx;
    while para_start > 0 {
        let prev_idx = para_start - 1;
        let prev_line = lines[prev_idx];
        if prev_line.trim().is_empty() {
            break;
        }
        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            para_start = prev_idx;
            continue;
        }
        let prev_lazy = prev_line.trim_start();
        if is_blockquote_content_boundary(prev_lazy) {
            break;
        }
        para_start = prev_idx;
    }

    // The paragraph has to begin on an explicitly prefixed line: skip any
    // leading lazy lines and adopt the nesting level of that first line.
    while para_start < lines.len() {
        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
            para_start += 1;
            continue;
        };
        target_level = parsed.nesting_level;
        break;
    }
    if para_start >= lines.len() || para_start > target_idx {
        return None;
    }

    // Collect the paragraph's lines downwards. Collection stops after a line
    // with a hard break, at a blank line, at a change of nesting level, or at
    // a content boundary.
    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
    let mut idx = para_start;
    while idx < lines.len() {
        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
            break;
        }
        let line = lines[idx];
        if line.trim().is_empty() {
            break;
        }
        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            collected.push((
                idx,
                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
            ));
            idx += 1;
            continue;
        }
        let lazy_content = line.trim_start();
        if is_blockquote_content_boundary(lazy_content) {
            break;
        }
        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
        idx += 1;
    }
    if collected.is_empty() {
        return None;
    }
    let para_end = collected[collected.len() - 1].0;
    if target_idx < para_start || target_idx > para_end {
        return None;
    }

    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
    let fallback_prefix = line_data
        .iter()
        .find_map(|d| d.prefix.clone())
        .unwrap_or_else(|| "> ".to_string());
    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
    let continuation_style = blockquote_continuation_style(&line_data);

    // The prefix consumes part of the available width; never go below 1.
    let adjusted_line_length = options
        .line_length
        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
        .max(1);
    let adjusted_options = ReflowOptions {
        line_length: adjusted_line_length,
        ..options.clone()
    };
    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
    if styled_lines.is_empty() {
        return None;
    }

    // `str::lines` strips both `\n` and `\r\n`, so `line.len() + 1` would
    // undercount on CRLF input. `split_inclusive('\n')` yields the same lines
    // with their terminators still attached, giving exact byte lengths.
    let mut raw_lines = content.split_inclusive('\n');
    let start_byte: usize = raw_lines.by_ref().take(para_start).map(str::len).sum();
    let paragraph_bytes: usize = raw_lines.take(para_end - para_start + 1).map(str::len).sum();
    let end_byte = start_byte + paragraph_bytes;

    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
    let mut reflowed_text = styled_lines.join("\n");
    if includes_trailing_newline {
        if !reflowed_text.ends_with('\n') {
            reflowed_text.push('\n');
        }
    } else if reflowed_text.ends_with('\n') {
        reflowed_text = reflowed_text.trim_end_matches('\n').to_string();
    }

    Some(ParagraphReflow {
        start_byte,
        end_byte,
        reflowed_text,
    })
}
/// Reflows the paragraph containing the 1-based `line_number` to
/// `line_length`, measuring width with the default [`ReflowLengthMode`].
///
/// See [`reflow_paragraph_at_line_with_options`] for the return value.
pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
    let default_mode = ReflowLengthMode::default();
    reflow_paragraph_at_line_with_mode(content, line_number, line_length, default_mode)
}
pub fn reflow_paragraph_at_line_with_mode(
content: &str,
line_number: usize,
line_length: usize,
length_mode: ReflowLengthMode,
) -> Option<ParagraphReflow> {
let options = ReflowOptions {
line_length,
length_mode,
..Default::default()
};
reflow_paragraph_at_line_with_options(content, line_number, &options)
}
/// Reflows the paragraph that contains the given line.
///
/// * `content` - the full document text.
/// * `line_number` - 1-based line number inside `content`.
/// * `options` - reflow settings.
///
/// Returns `None` when `line_number` is 0 or past the last line, or when the
/// line is itself a paragraph boundary. Blockquote paragraphs are handled
/// first by `reflow_blockquote_paragraph_at_line`.
///
/// On success the result holds the byte range of the paragraph in `content`
/// (including its trailing line terminator when there is one) and the
/// replacement text. The range is measured on `content` itself, so it is
/// correct for both `\n` and `\r\n` line endings. The replacement text
/// always uses `\n`.
pub fn reflow_paragraph_at_line_with_options(
    content: &str,
    line_number: usize,
    options: &ReflowOptions,
) -> Option<ParagraphReflow> {
    if line_number == 0 {
        return None;
    }
    let lines: Vec<&str> = content.lines().collect();
    if line_number > lines.len() {
        return None;
    }
    let target_idx = line_number - 1;
    let target_line = lines[target_idx];
    let trimmed = target_line.trim();

    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
        return Some(blockquote_reflow);
    }
    if is_paragraph_boundary(trimmed, target_line) {
        return None;
    }

    // Grow the paragraph upwards and downwards until a boundary line.
    let mut para_start = target_idx;
    while para_start > 0 {
        let prev_idx = para_start - 1;
        let prev_line = lines[prev_idx];
        if is_paragraph_boundary(prev_line.trim(), prev_line) {
            break;
        }
        para_start = prev_idx;
    }
    let mut para_end = target_idx;
    while para_end + 1 < lines.len() {
        let next_idx = para_end + 1;
        let next_line = lines[next_idx];
        if is_paragraph_boundary(next_line.trim(), next_line) {
            break;
        }
        para_end = next_idx;
    }
    let paragraph_lines = &lines[para_start..=para_end];

    // `str::lines` strips both `\n` and `\r\n`, so `line.len() + 1` would
    // undercount on CRLF input. `split_inclusive('\n')` yields the same lines
    // with their terminators still attached, giving exact byte lengths.
    let mut raw_lines = content.split_inclusive('\n');
    let start_byte: usize = raw_lines.by_ref().take(para_start).map(str::len).sum();
    let paragraph_bytes: usize = raw_lines.take(paragraph_lines.len()).map(str::len).sum();
    let end_byte = start_byte + paragraph_bytes;

    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');

    let paragraph_text = paragraph_lines.join("\n");
    let mut reflowed_text = reflow_markdown(&paragraph_text, options);
    // Make the replacement end with a newline exactly when the replaced range does.
    if includes_trailing_newline {
        if !reflowed_text.ends_with('\n') {
            reflowed_text.push('\n');
        }
    } else if reflowed_text.ends_with('\n') {
        reflowed_text = reflowed_text.trim_end_matches('\n').to_string();
    }

    Some(ParagraphReflow {
        start_byte,
        end_byte,
        reflowed_text,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `text_ends_with_abbreviation` must accept only known abbreviations that
    /// are terminated by a period.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        // `None` means no custom abbreviation list is supplied.
        let abbreviations = get_abbreviations(&None);

        // Abbreviations, on their own or preceded by another word.
        for text in ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."] {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should be detected as ending with an abbreviation"
            );
        }

        let not_abbreviations = [
            // Ordinary words that merely end a sentence with a period.
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            // Abbreviation-like words followed by something other than a period.
            "Dr?",
            "Mr!",
            "paradigms?",
            // No trailing punctuation at all, and the empty string.
            "word",
            "",
        ];
        for text in not_abbreviations {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "{text:?} should not be detected as ending with an abbreviation"
            );
        }
    }

    /// `is_unordered_list_marker` must accept `-`, `*` and `+` bullets and
    /// reject look-alikes such as thematic breaks and emphasis.
    #[test]
    fn test_is_unordered_list_marker() {
        // Bullets followed by content, and bare bullets.
        for line in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(
                is_unordered_list_marker(line),
                "{line:?} should be an unordered list marker"
            );
        }

        let non_markers = [
            // Thematic breaks, with and without inner spaces.
            "---",
            "***",
            "- - -",
            "* * *",
            // Marker characters that are glued to the following text.
            "*emphasis*",
            "-word",
            // Lines that do not start with a bullet character at all.
            "",
            "text",
            "# heading",
        ];
        for line in non_markers {
            assert!(
                !is_unordered_list_marker(line),
                "{line:?} should not be an unordered list marker"
            );
        }
    }

    /// `is_block_boundary` must flag every line kind that starts a new block
    /// and leave plain inline content alone.
    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            // Blank line.
            "",
            // Headings.
            "# Heading",
            "## Level 2",
            // Code fences.
            "```rust",
            "~~~",
            // Blockquote, table row, link reference definition.
            "> quote",
            "| cell |",
            "[link]: http://example.com",
            // Thematic breaks.
            "---",
            "***",
            // Unordered and ordered list items.
            "- item",
            "* item",
            "+ item",
            "1. item",
            "10. item",
            // Definition list item and div markers.
            ": definition",
            ":::",
            "::::: {.callout-note}",
        ];
        for line in boundaries {
            assert!(is_block_boundary(line), "{line:?} should be a block boundary");
        }

        for line in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(line), "{line:?} should not be a block boundary");
        }
    }

    /// A definition-list line following a one-line paragraph must stay on its
    /// own line when the document is reflowed.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";

        let result = reflow_markdown(input, &options);

        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );

        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary` takes the trimmed line and the raw line; it
    /// must cover the block boundaries plus indented code and table rows.
    #[test]
    fn test_is_paragraph_boundary() {
        // Each pair is (trimmed line, raw line).
        let boundaries = [
            // Block-level constructs.
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code: four spaces or a tab. The four-space indent is
            // what distinguishes this case from the shallow-indent negative
            // case below; with an identical indent the two would contradict.
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows, with and without a leading pipe.
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"),
        ];
        for (trimmed, line) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, line),
                "{line:?} should be a paragraph boundary"
            );
        }

        let non_boundaries = [
            ("regular text", "regular text"),
            // A shallow indent is not enough to start an indented code block.
            ("text", " text"),
        ];
        for (trimmed, line) in non_boundaries {
            assert!(
                !is_paragraph_boundary(trimmed, line),
                "{line:?} should not be a paragraph boundary"
            );
        }
    }

    /// Asking to reflow the paragraph at a div-marker line must yield nothing.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        // The third line of this document is the opening `:::` div marker.
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";

        let result = reflow_paragraph_at_line(content, 3, 80);

        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}