use regex::Regex;
use std::collections::HashMap;
use std::fmt::Write as _;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{LazyLock, Mutex};
use std::time::{Duration, Instant};
use comrak::nodes::{AstNode, ListType, NodeValue, TableAlignment};
use comrak::{Arena, Options};
use crate::config::{DEFAULT_MIN_LINE_LEN, ListSpacing};
use crate::formatter::markdown::flowmark_comrak_options;
use crate::parser::frontmatter::split_frontmatter;
use crate::transform::cleanups::doc_cleanups;
use crate::typography::ellipses::ellipses as apply_ellipses;
use crate::typography::quotes::smart_quotes;
use crate::wrapping::LineWrapper;
use crate::wrapping::line_wrappers::{line_wrap_by_sentence, line_wrap_to_width};
use crate::wrapping::tag_handling::preprocess_tag_block_spacing;
/// Running totals of per-phase timings, in nanoseconds.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct FillPerfStats {
    /// Number of samples folded into these totals.
    pub files: u64,
    /// Accumulated nanoseconds for the preprocess phase.
    pub preprocess_ns: u128,
    /// Accumulated nanoseconds for the parse phase.
    pub parse_ns: u128,
    /// Accumulated nanoseconds for the transforms phase.
    pub transforms_ns: u128,
    /// Accumulated nanoseconds for the render phase.
    pub render_ns: u128,
    /// Accumulated nanoseconds for the postprocess phase.
    pub postprocess_ns: u128,
}

impl FillPerfStats {
    /// Returns the sum of every per-phase nanosecond total.
    pub fn total_ns(self) -> u128 {
        let phases = [
            self.preprocess_ns,
            self.parse_ns,
            self.transforms_ns,
            self.render_ns,
            self.postprocess_ns,
        ];
        phases.iter().sum()
    }
}
/// Elapsed time of each phase for a single run, before it is folded into
/// [`FillPerfStats`] by `FillPerfStats::add_sample`.
#[derive(Debug, Default, Clone, Copy)]
struct FillPerfSample {
// Each field is added (as nanoseconds) to the matching `*_ns` total.
preprocess: Duration,
parse: Duration,
transforms: Duration,
render: Duration,
postprocess: Duration,
}
impl FillPerfStats {
fn add_sample(&mut self, sample: FillPerfSample) {
self.files += 1;
self.preprocess_ns += sample.preprocess.as_nanos();
self.parse_ns += sample.parse.as_nanos();
self.transforms_ns += sample.transforms.as_nanos();
self.render_ns += sample.render.as_nanos();
self.postprocess_ns += sample.postprocess.as_nanos();
}
}
/// Flag written by `set_fill_perf_stats_enabled`; starts out `false`.
/// NOTE(review): no reader of this flag is visible in this part of the file.
static PERF_STATS_ENABLED: AtomicBool = AtomicBool::new(false);
/// Process-wide accumulated statistics, guarded by a mutex and initialised to
/// all zeros (spelled out field by field because this is a `static` initialiser).
static PERF_STATS: Mutex<FillPerfStats> = Mutex::new(FillPerfStats {
files: 0,
preprocess_ns: 0,
parse_ns: 0,
transforms_ns: 0,
render_ns: 0,
postprocess_ns: 0,
});
/// Stores `enabled` into the global `PERF_STATS_ENABLED` flag using relaxed
/// atomic ordering.
pub fn set_fill_perf_stats_enabled(enabled: bool) {
PERF_STATS_ENABLED.store(enabled, Ordering::Relaxed);
}
/// Resets the global statistics to their default (all-zero) value.
///
/// If the mutex is poisoned the call does nothing.
pub fn reset_fill_perf_stats() {
    let Ok(mut guard) = PERF_STATS.lock() else {
        return;
    };
    *guard = FillPerfStats::default();
}
/// Returns a copy of the global statistics, or default (all-zero) statistics
/// when the mutex is poisoned.
pub fn get_fill_perf_stats() -> FillPerfStats {
    match PERF_STATS.lock() {
        Ok(guard) => *guard,
        Err(_) => FillPerfStats::default(),
    }
}
/// Adds `sample` to the global statistics; silently skipped when the mutex is
/// poisoned.
fn record_fill_perf_sample(sample: FillPerfSample) {
    let Ok(mut guard) = PERF_STATS.lock() else {
        return;
    };
    guard.add_sample(sample);
}
/// Private-use character that opens an encoded reference label inside a
/// stand-in link destination (see `encode_ref_links`).
const REF_LABEL_START: char = '\u{F000}';
/// Private-use character that closes an encoded reference label.
const REF_LABEL_SEP: char = '\u{F001}';
/// Link reference definition line: `[label]: <url> "title"`, indented by at
/// most three spaces/tabs. Groups: 1 = label, 2 = URL, 3/4/5 = title in
/// double quotes, single quotes or parentheses respectively.
static LINK_REF_DEF: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r#"(?m)^[ \t]{0,3}\[([^\]]+)\]:[ \t]+<?([^\s>]+)>?(?:[ \t]+(?:"([^"]*)"|'([^']*)'|\(([^)]*)\)))?[ \t]*$"#,
)
.expect("valid LINK_REF_DEF regex")
});
/// Full reference link `[text][label]` (1 = text, 2 = label).
static FULL_REF_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").expect("valid FULL_REF_LINK regex"));
/// Full reference link whose text is an inline image: `[![alt](src)][label]`
/// (1 = the image markup, 2 = label).
static BADGE_FULL_REF_LINK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[(!\[[^\]]*\]\([^)]*\))\]\[([^\]]+)\]").expect("valid BADGE_FULL_REF_LINK regex")
});
/// Collapsed reference link whose text is an inline image: `[![alt](src)][]`.
static BADGE_COLLAPSED_REF_LINK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[(!\[[^\]]*\]\([^)]*\))\]\[\]").expect("valid BADGE_COLLAPSED_REF_LINK regex")
});
/// Shortcut reference link whose text is an inline image: `[![alt](src)]`
/// followed by end of input or a character other than `[`, `(` or `:`
/// (group 2 captures that following character, possibly empty).
static BADGE_SHORTCUT_REF_LINK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[(!\[[^\]]*\]\([^)]*\))\]([^\[(:]|$)")
.expect("valid BADGE_SHORTCUT_REF_LINK regex")
});
/// Collapsed reference link `[text][]`.
static COLLAPSED_REF_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[\]").expect("valid COLLAPSED_REF_LINK regex"));
/// Shortcut reference link `[text]` followed by end of input or a character
/// other than `[`, `(` or `:` (group 2 captures that following character).
static SHORTCUT_REF_LINK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[([^\]]+)\]([^\[(:]|$)").expect("valid SHORTCUT_REF_LINK regex")
});
/// Full reference image `![alt][label]` (alt may be empty).
static IMAGE_FULL_REF: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\[([^\]]+)\]").expect("valid IMAGE_FULL_REF regex"));
/// Collapsed reference image `![alt][]`.
static IMAGE_COLLAPSED_REF: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"!\[([^\]]+)\]\[\]").expect("valid IMAGE_COLLAPSED_REF regex"));
/// Shortcut reference image `![alt]` followed by end of input or a character
/// other than `[`, `(` or `:` (group 2 captures that following character).
static IMAGE_SHORTCUT_REF: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"!\[([^\]]+)\]([^\[(:]|$)").expect("valid IMAGE_SHORTCUT_REF regex")
});
/// A line consisting only of spaces and/or tabs.
static BLANK_LINE_WS: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^[ \t]+$").expect("valid BLANK_LINE_WS regex"));
/// A triple-backtick fence (optionally indented) followed by horizontal
/// whitespace and then a word character: 1 = indent plus fence, 2 = first
/// character of the info string.
static CODE_FENCE_SPACE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?m)^([ \t]*```)[^\S\n]+(\w)").expect("valid CODE_FENCE_SPACE regex")
});
/// A line starting with digits, a period and a space (group 1 = the digits).
/// NOTE(review): the name suggests two spaces after the period, but the
/// pattern as written requires only one — confirm which is intended.
static NUMBERED_ITEM_TWO_SPACES: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(\d+)\. ").expect("valid NUMBERED_ITEM_TWO_SPACES regex"));
/// Empties every line that contains only spaces and/or tabs, leaving the line
/// breaks themselves in place.
fn normalize_blank_lines(text: &str) -> String {
    let stripped = BLANK_LINE_WS.replace_all(text, "");
    stripped.into_owned()
}
/// Removes horizontal whitespace between a triple-backtick fence and the
/// info string that follows it.
fn normalize_code_fences(text: &str) -> String {
    // Fast path: without any triple backtick there is nothing to rewrite.
    if text.contains("```") {
        let tightened = CODE_FENCE_SPACE.replace_all(text, "$1$2");
        tightened.into_owned()
    } else {
        text.to_string()
    }
}
fn normalize_numbered_lists(text: &str) -> String {
if !text.contains(". ") {
return text.to_string();
}
let mut result = String::new();
for line in text.lines() {
if let Some(caps) = NUMBERED_ITEM_TWO_SPACES.captures(line) {
let num = &caps[1];
let fixed = line.replacen(&format!("{num}. "), &format!("{num}. "), 1);
result.push_str(&fixed);
} else {
result.push_str(line);
}
result.push('\n');
}
if !text.ends_with('\n') && result.ends_with('\n') {
result.pop();
}
result
}
/// Runs the output clean-ups in order: whitespace-only lines, code-fence
/// spacing, numbered-list markers, then blank-line collapsing outside code.
fn normalize_comrak_output(text: &str) -> String {
    let blanks_cleaned = normalize_blank_lines(text);
    let fences_cleaned = normalize_code_fences(&blanks_cleaned);
    let lists_cleaned = normalize_numbered_lists(&fences_cleaned);
    collapse_blank_lines_outside_code(&lists_cleaned)
}
/// Reports whether `trimmed` closes a code block opened with `fence_str`.
///
/// The line must start with `fence_str`; whatever follows may consist only of
/// more fence characters or whitespace. An empty `fence_str` never matches.
fn is_closing_fence(trimmed: &str, fence_str: &str) -> bool {
    let Some(fence_char) = fence_str.chars().next() else {
        return false;
    };
    match trimmed.strip_prefix(fence_str) {
        Some(tail) => tail.chars().all(|c| c == fence_char || c.is_whitespace()),
        None => false,
    }
}
/// Returns the fence string (the full run of fence characters) when `trimmed`
/// opens a fenced code block, or `None` otherwise.
///
/// A fence is a run of at least three backticks or tildes at the start of the
/// line. Following CommonMark, a backtick fence whose trailing text (the info
/// string) contains another backtick is not a fence: a line such as
/// "```inline``` code" is an inline code span, and treating it as a fence
/// opener would make callers skip the rest of the document as "code".
fn detect_opening_fence(trimmed: &str) -> Option<String> {
    let fence_char = match trimmed.chars().next()? {
        c @ ('`' | '~') => c,
        _ => return None,
    };
    // Fence characters are ASCII, so the char count is also the byte length.
    let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
    if fence_len < 3 {
        return None;
    }
    if fence_char == '`' && trimmed[fence_len..].contains('`') {
        return None;
    }
    Some(fence_char.to_string().repeat(fence_len))
}
/// Applies `process_outside` to every line that is not part of a fenced code
/// block and re-joins the result with `\n`.
///
/// Fence lines and the lines between them are copied through untouched. The
/// callback may return zero, one or several replacement lines. A trailing
/// newline is appended exactly when `text` ended with one.
fn transform_outside_code_fences<F>(text: &str, mut process_outside: F) -> String
where
    F: FnMut(&str) -> Vec<String>,
{
    let mut pieces: Vec<String> = Vec::new();
    // `Some(fence)` while inside a code block opened with `fence`.
    let mut open_fence: Option<String> = None;
    for raw in text.lines() {
        let stripped = raw.trim();
        if let Some(fence) = &open_fence {
            let closes = is_closing_fence(stripped, fence);
            pieces.push(raw.to_owned());
            if closes {
                open_fence = None;
            }
            continue;
        }
        open_fence = detect_opening_fence(stripped);
        if open_fence.is_some() {
            pieces.push(raw.to_owned());
        } else {
            pieces.extend(process_outside(raw));
        }
    }
    let mut joined = pieces.join("\n");
    if text.ends_with('\n') {
        joined.push('\n');
    }
    joined
}
/// Reduces every run of blank lines outside fenced code blocks to a single
/// blank line; lines inside code blocks are kept verbatim.
///
/// A trailing newline is appended exactly when `text` ended with one.
fn collapse_blank_lines_outside_code(text: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    // `Some(fence)` while inside a code block opened with `fence`.
    let mut open_fence: Option<String> = None;
    // True when the most recent line outside code was blank.
    let mut last_was_blank = false;
    for raw in text.lines() {
        let stripped = raw.trim();
        if let Some(fence) = &open_fence {
            kept.push(raw);
            if is_closing_fence(stripped, fence) {
                open_fence = None;
                last_was_blank = false;
            }
            continue;
        }
        if let Some(fence) = detect_opening_fence(stripped) {
            open_fence = Some(fence);
            last_was_blank = false;
            kept.push(raw);
            continue;
        }
        let blank = stripped.is_empty();
        // Only the first blank line of a run survives.
        if !(blank && last_was_blank) {
            kept.push(raw);
        }
        last_was_blank = blank;
    }
    let mut joined = kept.join("\n");
    if text.ends_with('\n') {
        joined.push('\n');
    }
    joined
}
/// Private-use stand-in for the `<` of an angle-bracket autolink
/// (written by `protect_autolinks`, reverted by `restore_autolinks`).
const AUTOLINK_OPEN: char = '\u{F003}';
/// Private-use stand-in for the `>` of an angle-bracket autolink.
const AUTOLINK_CLOSE: char = '\u{F004}';
/// Private-use stand-in for the `&` of an HTML entity
/// (written by `protect_html_entities`, reverted by `restore_html_entities`).
const ENTITY_AMP: char = '\u{F005}';
/// HTML entity: named (`&amp;`), decimal (`&#38;`) or hexadecimal (`&#x26;`).
static HTML_ENTITY_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);").expect("valid regex")
});
/// Angle-bracket autolink: `<scheme:...>` for http, https, ftp or mailto, or a
/// bare `<user@host.tld>` e-mail address. Group 1 is the text between the
/// brackets.
static ANGLE_AUTOLINK_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"<((?:https?|ftp|mailto):[^\s>]+|[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})>",
)
.expect("valid ANGLE_AUTOLINK_RE regex")
});
fn protect_autolinks(text: &str) -> String {
let lines: Vec<&str> = text.lines().collect();
let had_trailing_newline = text.ends_with('\n');
let mut result_lines: Vec<String> = Vec::new();
let mut in_code = false;
let mut fence_str = String::new();
let mut in_html_comment = false;
for line in &lines {
if in_code {
result_lines.push((*line).to_string());
if is_closing_fence(line.trim(), &fence_str) {
in_code = false;
}
continue;
}
if in_html_comment {
result_lines.push((*line).to_string());
if line.contains("-->") {
in_html_comment = false;
}
continue;
}
if let Some(fs) = detect_opening_fence(line.trim()) {
fence_str = fs;
in_code = true;
result_lines.push((*line).to_string());
continue;
}
if line.trim().starts_with(FNDEF_MARKER_START)
|| line.trim().starts_with(REFDEF_MARKER_PREFIX)
{
result_lines.push((*line).to_string());
if !line.contains("-->") {
in_html_comment = true;
}
continue;
}
let replaced = ANGLE_AUTOLINK_RE.replace_all(line, |caps: ®ex::Captures| {
format!("{AUTOLINK_OPEN}{}{AUTOLINK_CLOSE}", &caps[1])
});
result_lines.push(replaced.into_owned());
}
let mut output = result_lines.join("\n");
if had_trailing_newline && !output.ends_with('\n') {
output.push('\n');
}
output
}
/// Turns `AUTOLINK_OPEN … AUTOLINK_CLOSE` placeholder pairs back into
/// `<…>`.
///
/// If an opening placeholder has no matching close, everything after it is
/// treated as the link text and wrapped in angle brackets.
fn restore_autolinks(text: &str) -> String {
    let mut restored = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(open_at) = rest.find(AUTOLINK_OPEN) {
        restored.push_str(&rest[..open_at]);
        let after_open = &rest[open_at + AUTOLINK_OPEN.len_utf8()..];
        let (url, tail) = match after_open.find(AUTOLINK_CLOSE) {
            Some(close_at) => (
                &after_open[..close_at],
                &after_open[close_at + AUTOLINK_CLOSE.len_utf8()..],
            ),
            None => (after_open, ""),
        };
        restored.push('<');
        restored.push_str(url);
        restored.push('>');
        rest = tail;
    }
    restored.push_str(rest);
    restored
}
fn protect_html_entities(text: &str) -> String {
transform_outside_code_fences(text, |line| {
let replaced = HTML_ENTITY_RE.replace_all(line, |caps: ®ex::Captures| {
format!("{ENTITY_AMP}{}", &caps[0][1..])
});
vec![replaced.into_owned()]
})
}
/// Replaces every `ENTITY_AMP` placeholder with a literal `&`.
fn restore_html_entities(text: &str) -> String {
    text.chars().map(|c| if c == ENTITY_AMP { '&' } else { c }).collect()
}
/// Opening text of the HTML comment that `extract_link_ref_defs` wraps around
/// a link reference definition line.
const REFDEF_MARKER_PREFIX: &str = "<!-- \u{F002}REFDEF:";
/// Start of a footnote definition: `[^label]:` indented by at most three
/// spaces/tabs and followed by at least one space or tab (group 1 = label).
static FOOTNOTE_DEF_START: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^[ \t]{0,3}\[\^([^\]]+)\]:[ \t]+").expect("valid FOOTNOTE_DEF_START regex")
});
/// Collects link reference definitions found outside fenced code blocks and
/// wraps each definition line in a `REFDEF_MARKER_PREFIX … -->` comment.
///
/// Returns the map from lowercased label to destination (the URL, followed by
/// ` "title"` when a title is present) together with the rewritten text.
/// Labels starting with `^` (footnotes) are left alone.
fn extract_link_ref_defs(text: &str) -> (HashMap<String, String>, String) {
    let mut defs: HashMap<String, String> = HashMap::new();
    let rewritten = transform_outside_code_fences(text, |line| {
        let Some(caps) = LINK_REF_DEF.captures(line) else {
            return vec![line.to_string()];
        };
        let label = &caps[1];
        if label.starts_with('^') {
            return vec![line.to_string()];
        }
        let url = caps.get(2).map_or("", |m| m.as_str());
        // The title lives in whichever of groups 3, 4, 5 participated.
        let title = (3..=5).find_map(|idx| caps.get(idx)).map_or("", |m| m.as_str());
        let destination = match title {
            "" => url.to_string(),
            titled => format!("{url} \"{titled}\""),
        };
        defs.insert(label.to_lowercase(), destination);
        vec![format!("{REFDEF_MARKER_PREFIX}{line} -->")]
    });
    (defs, rewritten)
}
/// Lowercases the label of a link reference definition, leaving the rest of
/// the line untouched. Input that is not a definition is returned unchanged.
fn lowercase_refdef_label(def: &str) -> String {
    match LINK_REF_DEF.captures(def).and_then(|caps| caps.get(1)) {
        Some(label) => {
            let head = &def[..label.start()];
            let tail = &def[label.end()..];
            let lowered = label.as_str().to_lowercase();
            let mut out = String::with_capacity(def.len());
            out.push_str(head);
            out.push_str(&lowered);
            out.push_str(tail);
            out
        }
        None => def.to_string(),
    }
}
/// First line of the HTML comment that `extract_footnote_defs` wraps around a
/// footnote definition.
const FNDEF_MARKER_START: &str = "<!-- \u{F002}FNDEF";
/// Wraps every footnote definition outside fenced code blocks in an HTML
/// comment: a `FNDEF_MARKER_START` line, the definition lines, then `-->`.
///
/// A definition starts at a line matching `FOOTNOTE_DEF_START` and continues
/// over following lines that begin with a space or tab or are blank. Blank
/// lines at the end of the definition are moved after the closing `-->`.
/// The trailing line breaks of `text` are preserved: a trailing newline is
/// re-appended whenever the input ended with one, so a final blank line is
/// not lost.
fn extract_footnote_defs(text: &str) -> String {
    let lines: Vec<&str> = text.lines().collect();
    let mut result_lines: Vec<String> = Vec::new();
    let mut in_code = false;
    let mut fence_str = String::new();
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        if in_code {
            if is_closing_fence(line.trim(), &fence_str) {
                in_code = false;
            }
            result_lines.push(line.to_string());
            i += 1;
            continue;
        }
        if let Some(fs) = detect_opening_fence(line.trim()) {
            fence_str = fs;
            in_code = true;
            result_lines.push(line.to_string());
            i += 1;
            continue;
        }
        if !FOOTNOTE_DEF_START.is_match(line) {
            result_lines.push(line.to_string());
            i += 1;
            continue;
        }

        // Gather the definition line plus its continuation lines.
        let mut end = i + 1;
        while end < lines.len() {
            let cont = lines[end];
            let continues =
                cont.starts_with(" ") || cont.starts_with('\t') || cont.trim().is_empty();
            if !continues {
                break;
            }
            end += 1;
        }
        let mut def_lines: &[&str] = &lines[i..end];

        // Blank lines at the tail belong after the comment, not inside it.
        let mut trailing_blanks = 0;
        while let Some((last, head)) = def_lines.split_last() {
            if !last.trim().is_empty() {
                break;
            }
            def_lines = head;
            trailing_blanks += 1;
        }

        result_lines.push(FNDEF_MARKER_START.to_string());
        result_lines.extend(def_lines.iter().map(|dl| (*dl).to_string()));
        result_lines.push("-->".to_string());
        result_lines.extend(std::iter::repeat(String::new()).take(trailing_blanks));
        i = end;
    }
    let mut output = result_lines.join("\n");
    // `lines()` consumed the final terminator; always put it back. Checking
    // whether `output` already ends in '\n' would drop a trailing blank line.
    if text.ends_with('\n') {
        output.push('\n');
    }
    output
}
fn replace_until_stable<F>(text: &mut String, re: &Regex, replacer: F)
where
F: Fn(®ex::Captures) -> String,
{
loop {
let new = re.replace(text.as_str(), &replacer);
if new == *text {
break;
}
*text = new.into_owned();
}
}
/// Extracts the alt text of inline image markup (`![alt](src)`) and returns
/// it lowercased; `None` when the input does not start with `![` or has no
/// closing `]`.
fn badge_alt_lowercase(image_markdown: &str) -> Option<String> {
    let after_open = image_markdown.strip_prefix("![")?;
    let (alt, _) = after_open.split_once(']')?;
    Some(alt.to_lowercase())
}
/// Encodes the UTF-8 bytes of `label` as lowercase hexadecimal, two digits
/// per byte.
fn encode_hex_label(label: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut encoded = String::with_capacity(label.len() * 2);
    for &byte in label.as_bytes() {
        encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
/// Decodes a hexadecimal string (upper or lower case) back into text.
///
/// Returns `None` for empty input, an odd number of digits, a non-hex
/// character, or bytes that are not valid UTF-8.
fn decode_hex_label(hex: &str) -> Option<String> {
    let raw = hex.as_bytes();
    if raw.is_empty() || raw.len() % 2 == 1 {
        return None;
    }
    let nibble = |digit: u8| char::from(digit).to_digit(16);
    let decoded = raw
        .chunks_exact(2)
        .map(|pair| {
            let high = nibble(pair[0])?;
            let low = nibble(pair[1])?;
            u8::try_from((high << 4) | low).ok()
        })
        .collect::<Option<Vec<u8>>>()?;
    String::from_utf8(decoded).ok()
}
fn encode_ref_links(text: &str, defs: &HashMap<String, String>) -> String {
if defs.is_empty() {
return text.to_string();
}
transform_outside_code_fences(text, |line| {
let mut result = line.to_string();
result = BADGE_FULL_REF_LINK
.replace_all(&result, |caps: ®ex::Captures| {
let text_part = &caps[1];
let label = caps[2].to_lowercase();
if defs.contains_key(&label) {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP})")
} else {
caps[0].to_string()
}
})
.into_owned();
result = BADGE_COLLAPSED_REF_LINK
.replace_all(&result, |caps: ®ex::Captures| {
let text_part = &caps[1];
match badge_alt_lowercase(text_part) {
Some(label) if defs.contains_key(&label) => {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP})")
}
_ => caps[0].to_string(),
}
})
.into_owned();
result = BADGE_SHORTCUT_REF_LINK
.replace_all(&result, |caps: ®ex::Captures| {
let text_part = &caps[1];
let trailing = &caps[2];
match badge_alt_lowercase(text_part) {
Some(label) if defs.contains_key(&label) => {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP}){trailing}")
}
_ => caps[0].to_string(),
}
})
.into_owned();
replace_until_stable(&mut result, &FULL_REF_LINK, |caps: ®ex::Captures| {
let text_part = &caps[1];
let label = caps[2].to_lowercase();
if defs.contains_key(&label) {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP})")
} else {
caps[0].to_string()
}
});
replace_until_stable(&mut result, &COLLAPSED_REF_LINK, |caps: ®ex::Captures| {
let text_part = &caps[1];
let label = text_part.to_lowercase();
if defs.contains_key(&label) {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP})")
} else {
caps[0].to_string()
}
});
result = SHORTCUT_REF_LINK
.replace_all(&result, |caps: ®ex::Captures| {
let text_part = &caps[1];
let trailing = &caps[2];
let label = text_part.to_lowercase();
if defs.contains_key(&label) {
let hex = encode_hex_label(&label);
format!("[{text_part}]({REF_LABEL_START}{hex}{REF_LABEL_SEP}){trailing}")
} else {
caps[0].to_string()
}
})
.into_owned();
vec![result]
})
}
fn inline_image_refs(text: &str, defs: &HashMap<String, String>) -> String {
if defs.is_empty() {
return text.to_string();
}
transform_outside_code_fences(text, |line| {
let mut result = line.to_string();
result = IMAGE_FULL_REF
.replace_all(&result, |caps: ®ex::Captures| {
let alt = &caps[1];
let label = caps[2].to_lowercase();
if let Some(dest) = defs.get(&label) {
format!("")
} else {
caps[0].to_string()
}
})
.into_owned();
result = IMAGE_COLLAPSED_REF
.replace_all(&result, |caps: ®ex::Captures| {
let alt = &caps[1];
let label = alt.to_lowercase();
if let Some(dest) = defs.get(&label) {
format!("")
} else {
caps[0].to_string()
}
})
.into_owned();
result = IMAGE_SHORTCUT_REF
.replace_all(&result, |caps: ®ex::Captures| {
let alt = &caps[1];
let trailing = &caps[2];
let label = alt.to_lowercase();
if let Some(dest) = defs.get(&label) {
format!("{trailing}")
} else {
caps[0].to_string()
}
})
.into_owned();
vec![result]
})
}
/// Applies smart quotes and/or ellipsis conversion to the body of every
/// footnote-definition marker comment in `text`.
///
/// A block runs from `FNDEF_MARKER_START` to the first following `-->`. Its
/// first line (the marker line) and the closing `-->` are kept verbatim; only
/// the text in between is transformed. Text outside such blocks, and a marker
/// with no closing `-->`, is copied unchanged.
fn apply_typography_to_fndef_bodies(text: &str, do_smartquotes: bool, do_ellipses: bool) -> String {
    const CLOSE: &str = "-->";
    let mut out = String::new();
    let mut rest = text;
    while !rest.is_empty() {
        let Some(marker_at) = rest.find(FNDEF_MARKER_START) else {
            out.push_str(rest);
            break;
        };
        out.push_str(&rest[..marker_at]);
        let from_marker = &rest[marker_at..];
        let Some(close_at) = from_marker.find(CLOSE) else {
            out.push_str(from_marker);
            break;
        };
        let (block, tail) = from_marker.split_at(close_at + CLOSE.len());
        // Split the marker line from the body; the body still ends in `-->`.
        let parts = block.split_once('\n').and_then(|(header, body_and_close)| {
            body_and_close
                .rfind(CLOSE)
                .map(|at| (header, &body_and_close[..at], &body_and_close[at..]))
        });
        match parts {
            Some((header, body, close)) => {
                let mut transformed = body.to_string();
                if do_smartquotes {
                    transformed = smart_quotes(&transformed);
                }
                if do_ellipses {
                    transformed = apply_ellipses(&transformed);
                }
                out.push_str(header);
                out.push('\n');
                out.push_str(&transformed);
                out.push_str(close);
            }
            None => out.push_str(block),
        }
        rest = tail;
    }
    out
}
/// Runs `replace_escapes_in_line` with `escape_set` on every line outside
/// fenced code blocks.
fn protect_escapes_outside_code(text: &str, escape_set: &[char]) -> String {
    transform_outside_code_fences(text, |line| vec![replace_escapes_in_line(line, escape_set)])
}
/// Replaces each backslash escape `\c`, where `c` is in `escape_set`, with a
/// two-character placeholder: the private-use character `U+E000 + c`
/// followed by the filler `U+E100`.
///
/// Only characters up to U+00FF are encoded, because placeholders are
/// restored from the range U+E000..=U+E0FF. A character above that range
/// would yield a placeholder that can never be restored (or, for very large
/// code points, no valid `char` at all), so such escapes are left untouched.
/// Backslashes followed by any other character, or at the end of the line,
/// are also kept as-is.
fn replace_escapes_in_line(line: &str, escape_set: &[char]) -> String {
    const PUA_BASE: u32 = 0xE000;
    const PUA_FILLER: char = '\u{E100}';
    const MAX_ENCODABLE: u32 = 0xFF;
    if !line.contains('\\') {
        return line.to_string();
    }
    let mut result = String::with_capacity(line.len());
    let mut chars = line.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch == '\\' {
            let placeholder = chars
                .peek()
                .copied()
                .filter(|next| escape_set.contains(next))
                .map(u32::from)
                .filter(|&code| code <= MAX_ENCODABLE)
                .and_then(|code| char::from_u32(PUA_BASE + code));
            if let Some(pua) = placeholder {
                // Consume the escaped character that the placeholder replaces.
                chars.next();
                result.push(pua);
                result.push(PUA_FILLER);
                continue;
            }
        }
        result.push(ch);
    }
    result
}
/// Converts escape placeholders back into backslash escapes.
///
/// A character in U+E000..=U+E0FF immediately followed by the filler U+E100
/// becomes `\` plus the character whose code point is the placeholder minus
/// 0xE000. A placeholder-range character without the filler is kept as-is.
fn restore_pua_escape_placeholders(text: &str) -> String {
    const PUA_FILLER: char = '\u{E100}';
    let is_placeholder = |c: char| matches!(c, '\u{E000}'..='\u{E0FF}');
    if !text.chars().any(is_placeholder) {
        return text.to_string();
    }
    let mut restored = String::with_capacity(text.len());
    let mut iter = text.chars().peekable();
    while let Some(current) = iter.next() {
        // `next_if_eq` consumes the filler only when it directly follows.
        let paired = is_placeholder(current) && iter.next_if_eq(&PUA_FILLER).is_some();
        if paired {
            restored.push('\\');
            restored.push(char::from_u32(current as u32 - 0xE000).unwrap_or(current));
        } else {
            restored.push(current);
        }
    }
    restored
}
/// Removes `\.` escapes outside fenced code blocks, except where the escape
/// directly follows leading digits.
///
/// For each line: heading lines (starting with `#` after indentation) always
/// have their period escapes removed. Otherwise leading `>` and whitespace are
/// skipped, then one bullet marker (`- `, `* `, `+ `) and an optional task box
/// (`[ ] `, `[x] `, `[X] `); if what remains starts with digits followed by
/// `\.`, the line is kept unchanged. Inline code spans are never altered.
fn postprocess_period_escapes(text: &str) -> String {
    if !text.contains("\\.") {
        return text.to_string();
    }
    transform_outside_code_fences(text, |line| {
        let body = line.trim_start();
        let keep_escape = !body.starts_with('#') && {
            let unquoted = body.trim_start_matches(|c: char| c == '>' || c.is_whitespace());
            let item_text = ["- ", "* ", "+ "]
                .iter()
                .find_map(|marker| unquoted.strip_prefix(*marker))
                .map_or(unquoted, |rest| {
                    ["[ ] ", "[x] ", "[X] "]
                        .iter()
                        .find_map(|task_box| rest.strip_prefix(*task_box))
                        .unwrap_or(rest)
                });
            // ASCII digits are one byte each, so the count is a valid index.
            let digits = item_text.bytes().take_while(u8::is_ascii_digit).count();
            digits > 0 && item_text[digits..].starts_with("\\.")
        };
        if keep_escape {
            vec![line.to_string()]
        } else {
            vec![remove_period_escapes_preserving_code(line)]
        }
    })
}
/// Replaces `\.` with `.` everywhere in `line` except inside inline code
/// spans.
///
/// A code span starts at a run of backticks and ends at the next run of the
/// same length; its content is copied verbatim. If no matching closing run
/// exists, the remainder of the line is copied verbatim as well.
fn remove_period_escapes_preserving_code(line: &str) -> String {
    let backtick_run = |s: &str| s.bytes().take_while(|&b| b == b'`').count();
    let mut cleaned = String::with_capacity(line.len());
    let mut rest = line;
    while let Some(first) = rest.chars().next() {
        if first == '`' {
            let open_len = backtick_run(rest);
            cleaned.push_str(&rest[..open_len]);
            rest = &rest[open_len..];
            // Copy through to the closing run of identical length.
            loop {
                let Some(tick_at) = rest.find('`') else {
                    cleaned.push_str(rest);
                    rest = "";
                    break;
                };
                let close_len = backtick_run(&rest[tick_at..]);
                let consumed = tick_at + close_len;
                cleaned.push_str(&rest[..consumed]);
                rest = &rest[consumed..];
                if close_len == open_len {
                    break;
                }
            }
        } else if let Some(after_escape) = rest.strip_prefix("\\.") {
            cleaned.push('.');
            rest = after_escape;
        } else {
            cleaned.push(first);
            rest = &rest[first.len_utf8()..];
        }
    }
    cleaned
}
/// Returns the source line on which the content of `node` ends.
///
/// Lists and list items defer to their last child. For a non-fenced code
/// block the line is derived from the start line and the number of content
/// lines in its literal (trailing newlines ignored) rather than from the
/// recorded end position. Everything else, including a childless list or
/// item, uses the recorded end line, falling back to the start line when the
/// end line is smaller.
fn last_content_line<'a>(node: &'a AstNode<'a>) -> usize {
    let is_container = matches!(
        node.data.borrow().value,
        NodeValue::List(_) | NodeValue::Item(_) | NodeValue::TaskItem(_)
    );
    if is_container {
        if let Some(tail) = node.children().last() {
            return last_content_line(tail);
        }
    }
    let data = node.data.borrow();
    let sp = data.sourcepos;
    match &data.value {
        NodeValue::CodeBlock(cb) if !cb.fenced => {
            let content_lines = cb.literal.trim_end_matches('\n').lines().count();
            // Zero content lines leaves the start line itself.
            sp.start.line + content_lines.saturating_sub(1)
        }
        _ => sp.end.line.max(sp.start.line),
    }
}
/// Reports whether `node` is one of the block-level node kinds listed below.
/// `render_block_children` and `render_block_children_quoted` use this to
/// decide whether a separator between siblings is a candidate at all.
fn is_block_element(node: &AstNode) -> bool {
matches!(
node.data.borrow().value,
NodeValue::Paragraph
| NodeValue::Heading(_)
| NodeValue::List(_)
| NodeValue::BlockQuote
| NodeValue::CodeBlock(_)
| NodeValue::ThematicBreak
| NodeValue::HtmlBlock(_)
| NodeValue::Table(_)
| NodeValue::FootnoteDefinition(_)
| NodeValue::Alert(_)
)
}
/// Reports whether the last child of `node` is a hard line break, or a text
/// node whose content ends with a backslash. A node without children yields
/// `false`.
fn inline_ends_with_hard_break<'a>(node: &'a AstNode<'a>) -> bool {
    let Some(tail) = node.children().last() else {
        return false;
    };
    let tail_data = tail.data.borrow();
    match &tail_data.value {
        NodeValue::LineBreak => true,
        NodeValue::Text(text) => text.ends_with('\\'),
        _ => false,
    }
}
/// Reports whether `node` is an HTML block consisting of a single-line
/// comment (`<!-- … -->`) that is not one of the internal footnote or
/// reference-definition markers.
fn is_html_comment_only(node: &AstNode) -> bool {
    let data = node.data.borrow();
    let NodeValue::HtmlBlock(html) = &data.value else {
        return false;
    };
    let body = html.literal.trim();
    let single_line_comment =
        body.starts_with("<!--") && body.ends_with("-->") && !body.contains('\n');
    let internal_marker =
        body.contains(FNDEF_MARKER_START) || body.contains(REFDEF_MARKER_PREFIX);
    single_line_comment && !internal_marker
}
/// Reports whether `trimmed` is exactly one HTML comment spanning several
/// lines: it contains a newline, starts with `<!--`, its only `-->` is at the
/// very end, and none of its lines is blank.
fn is_multiline_html_comment_block(trimmed: &str) -> bool {
    const CLOSE: &str = "-->";
    let spans_lines = trimmed.contains('\n');
    let delimited = trimmed.starts_with("<!--") && trimmed.ends_with(CLOSE);
    if !(spans_lines && delimited) {
        return false;
    }
    // The first `-->` must be the final one, otherwise the comment ends early.
    let closes_only_at_end = trimmed.find(CLOSE) == Some(trimmed.len() - CLOSE.len());
    closes_only_at_end && trimmed.lines().all(|l| !l.trim().is_empty())
}
/// Reports whether `node` is an HTML block whose trimmed text starts with
/// `REFDEF_MARKER_PREFIX`, i.e. a wrapped link reference definition.
fn is_refdef_marker(node: &AstNode) -> bool {
    match &node.data.borrow().value {
        NodeValue::HtmlBlock(html) => html.literal.trim().starts_with(REFDEF_MARKER_PREFIX),
        _ => false,
    }
}
/// Renders every child of `node` with `render_block` and concatenates the
/// results, inserting a blank line (`"\n"`) between consecutive block
/// elements unless one of the suppression rules below applies.
///
/// The remaining parameters are passed to `render_block` unchanged.
///
/// A separator is omitted when:
/// - the previous output already ended with a blank line,
/// - either neighbour is a heading ending in a hard break,
/// - both neighbours are reference-definition markers, or
/// - the two blocks were on adjacent source lines ("originally tight") and
///   the pair matches one of the combinations in `suppress_for_tight`.
fn render_block_children<'a>(
node: &'a AstNode<'a>,
line_wrapper: &LineWrapper,
list_spacing: ListSpacing,
prefix: &str,
subsequent_prefix: &str,
in_heading: &mut bool,
options: &Options,
) -> String {
let mut output = String::new();
// State carried over from the previously rendered sibling.
let mut prev_was_block = false;
let mut prev_ended_with_double_newline = false;
let mut prev_was_hard_break_heading = false;
let mut prev_was_refdef_only = false;
// 0 means "no previous sibling yet".
let mut prev_source_end_line: usize = 0;
let mut prev_was_html_comment = false;
let mut prev_was_list_or_table = false;
let mut prev_was_paragraph = false;
let mut prev_was_thematic_break = false;
let mut prev_was_code_block = false;
let mut prev_was_list = false;
for child in node.children() {
// Classify the current child once up front.
let child_is_block = is_block_element(child);
let child_is_refdef_only = is_refdef_marker(child);
let child_is_html_comment = is_html_comment_only(child);
let child_is_list = matches!(child.data.borrow().value, NodeValue::List(_));
let child_is_code_block = matches!(child.data.borrow().value, NodeValue::CodeBlock(_));
let child_is_paragraph = matches!(child.data.borrow().value, NodeValue::Paragraph);
let child_is_thematic_break = matches!(child.data.borrow().value, NodeValue::ThematicBreak);
let child_is_table = matches!(child.data.borrow().value, NodeValue::Table(_));
let child_is_blockquote = matches!(child.data.borrow().value, NodeValue::BlockQuote);
let child_is_hard_break_heading =
matches!(child.data.borrow().value, NodeValue::Heading(_))
&& inline_ends_with_hard_break(child);
let child_source_start = child.data.borrow().sourcepos.start.line;
let child_source_end = last_content_line(child);
// Tight: this child starts no later than the line right after the
// previous sibling's last content line, i.e. no blank line between them.
let originally_tight =
prev_source_end_line > 0 && child_source_start <= prev_source_end_line + 1;
// The branches are checked in order; the first matching one decides.
let suppress_for_tight = if originally_tight {
if prev_was_html_comment {
true
} else if child_is_html_comment {
// A comment directly after a list or table still gets a separator.
!prev_was_list_or_table
} else if child_is_list && prev_was_paragraph {
true
} else if child_is_code_block && prev_was_paragraph {
true
} else if prev_was_thematic_break || child_is_thematic_break {
true
} else if child_is_table && prev_was_paragraph {
true
} else if child_is_blockquote && prev_was_paragraph {
true
} else if child_is_paragraph && prev_was_code_block {
true
} else if child_is_list && (prev_was_list || prev_was_code_block) {
true
} else if child_is_blockquote && prev_was_list {
true
} else if child_is_code_block && prev_was_list {
true
} else if child_is_code_block && prev_was_code_block {
true
} else {
false
}
} else {
false
};
let both_refdefs = prev_was_refdef_only && child_is_refdef_only;
let need_separator = child_is_block
&& prev_was_block
&& !prev_ended_with_double_newline
&& !prev_was_hard_break_heading
&& !child_is_hard_break_heading
&& !both_refdefs
&& !suppress_for_tight;
if need_separator {
output.push('\n');
}
let block_output = render_block(
child,
line_wrapper,
list_spacing,
prefix,
subsequent_prefix,
in_heading,
options,
);
// Record this child's properties for the next iteration.
prev_ended_with_double_newline = block_output.ends_with("\n\n");
prev_was_hard_break_heading = matches!(child.data.borrow().value, NodeValue::Heading(_))
&& inline_ends_with_hard_break(child);
output.push_str(&block_output);
prev_was_block = child_is_block;
prev_was_refdef_only = child_is_refdef_only;
prev_was_html_comment = child_is_html_comment;
prev_was_list_or_table =
matches!(child.data.borrow().value, NodeValue::List(_) | NodeValue::Table(_));
prev_was_paragraph = matches!(child.data.borrow().value, NodeValue::Paragraph);
prev_was_thematic_break = child_is_thematic_break;
prev_was_code_block = child_is_code_block;
prev_was_list = child_is_list;
prev_source_end_line = child_source_end;
}
output
}
/// Renders the children of a quoted container with `render_block`, putting a
/// separator line (`blank_prefix` followed by `" \n"`) between consecutive
/// block elements.
///
/// The separator is skipped when the previous output already ended with a
/// blank line, or when the current child is a block quote that started no
/// later than the line right after the previous sibling's last content line.
/// All other parameters are passed to `render_block` unchanged.
#[allow(clippy::too_many_arguments)]
fn render_block_children_quoted<'a>(
    node: &'a AstNode<'a>,
    line_wrapper: &LineWrapper,
    list_spacing: ListSpacing,
    prefix: &str,
    subsequent_prefix: &str,
    blank_prefix: &str,
    in_heading: &mut bool,
    options: &Options,
) -> String {
    let mut rendered = String::new();
    // State describing the previously rendered sibling.
    let mut after_block = false;
    let mut after_blank_tail = false;
    // 0 means "no previous sibling yet".
    let mut last_end_line: usize = 0;
    for child in node.children() {
        let is_block = is_block_element(child);
        let is_quote = matches!(child.data.borrow().value, NodeValue::BlockQuote);
        let start_line = child.data.borrow().sourcepos.start.line;
        let end_line = last_content_line(child);
        let adjacent = last_end_line > 0 && start_line <= last_end_line + 1;
        let tight_nested_quote = is_quote && adjacent;
        let wants_separator =
            is_block && after_block && !after_blank_tail && !tight_nested_quote;
        if wants_separator {
            rendered.push_str(blank_prefix);
            rendered.push_str(" \n");
        }
        let piece = render_block(
            child,
            line_wrapper,
            list_spacing,
            prefix,
            subsequent_prefix,
            in_heading,
            options,
        );
        after_blank_tail = piece.ends_with("\n\n");
        rendered.push_str(&piece);
        after_block = is_block;
        last_end_line = end_line;
    }
    rendered
}
fn render_block<'a>(
node: &'a AstNode<'a>,
line_wrapper: &LineWrapper,
list_spacing: ListSpacing,
prefix: &str,
subsequent_prefix: &str,
in_heading: &mut bool,
options: &Options,
) -> String {
let mut output = String::new();
match &node.data.borrow().value {
NodeValue::Document => {
output = render_block_children(
node,
line_wrapper,
list_spacing,
prefix,
subsequent_prefix,
in_heading,
options,
);
}
NodeValue::Paragraph => {
let inline_text = render_inline_children(node, options, *in_heading);
let inline_text = if let Some(tasklist) = get_tasklist_marker(node) {
format!("{tasklist}{inline_text}")
} else {
inline_text
};
let wrapped = line_wrapper(&inline_text, prefix, subsequent_prefix);
output.push_str(&wrapped);
output.push('\n');
}
NodeValue::Heading(heading) => {
*in_heading = true;
let level = heading.level;
let hashes = "#".repeat(level as usize);
let inline_text = render_inline_children(node, options, true);
*in_heading = false;
let ends_with_hard_break =
inline_ends_with_hard_break(node) || inline_text.ends_with('\\');
let _ = writeln!(output, "{prefix}{hashes} {inline_text}");
if !ends_with_hard_break {
output.push('\n');
}
}
NodeValue::List(list) => {
let any_item_is_complex = node.children().any(|item| {
let children: Vec<_> = item.children().collect();
let has_sublist =
children.iter().any(|c| matches!(c.data.borrow().value, NodeValue::List(_)));
let has_code = children
.iter()
.any(|c| matches!(c.data.borrow().value, NodeValue::CodeBlock(_)));
let para_count = children
.iter()
.filter(|c| matches!(c.data.borrow().value, NodeValue::Paragraph))
.count();
has_sublist || has_code || para_count > 1
});
let is_tight = match list_spacing {
ListSpacing::Preserve => list.tight,
ListSpacing::Tight => !any_item_is_complex,
ListSpacing::Loose => false,
};
let is_ordered = matches!(list.list_type, ListType::Ordered);
let start = list.start;
let bullet = list.bullet_char;
for (i, child) in node.children().enumerate() {
let (item_prefix, item_subsequent) = if is_ordered {
let num = start + i;
let p = format!("{num}. ");
let s = " ".repeat(num.to_string().len() + 2);
(format!("{prefix}{p}"), format!("{subsequent_prefix}{s}"))
} else {
let marker = bullet as char;
(format!("{prefix}{marker} "), format!("{subsequent_prefix} "))
};
if !is_tight && i > 0 {
let blank_prefix = subsequent_prefix.trim_end();
if blank_prefix.is_empty() {
output.push('\n');
} else {
output.push_str(blank_prefix);
output.push('\n');
}
}
render_list_item(
child,
&mut output,
line_wrapper,
list_spacing,
&item_prefix,
&item_subsequent,
in_heading,
options,
);
}
}
NodeValue::BlockQuote => {
let q_prefix = format!("{prefix}> ");
let q_subsequent = format!("{subsequent_prefix}> ");
let inner = render_block_children_quoted(
node,
line_wrapper,
list_spacing,
&q_prefix,
&q_subsequent,
&format!("{subsequent_prefix}>"),
in_heading,
options,
);
output.push_str(inner.trim_end_matches('\n'));
output.push('\n');
}
NodeValue::CodeBlock(code_block) => {
let info = &code_block.info;
let literal = &code_block.literal;
let code_content = literal.trim_end_matches('\n');
let fence_char = if code_block.fenced {
if code_block.fence_char == b'~' { '~' } else { '`' }
} else {
'`'
};
let fence_len = min_fence_length(code_content, fence_char).max(if code_block.fenced {
code_block.fence_length
} else {
3
});
let fence: String = std::iter::repeat_n(fence_char, fence_len).collect();
let _ = writeln!(output, "{prefix}{fence}{info}");
if !code_content.is_empty() {
let empty_prefix = subsequent_prefix.trim_end();
for line in code_content.split('\n') {
if line.is_empty() {
output.push_str(empty_prefix);
output.push('\n');
} else {
let _ = writeln!(output, "{subsequent_prefix}{line}");
}
}
}
let _ = writeln!(output, "{subsequent_prefix}{fence}");
}
NodeValue::ThematicBreak => {
let _ = writeln!(output, "{prefix}* * *");
}
NodeValue::HtmlBlock(html) => {
let literal = &html.literal;
let trimmed = literal.trim();
if let Some(rest) = trimmed.strip_prefix(REFDEF_MARKER_PREFIX) {
if let Some(def_text) = rest.strip_suffix("-->") {
let def_text = def_text.trim();
let lowered = lowercase_refdef_label(def_text);
let _ = writeln!(output, "{prefix}{lowered}");
return output;
}
}
if trimmed.starts_with(FNDEF_MARKER_START) {
if let Some(first_nl) = literal.find('\n') {
let rest = &literal[first_nl + 1..];
if let Some(end_pos) = rest.rfind("-->") {
let fn_text = rest[..end_pos].trim_end();
if let Some(caps) = FOOTNOTE_DEF_START.captures(fn_text) {
let label = caps[1].to_string();
let match_end = caps.get(0).map_or(0, |m| m.end());
let label_prefix = format!("[^{label}]: ");
let fn_prefix = format!("{prefix}{label_prefix}");
let fn_subsequent = format!("{prefix} ");
let mut body_lines: Vec<&str> = Vec::new();
for (li, line) in fn_text.lines().enumerate() {
if li == 0 {
body_lines.push(&line[match_end..]);
} else {
let stripped = line
.strip_prefix(" ")
.or_else(|| line.strip_prefix('\t'))
.unwrap_or(line);
body_lines.push(stripped);
}
}
let has_blank_lines =
body_lines.iter().skip(1).any(|l| l.trim().is_empty());
if has_blank_lines {
let mut paragraphs: Vec<Vec<&str>> = vec![Vec::new()];
for line in &body_lines {
if line.trim().is_empty() {
if !paragraphs
.last()
.expect("paragraphs is non-empty")
.is_empty()
{
paragraphs.push(Vec::new());
}
} else {
paragraphs
.last_mut()
.expect("paragraphs is non-empty")
.push(line);
}
}
if paragraphs.last().is_some_and(Vec::is_empty) {
paragraphs.pop();
}
for (pi, para) in paragraphs.iter().enumerate() {
let is_blockquote = para.iter().all(|l| l.starts_with('>'));
if is_blockquote {
let bq_body: Vec<&str> = para
.iter()
.map(|l| {
l.strip_prefix("> ")
.unwrap_or(l.strip_prefix('>').unwrap_or(l))
})
.collect();
let joined = bq_body.join(" ");
let bq_prefix = if pi == 0 {
format!("{fn_prefix}> ")
} else {
format!("{fn_subsequent}> ")
};
let bq_subsequent = format!("{fn_subsequent}> ");
let wrapped =
line_wrapper(joined.trim(), &bq_prefix, &bq_subsequent);
output.push_str(&wrapped);
} else {
let joined = para.join(" ");
let (p, sp) = if pi == 0 {
(fn_prefix.clone(), fn_subsequent.clone())
} else {
(fn_subsequent.clone(), fn_subsequent.clone())
};
let wrapped = line_wrapper(joined.trim(), &p, &sp);
output.push_str(&wrapped);
}
output.push_str("\n\n");
}
} else {
let blockquote_start_idx =
body_lines.iter().skip(1).position(|l| l.starts_with('>'));
let list_start_idx = body_lines.iter().skip(1).position(|l| {
l.starts_with("- ")
|| l.starts_with("* ")
|| l.starts_with("+ ")
});
if let Some(bq_idx) = blockquote_start_idx {
let bq_idx = bq_idx + 1; let preamble = body_lines[..bq_idx].join(" ");
let wrapped =
line_wrapper(preamble.trim(), &fn_prefix, &fn_subsequent);
output.push_str(&wrapped);
output.push('\n');
for line in &body_lines[bq_idx..] {
let bq_body = line
.strip_prefix("> ")
.unwrap_or(line.strip_prefix('>').unwrap_or(line));
let bq_prefix = format!("{fn_subsequent}> ");
let bq_subsequent = format!("{fn_subsequent}> ");
let wrapped = line_wrapper(
bq_body.trim(),
&bq_prefix,
&bq_subsequent,
);
output.push_str(&wrapped);
}
output.push_str("\n\n");
} else if let Some(idx) = list_start_idx {
let idx = idx + 1; let preamble = body_lines[..idx].join(" ");
let wrapped =
line_wrapper(preamble.trim(), &fn_prefix, &fn_subsequent);
output.push_str(&wrapped);
if list_spacing == ListSpacing::Loose {
output.push_str("\n\n");
} else {
output.push('\n');
}
let mut current_marker = "";
let mut current_text = String::new();
for line in &body_lines[idx..] {
let is_item_start = line.starts_with("- ")
|| line.starts_with("* ")
|| line.starts_with("+ ");
if is_item_start {
if !current_text.is_empty() {
let list_prefix =
format!("{fn_subsequent}{current_marker}");
let list_subsequent = format!("{fn_subsequent} ");
let wrapped = line_wrapper(
current_text.trim(),
&list_prefix,
&list_subsequent,
);
output.push_str(&wrapped);
output.push('\n');
}
current_marker = &line[..2];
current_text = line[2..].to_string();
} else {
current_text.push(' ');
current_text.push_str(line);
}
}
if !current_text.is_empty() {
let list_prefix =
format!("{fn_subsequent}{current_marker}");
let list_subsequent = format!("{fn_subsequent} ");
let wrapped = line_wrapper(
current_text.trim(),
&list_prefix,
&list_subsequent,
);
output.push_str(&wrapped);
}
output.push_str("\n\n");
} else {
let body = body_lines.join(" ");
let wrapped =
line_wrapper(body.trim(), &fn_prefix, &fn_subsequent);
output.push_str(&wrapped);
output.push_str("\n\n");
}
}
} else {
for line in fn_text.lines() {
let _ = writeln!(output, "{prefix}{line}");
}
}
return output;
}
}
}
if is_multiline_html_comment_block(trimmed) {
for (i, line) in trimmed.split('\n').enumerate() {
let p = if i == 0 { prefix } else { subsequent_prefix };
let _ = writeln!(output, "{p}{}", line.trim());
}
return output;
}
let has_text_content = !trimmed.is_empty()
&& trimmed.contains(|c: char| c.is_alphabetic())
&& trimmed.chars().filter(|&c| c == '<').count() > 0;
if has_text_content && trimmed.len() > 40 {
let mut paragraphs: Vec<String> = Vec::new();
let mut cur: Vec<&str> = Vec::new();
for line in literal.lines() {
if line.trim().is_empty() {
if !cur.is_empty() {
paragraphs
.push(cur.iter().map(|s| s.trim()).collect::<Vec<_>>().join(" "));
cur.clear();
}
} else {
cur.push(line);
}
}
if !cur.is_empty() {
paragraphs.push(cur.iter().map(|s| s.trim()).collect::<Vec<_>>().join(" "));
}
for (pi, para) in paragraphs.iter().enumerate() {
if pi > 0 {
output.push('\n');
}
let (p, sp) = if pi == 0 {
(prefix, subsequent_prefix)
} else {
(subsequent_prefix, subsequent_prefix)
};
output.push_str(&line_wrapper(para.trim(), p, sp));
output.push('\n');
}
} else {
output.push_str(prefix);
output.push_str(literal);
if !literal.ends_with('\n') {
output.push('\n');
}
}
}
NodeValue::Table(_) => {
let children: Vec<_> = node.children().collect();
if children.is_empty() {
return output;
}
let header = &children[0];
output.push_str(&render_table_row(header, options));
let alignments = get_table_alignments(node);
let delimiters: Vec<String> = alignments
.iter()
.map(|a| match a {
TableAlignment::None => "---".to_string(),
TableAlignment::Left => ":---".to_string(),
TableAlignment::Center => ":---:".to_string(),
TableAlignment::Right => "---:".to_string(),
})
.collect();
let _ = writeln!(output, "| {} |", delimiters.join(" | "));
for child in children.iter().skip(1) {
output.push_str(&render_table_row(child, options));
}
}
NodeValue::TableRow(_) | NodeValue::TableCell => {
}
NodeValue::FootnoteDefinition(footnote) => {
let label = &footnote.name;
let label_prefix = format!("[^{label}]: ");
let fn_prefix = format!("{prefix}{label_prefix}");
let fn_subsequent = format!("{subsequent_prefix} ");
let mut first_child = true;
for child in node.children() {
if !first_child && list_spacing == ListSpacing::Loose {
output.push('\n');
}
let (p, sp) = if first_child {
(fn_prefix.clone(), fn_subsequent.clone())
} else {
(fn_subsequent.clone(), fn_subsequent.clone())
};
let child_output =
render_block(child, line_wrapper, list_spacing, &p, &sp, in_heading, options);
output.push_str(&child_output);
first_child = false;
}
if !output.ends_with("\n\n") {
if output.ends_with('\n') {
output.push('\n');
} else {
output.push_str("\n\n");
}
}
}
NodeValue::Alert(alert) => {
let alert_type = format!("{:?}", alert.alert_type).to_uppercase();
let _ = writeln!(output, "> [!{alert_type}]");
let q_prefix = format!("{prefix}> ");
let q_subsequent = format!("{subsequent_prefix}> ");
let inner = render_block_children_quoted(
node,
line_wrapper,
list_spacing,
&q_prefix,
&q_subsequent,
&format!("{subsequent_prefix}>"),
in_heading,
options,
);
output.push_str(inner.trim_end_matches('\n'));
output.push('\n');
}
_ => {
for child in node.children() {
output.push_str(&render_block(
child,
line_wrapper,
list_spacing,
prefix,
subsequent_prefix,
in_heading,
options,
));
}
}
}
output
}
/// Decides whether blank lines should separate the block children of a single list item.
///
/// Items with zero or one child never need separation. Otherwise the answer depends on
/// `list_spacing`:
/// - `Loose`: always separate.
/// - `Preserve`: separate only if the source had a blank-line gap between two children.
/// - `Tight`: separate if the item holds a code block, holds a sublist that is loose or
///   contains a "complex" item, or the source had a gap between two children.
fn item_needs_child_spacing<'a>(node: &'a AstNode<'a>, list_spacing: ListSpacing) -> bool {
    /// An item is "complex" when it holds a nested list, a code block, or 2+ paragraphs.
    fn is_complex_item<'n>(item: &'n AstNode<'n>) -> bool {
        let mut paragraphs = 0usize;
        for part in item.children() {
            match part.data.borrow().value {
                NodeValue::List(_) | NodeValue::CodeBlock(_) => return true,
                NodeValue::Paragraph => paragraphs += 1,
                _ => {}
            }
        }
        paragraphs > 1
    }

    let parts: Vec<_> = node.children().collect();
    if parts.len() < 2 {
        return false;
    }

    // True when some child starts more than one line after the previous child's last
    // content line, i.e. the source had at least one blank line between them.
    let has_source_gap = || {
        let mut prev_end: usize = 0;
        for part in &parts {
            let starts_at = part.data.borrow().sourcepos.start.line;
            if prev_end > 0 && starts_at > prev_end + 1 {
                return true;
            }
            prev_end = last_content_line(part);
        }
        false
    };

    match list_spacing {
        ListSpacing::Loose => true,
        ListSpacing::Preserve => has_source_gap(),
        ListSpacing::Tight => {
            let holds_code = parts
                .iter()
                .any(|part| matches!(part.data.borrow().value, NodeValue::CodeBlock(_)));
            let holds_loose_sublist = || {
                parts.iter().any(|part| match &part.data.borrow().value {
                    NodeValue::List(sub) => {
                        !sub.tight || part.children().any(|item| is_complex_item(item))
                    }
                    _ => false,
                })
            };
            holds_code || holds_loose_sublist() || has_source_gap()
        }
    }
}
/// Renders every block child of one list item into `output`.
///
/// The first child is rendered with `item_prefix` (which carries the list marker); every
/// later child uses `item_subsequent` for its first line too. When
/// `item_needs_child_spacing` says the item's children should be separated, a blank
/// separator line is emitted before each non-first child unless one of the suppression
/// rules below applies.
#[allow(clippy::too_many_arguments)]
fn render_list_item<'a>(
    node: &'a AstNode<'a>,
    output: &mut String,
    line_wrapper: &LineWrapper,
    list_spacing: ListSpacing,
    item_prefix: &str,
    item_subsequent: &str,
    in_heading: &mut bool,
    options: &Options,
) {
    /// An item is "complex" when it holds a nested list, a code block, or 2+ paragraphs.
    fn is_complex_item<'n>(item: &'n AstNode<'n>) -> bool {
        let mut paragraphs = 0usize;
        for part in item.children() {
            match part.data.borrow().value {
                NodeValue::List(_) | NodeValue::CodeBlock(_) => return true,
                NodeValue::Paragraph => paragraphs += 1,
                _ => {}
            }
        }
        paragraphs > 1
    }

    /// True for a one-line HTML block that is a comment or a `{% %}` / `{# #}` / `{{ }}` tag.
    fn is_single_line_tag(literal: &str) -> bool {
        let tag = literal.trim();
        if tag.contains('\n') {
            return false;
        }
        [("<!--", "-->"), ("{%", "%}"), ("{#", "#}"), ("{{", "}}")]
            .iter()
            .any(|&(open, close)| tag.starts_with(open) && tag.ends_with(close))
    }

    let parts: Vec<_> = node.children().collect();

    // Effective tightness of the enclosing list under the requested spacing mode.
    let parent_is_tight = match node.parent() {
        Some(parent) => match (&parent.data.borrow().value, list_spacing) {
            (NodeValue::List(list), ListSpacing::Preserve) => list.tight,
            (NodeValue::List(_), ListSpacing::Tight) => {
                !parent.children().any(|sibling| is_complex_item(sibling))
            }
            _ => false,
        },
        None => false,
    };
    let needs_spacing = item_needs_child_spacing(node, list_spacing);

    for (idx, part) in parts.iter().enumerate() {
        if idx > 0 && needs_spacing {
            let previous = parts[idx - 1];

            // No separator directly after a heading child.
            let follows_heading =
                matches!(previous.data.borrow().value, NodeValue::Heading(_));
            // No separator before a heading whose inline content ends in a hard break.
            let hard_break_heading = matches!(&part.data.borrow().value, NodeValue::Heading(_))
                && inline_ends_with_hard_break(part);
            // No separator before a single-line comment/template tag block.
            let is_tag_block = match &part.data.borrow().value {
                NodeValue::HtmlBlock(html) => is_single_line_tag(&html.literal),
                _ => false,
            };
            // In a loose parent, a code block (or, in Preserve mode, a sublist) that sat
            // directly under the previous child in the source stays attached to it.
            let tracked_kind = match &part.data.borrow().value {
                NodeValue::CodeBlock(_) => true,
                NodeValue::List(_) => list_spacing == ListSpacing::Preserve,
                _ => false,
            };
            let adjacent_in_source = !parent_is_tight && tracked_kind && {
                let prev_end = previous.data.borrow().sourcepos.end.line;
                let starts_at = part.data.borrow().sourcepos.start.line;
                starts_at <= prev_end + 1
            };

            let suppressed =
                follows_heading || hard_break_heading || is_tag_block || adjacent_in_source;
            if !suppressed {
                // NOTE(review): a non-blank continuation prefix is written untrimmed here,
                // so the separator line may carry trailing spaces — confirm a later pass
                // strips them.
                if !item_subsequent.trim().is_empty() {
                    output.push_str(item_subsequent);
                }
                output.push('\n');
            }
        }

        let lead = if idx == 0 { item_prefix } else { item_subsequent };
        let rendered = render_block(
            part,
            line_wrapper,
            list_spacing,
            lead,
            item_subsequent,
            in_heading,
            options,
        );
        output.push_str(&rendered);
    }
}
/// Renders all inline children of `node` in document order and concatenates the results.
fn render_inline_children<'a>(
    node: &'a AstNode<'a>,
    options: &Options,
    in_heading: bool,
) -> String {
    node.children()
        .map(|inline| render_inline(inline, options, in_heading))
        .collect()
}
/// Reports whether a link node is an autolink: its only child is a text node whose
/// content equals the link URL (or the URL with a leading `mailto:` removed), and the
/// URL either contains `://`, starts with `mailto:`, or contains `@`.
fn is_autolink(node: &AstNode, link: &comrak::nodes::NodeLink) -> bool {
    // Exactly one child is required.
    let only_child = match node.first_child() {
        Some(child) if child.next_sibling().is_none() => child,
        _ => return false,
    };
    let text = match &only_child.data.borrow().value {
        NodeValue::Text(t) => t.clone(),
        _ => return false,
    };

    let url = &link.url;
    let looks_like_target =
        url.contains("://") || url.starts_with("mailto:") || url.contains('@');
    if !looks_like_target {
        return false;
    }

    if text == *url {
        return true;
    }
    matches!(url.strip_prefix("mailto:"), Some(bare) if text == bare)
}
/// Renders a single inline node (and, recursively, its children) back to Markdown text.
///
/// Handled node kinds: text, code spans, emphasis, strong, strikethrough, links
/// (including reference links that were encoded into the URL with `REF_LABEL_START` /
/// `REF_LABEL_SEP`), images, inline HTML, soft and hard breaks, escapes, footnote
/// references, math and wiki links. Any other node renders as the concatenation of its
/// children.
fn render_inline<'a>(node: &'a AstNode<'a>, options: &Options, in_heading: bool) -> String {
    /// Formats an optional link/image title as ` "title"`, escaping embedded double quotes.
    fn title_suffix(title: &str) -> String {
        if title.is_empty() {
            String::new()
        } else {
            format!(" \"{}\"", title.replace('"', "\\\""))
        }
    }

    match &node.data.borrow().value {
        NodeValue::Text(text) => text.to_string(),
        NodeValue::Code(code) => {
            let text = &code.literal;
            // Placeholders are decoded only to inspect the real first/last character;
            // the emitted text keeps the placeholders.
            let decoded = restore_pua_escape_placeholders(text);
            // NOTE(review): a literal with a backtick only in the middle is emitted with
            // single backticks — confirm that is intended.
            if code.num_backticks >= 2 && (decoded.starts_with('`') || decoded.ends_with('`')) {
                format!("`` {text} ``")
            } else {
                format!("`{text}`")
            }
        }
        NodeValue::Emph => {
            let inner = render_inline_children(node, options, in_heading);
            format!("*{inner}*")
        }
        NodeValue::Strong => {
            let inner = render_inline_children(node, options, in_heading);
            format!("**{inner}**")
        }
        NodeValue::Strikethrough => {
            let inner = render_inline_children(node, options, in_heading);
            format!("~~{inner}~~")
        }
        NodeValue::Link(link) => {
            let inner = render_inline_children(node, options, in_heading);
            if link.url.starts_with(REF_LABEL_START) {
                if let Some(sep_pos) = link.url.find(REF_LABEL_SEP) {
                    // Encoded reference link: the hex payload between the start marker
                    // and the separator is the reference label.
                    let hex = &link.url[REF_LABEL_START.len_utf8()..sep_pos];
                    let decoded = decode_hex_label(hex);
                    let label = decoded.as_deref().unwrap_or(hex);
                    if inner.as_str() == label {
                        // Link text equals the label: use the collapsed form.
                        format!("[{inner}][]")
                    } else {
                        format!("[{inner}][{label}]")
                    }
                } else {
                    // Start marker without a separator: emit an inline link to the rest.
                    let url = &link.url[REF_LABEL_START.len_utf8()..];
                    let title = title_suffix(&link.title);
                    format!("[{inner}]({url}{title})")
                }
            } else if link.title.is_empty() && is_autolink(node, link) {
                // Autolinks are emitted as their bare text.
                inner
            } else {
                let title = title_suffix(&link.title);
                format!("[{inner}]({}{})", link.url, title)
            }
        }
        NodeValue::Image(image) => {
            let inner = render_inline_children(node, options, in_heading);
            let title = title_suffix(&image.title);
            // Inline image syntax: alt text, destination, optional quoted title.
            format!("![{inner}]({}{})", image.url, title)
        }
        NodeValue::HtmlInline(html) => html.clone(),
        NodeValue::SoftBreak => "\n".to_string(),
        NodeValue::LineBreak => "\\\n".to_string(),
        NodeValue::Escaped => {
            let inner = render_inline_children(node, options, in_heading);
            format!("\\{inner}")
        }
        NodeValue::FootnoteReference(fr) => {
            format!("[^{}]", fr.name)
        }
        NodeValue::Math(math) => {
            if math.display_math {
                format!("$${}$$", math.literal)
            } else {
                format!("${}$", math.literal)
            }
        }
        NodeValue::WikiLink(wl) => {
            format!("[[{}]]", wl.url)
        }
        _ => render_inline_children(node, options, in_heading),
    }
}
/// Returns the task-list checkbox text for a paragraph, if it needs one.
///
/// A marker is produced only when the paragraph's parent is a task item and the
/// paragraph is that item's first child: `"[x] "` when the item has a symbol set,
/// `"[ ] "` otherwise.
fn get_tasklist_marker<'a>(para_node: &'a AstNode<'a>) -> Option<String> {
    let parent = para_node.parent()?;
    let data = parent.data.borrow();
    match &data.value {
        NodeValue::TaskItem(task) => {
            let leads_item = parent
                .first_child()
                .is_some_and(|first| std::ptr::eq(first, para_node));
            if !leads_item {
                return None;
            }
            let checkbox = if task.symbol.is_some() { "[x] " } else { "[ ] " };
            Some(checkbox.to_string())
        }
        _ => None,
    }
}
/// Returns a copy of the column alignments stored on a table node, or an empty vector
/// when the node is not a table.
fn get_table_alignments<'a>(table_node: &'a AstNode<'a>) -> Vec<TableAlignment> {
    match &table_node.data.borrow().value {
        NodeValue::Table(table) => table.alignments.clone(),
        _ => Vec::new(),
    }
}
/// Renders one table row as a pipe-delimited line terminated by a newline.
///
/// Each cell's inline content is rendered (outside heading context) and any `|` in it
/// is escaped as `\|` so it cannot be read as a column separator.
fn render_table_row<'a>(row_node: &'a AstNode<'a>, options: &Options) -> String {
    let mut rendered_cells: Vec<String> = Vec::new();
    for cell in row_node.children() {
        let text = render_inline_children(cell, options, false);
        rendered_cells.push(text.replace('|', "\\|"));
    }
    let joined = rendered_cells.join(" | ");
    format!("| {joined} |\n")
}
/// Matches, at the start of any line (multi-line mode), up to three leading spaces
/// followed by a run of three or more backticks; the run is capture group 1.
static BACKTICK_FENCE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^[ ]{0,3}(`{3,})").expect("valid backtick fence regex"));
/// Same as `BACKTICK_FENCE_RE`, but for runs of three or more tildes.
static TILDE_FENCE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^[ ]{0,3}(~{3,})").expect("valid tilde fence regex"));
fn min_fence_length(code_content: &str, fence_char: char) -> usize {
let re = match fence_char {
'`' => &*BACKTICK_FENCE_RE,
'~' => &*TILDE_FENCE_RE,
_ => return 3,
};
let max_len = re
.captures_iter(code_content)
.map(|caps| caps.get(1).expect("capture group 1 always exists").as_str().len())
.max()
.unwrap_or(0);
std::cmp::max(3, max_len + 1)
}
/// Re-flows a Markdown document and returns the formatted text.
///
/// Stages, in order: split off frontmatter, optionally dedent, trim and terminate with a
/// newline; textual preprocessing (tag-block spacing, link reference definitions, image
/// references, reference links, footnote definitions, autolink / HTML-entity / escape
/// protection); comrak parse; optional AST transforms; rendering; and postprocessing
/// that restores the protected constructs. Frontmatter, when present, is prepended to
/// the result unchanged. When perf stats are enabled, per-stage durations are recorded.
///
/// - `dedent_input`: strip common leading indentation before processing.
/// - `width`, `semantic`: only used to build the default wrapper when `line_wrapper` is
///   `None` (sentence-based wrapping when `semantic`, plain width wrapping otherwise).
/// - `cleanups`, `smartquotes`, `ellipses`: toggle the corresponding transforms.
/// - `list_spacing`: list spacing policy passed through to rendering.
#[allow(clippy::too_many_arguments, clippy::fn_params_excessive_bools)]
pub fn fill_markdown(
    markdown_text: &str,
    dedent_input: bool,
    width: usize,
    semantic: bool,
    cleanups: bool,
    smartquotes: bool,
    ellipses: bool,
    line_wrapper: Option<LineWrapper>,
    list_spacing: ListSpacing,
) -> String {
    const ESCAPE_CHARS: &[char] = &[
        '\\', '~', '*', '#', '-', '+', '>', '.', '!', '[', ']', '(', ')', '{', '}', '$', '_', '|',
        '`', '"', '%', '&', '\'', ',', '/', ':', ';', '<', '=', '?', '@', '^',
    ];

    let wrapper = match line_wrapper {
        Some(custom) => custom,
        None if semantic => line_wrap_by_sentence(width, DEFAULT_MIN_LINE_LEN, true),
        None => line_wrap_to_width(width, true),
    };

    let timing = PERF_STATS_ENABLED.load(Ordering::Relaxed);
    let mut sample = FillPerfSample::default();
    // Starts a stage timer only when stats collection is on.
    let start_timer = || if timing { Some(Instant::now()) } else { None };

    // --- Input normalization (not timed) ---
    let (frontmatter, body) = split_frontmatter(markdown_text);
    let source = if frontmatter.is_empty() { markdown_text.to_string() } else { body };
    let source = if dedent_input { dedent(&source) } else { source };
    let text = format!("{}\n", source.trim());

    // --- Preprocess ---
    let timer = start_timer();
    let text = preprocess_tag_block_spacing(&text);
    let (ref_defs, without_defs) = extract_link_ref_defs(&text);
    let text = inline_image_refs(&without_defs, &ref_defs);
    let text = encode_ref_links(&text, &ref_defs);
    let text = extract_footnote_defs(&text);
    let text = if smartquotes || ellipses {
        apply_typography_to_fndef_bodies(&text, smartquotes, ellipses)
    } else {
        text
    };
    let text = protect_autolinks(&text);
    let text = protect_html_entities(&text);
    let text = protect_escapes_outside_code(&text, ESCAPE_CHARS);
    if let Some(started) = timer {
        sample.preprocess = started.elapsed();
    }

    // --- Parse ---
    let timer = start_timer();
    let arena = Arena::new();
    let options = flowmark_comrak_options();
    let root = comrak::parse_document(&arena, &text, &options);
    if let Some(started) = timer {
        sample.parse = started.elapsed();
    }

    // --- AST transforms ---
    let timer = start_timer();
    if cleanups {
        doc_cleanups(root);
    }
    if smartquotes {
        apply_smart_quotes_to_ast(root);
    }
    if ellipses {
        apply_ellipses_to_ast(root);
    }
    if let Some(started) = timer {
        sample.transforms = started.elapsed();
    }

    // --- Render ---
    let timer = start_timer();
    let mut in_heading = false;
    let rendered =
        render_block(root, &wrapper, list_spacing, "", "", &mut in_heading, &options);
    if let Some(started) = timer {
        sample.render = started.elapsed();
    }

    // --- Postprocess: undo the protections applied during preprocessing ---
    let timer = start_timer();
    let rendered = restore_pua_escape_placeholders(&rendered);
    let rendered = postprocess_period_escapes(&rendered);
    let rendered = restore_html_entities(&rendered);
    let rendered = restore_autolinks(&rendered);
    let rendered = normalize_comrak_output(&rendered);
    // An empty document still yields a single newline.
    let rendered = if rendered.is_empty() { "\n".to_string() } else { rendered };
    let formatted =
        if frontmatter.is_empty() { rendered } else { format!("{frontmatter}{rendered}") };
    if let Some(started) = timer {
        sample.postprocess = started.elapsed();
    }

    if timing {
        record_fill_perf_sample(sample);
    }
    formatted
}
/// Applies smart-quote conversion to the inline content of every paragraph, heading and
/// table cell found among the descendants of `root`.
fn apply_smart_quotes_to_ast<'a>(root: &'a AstNode<'a>) {
    root.descendants()
        .filter(|candidate| {
            matches!(
                candidate.data.borrow().value,
                NodeValue::Paragraph | NodeValue::Heading(_) | NodeValue::TableCell
            )
        })
        .for_each(|container| apply_smart_quotes_to_inline_tree(container));
}
/// Converts straight quotes to smart quotes across all text nodes under `node`.
///
/// The text of every descendant text node is concatenated into one string so that quote
/// direction can be decided with context spanning node boundaries. Non-text inlines
/// contribute a single stand-in character. After conversion, each text node is
/// overwritten with its own character range of the converted string; a node whose range
/// falls outside the converted string is left untouched.
#[allow(clippy::items_after_statements)]
fn apply_smart_quotes_to_inline_tree<'a>(node: &'a AstNode<'a>) {
    let mut leaves: Vec<&'a AstNode<'a>> = Vec::new();
    let mut flat = String::new();
    // One (start, length) pair per entry of `leaves`, measured in chars of `flat`.
    let mut spans: Vec<(usize, usize)> = Vec::new();

    /// Depth-first walk that appends text (or a stand-in character) to `flat` and
    /// records each text node together with its char span.
    fn gather<'n>(
        parent: &'n AstNode<'n>,
        leaves: &mut Vec<&'n AstNode<'n>>,
        flat: &mut String,
        spans: &mut Vec<(usize, usize)>,
    ) {
        for child in parent.children() {
            let data = child.data.borrow();
            match &data.value {
                NodeValue::Text(text) => {
                    spans.push((flat.chars().count(), text.chars().count()));
                    flat.push_str(text);
                    leaves.push(child);
                }
                NodeValue::Code(code) => {
                    // A code span stands in as its last character when that is a word
                    // character, otherwise as a space.
                    let stand_in = code
                        .literal
                        .chars()
                        .last()
                        .filter(|c| c.is_alphanumeric() || *c == '_')
                        .unwrap_or(' ');
                    flat.push(stand_in);
                }
                NodeValue::HtmlInline(_) => flat.push(' '),
                NodeValue::SoftBreak => flat.push('\n'),
                _ => {
                    // Release the borrow before descending into the container.
                    drop(data);
                    gather(child, leaves, flat, spans);
                }
            }
        }
    }

    gather(node, &mut leaves, &mut flat, &mut spans);
    if leaves.is_empty() {
        return;
    }

    let converted: Vec<char> = smart_quotes(&flat).chars().collect();
    for (leaf, &(start, len)) in leaves.iter().zip(&spans) {
        let Some(window) = converted.get(start..start + len) else {
            continue;
        };
        let replacement: String = window.iter().collect();
        let mut slot = leaf.data.borrow_mut();
        if let NodeValue::Text(text) = &mut slot.value {
            *text = replacement.into();
        }
    }
}
/// Runs the ellipsis typography transform over the content of every text node among the
/// descendants of `root`, replacing each node's text with the transformed result.
fn apply_ellipses_to_ast<'a>(root: &'a AstNode<'a>) {
    root.descendants().for_each(|candidate| {
        let mut slot = candidate.data.borrow_mut();
        if let NodeValue::Text(ref mut text) = slot.value {
            *text = apply_ellipses(text).into();
        }
    });
}
/// Removes the common leading indentation from every line of `text`.
///
/// The indentation width is the smallest number of leading-whitespace *bytes* found on
/// any non-blank line. Lines shorter than that width (e.g. short blank lines) are kept
/// as they are. When there is nothing to strip, `text` is returned unchanged; otherwise
/// the lines are re-joined with `\n`, so a trailing newline is not preserved.
///
/// Indentation may contain multi-byte whitespace. If the byte width would cut through
/// such a character on some line, only the whole characters that fit are removed from
/// that line instead of panicking on a non-boundary slice.
fn dedent(text: &str) -> String {
    /// Drops up to `width` leading bytes from `line`, never splitting a character.
    fn strip_indent(line: &str, width: usize) -> &str {
        if line.len() < width {
            return line;
        }
        let mut cut = width;
        // Index 0 is always a boundary, so this loop terminates.
        while !line.is_char_boundary(cut) {
            cut -= 1;
        }
        &line[cut..]
    }

    let lines: Vec<&str> = text.lines().collect();
    if lines.is_empty() {
        return text.to_string();
    }

    let min_indent = lines
        .iter()
        .filter(|l| !l.trim().is_empty())
        .map(|l| l.len() - l.trim_start().len())
        .min()
        .unwrap_or(0);
    if min_indent == 0 {
        return text.to_string();
    }

    lines
        .iter()
        .map(|l| strip_indent(l, min_indent))
        .collect::<Vec<_>>()
        .join("\n")
}
/// Unit tests for the preprocessing helpers (reference definitions, footnotes,
/// reference-link encoding, image-reference inlining) and a few end-to-end checks.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    // --- Link reference definition extraction ---

    #[test]
    fn extract_ref_defs_basic() {
        let input = "Hello\n\n[foo]: https://example.com\n\nWorld\n";
        let (defs, output) = extract_link_ref_defs(input);
        assert_eq!(defs.get("foo").map(String::as_str), Some("https://example.com"));
        assert!(output.contains(REFDEF_MARKER_PREFIX));
        assert!(output.contains("https://example.com"));
        assert!(output.contains("World"));
    }

    #[test]
    fn extract_ref_defs_case_insensitive() {
        let input = "[Foo]: https://example.com\n";
        let (defs, _) = extract_link_ref_defs(input);
        assert!(defs.contains_key("foo"));
        assert!(!defs.contains_key("Foo"));
    }

    #[test]
    fn extract_ref_defs_inside_code_fence_ignored() {
        let input = "```\n[foo]: https://example.com\n```\n";
        let (defs, output) = extract_link_ref_defs(input);
        assert!(defs.is_empty());
        assert!(output.contains("[foo]:"));
        assert!(!output.contains(REFDEF_MARKER_PREFIX));
    }

    #[test]
    fn extract_ref_defs_with_title() {
        let input = "[bar]: https://example.com \"A title\"\n";
        let (defs, output) = extract_link_ref_defs(input);
        assert_eq!(defs.get("bar").map(String::as_str), Some("https://example.com \"A title\""));
        assert!(output.contains(REFDEF_MARKER_PREFIX));
    }

    #[test]
    fn extract_ref_defs_multiple() {
        let input = "[a]: https://a.com\n[b]: https://b.com\n";
        let (defs, _) = extract_link_ref_defs(input);
        assert_eq!(defs.len(), 2);
        assert!(defs.contains_key("a"));
        assert!(defs.contains_key("b"));
    }

    #[test]
    fn extract_ref_defs_skips_footnote_definitions() {
        let input = "[normal]: https://example.com\n[^note]: https://another.com\n";
        let (defs, output) = extract_link_ref_defs(input);
        assert!(defs.contains_key("normal"), "Normal ref def should be extracted");
        assert!(
            !defs.contains_key("^note") && !defs.contains_key("note"),
            "Footnote label should NOT be in ref def map"
        );
        assert!(output.contains(REFDEF_MARKER_PREFIX));
        assert!(
            output.contains("[^note]: https://another.com"),
            "Footnote def should pass through unchanged, got:\n{output}"
        );
    }

    // --- Footnote definition extraction ---

    #[test]
    fn extract_footnote_basic() {
        let input = "Text.\n\n[^note]: Footnote content.\n\nMore text.\n";
        let output = extract_footnote_defs(input);
        assert!(output.contains(FNDEF_MARKER_START));
        assert!(output.contains("Footnote content."));
        assert!(output.contains("More text."));
        assert!(output.contains("-->"));
    }

    #[test]
    fn extract_footnote_multiline() {
        let input = "[^long]: First line.\n Continuation line.\n\nAfter.\n";
        let output = extract_footnote_defs(input);
        assert!(output.contains(FNDEF_MARKER_START));
        assert!(output.contains("First line."));
        assert!(output.contains("Continuation line."));
    }

    #[test]
    fn extract_footnote_consecutive_blank_line_preserved() {
        let input = "[^1]: First.\n\n[^2]: Second.\n";
        let output = extract_footnote_defs(input);
        let marker_count = output.matches(FNDEF_MARKER_START).count();
        assert_eq!(marker_count, 2, "Should have two FNDEF markers, got:\n{output}");
        assert!(
            output.contains("-->\n\n"),
            "Blank line between defs should be preserved, got:\n{output}"
        );
    }

    #[test]
    fn extract_footnote_with_autolink_blank_line_preserved() {
        use crate::config::ListSpacing;
        let input = "[^2]: <https://example.com/path>\n\n[^3]: <https://example.com/other>\n";
        let extracted = extract_footnote_defs(input);
        let marker_count = extracted.matches(FNDEF_MARKER_START).count();
        assert_eq!(marker_count, 2, "Should have two FNDEF markers, got:\n{extracted}");
        assert!(
            extracted.contains("-->\n\n"),
            "Blank line between defs should be preserved after extraction, got:\n{extracted}"
        );
        let protected = protect_autolinks(&extracted);
        assert!(
            protected.contains("-->\n\n"),
            "Blank line between defs should be preserved after autolink protection, got:\n{protected}"
        );
        let result =
            fill_markdown(input, true, 88, false, false, false, false, None, ListSpacing::Preserve);
        assert!(
            result.contains("\n\n[^3]:"),
            "Full pipeline should preserve blank line between footnote defs with autolinks, got:\n{result}"
        );
    }

    #[test]
    fn extract_footnote_inside_code_fence_ignored() {
        let input = "```\n[^note]: Not a footnote.\n```\n";
        let output = extract_footnote_defs(input);
        assert!(!output.contains(FNDEF_MARKER_START));
        assert!(output.contains("[^note]:"));
    }

    /// Builds a label -> definition map from string pairs.
    fn refdef_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs.iter().map(|(k, v)| ((*k).to_string(), (*v).to_string())).collect()
    }

    // --- Reference link encoding ---

    #[test]
    fn encode_full_ref_link() {
        let defs = refdef_map(&[("foo", "https://example.com")]);
        let input = "See [click here][foo] for details.\n";
        let output = encode_ref_links(input, &defs);
        assert!(output.contains(REF_LABEL_START));
        assert!(output.contains(REF_LABEL_SEP));
        assert!(!output.contains("[foo]"));
    }

    #[test]
    fn encode_collapsed_ref_link() {
        let defs = refdef_map(&[("example", "https://example.com")]);
        let input = "See [Example][] for details.\n";
        let output = encode_ref_links(input, &defs);
        assert!(output.contains(REF_LABEL_START));
    }

    #[test]
    fn encode_shortcut_ref_link() {
        let defs = refdef_map(&[("foo", "https://example.com")]);
        let input = "See [foo] for details.\n";
        let output = encode_ref_links(input, &defs);
        assert!(output.contains(REF_LABEL_START), "shortcut ref should be encoded");
        assert!(output.contains(") for details."));
    }

    #[test]
    fn encode_shortcut_ref_lowercases_label() {
        let defs = refdef_map(&[("foo", "https://example.com")]);
        let input = "See [Foo] here.\n";
        let output = encode_ref_links(input, &defs);
        let expected_payload = encode_hex_label("foo");
        assert_eq!(expected_payload, "666f6f");
        assert!(
            output.contains(&format!("{REF_LABEL_START}{expected_payload}{REF_LABEL_SEP}")),
            "expected hex-encoded label in output: {output:?}"
        );
    }

    #[test]
    fn hex_label_round_trip_handles_spaces_and_punctuation() {
        for label in ["foo", "st. john's school", "an example", "café"] {
            let hex = encode_hex_label(label);
            assert!(
                hex.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')),
                "hex payload must be URL-safe: {hex:?}"
            );
            assert_eq!(decode_hex_label(&hex).as_deref(), Some(label));
        }
    }

    #[test]
    fn decode_hex_label_rejects_invalid() {
        assert!(decode_hex_label("").is_none());
        assert!(decode_hex_label("abc").is_none());
        assert!(decode_hex_label("xy").is_none());
    }

    #[test]
    fn encode_unknown_label_unchanged() {
        let defs = refdef_map(&[("known", "https://example.com")]);
        let input = "See [text][unknown] for details.\n";
        let output = encode_ref_links(input, &defs);
        assert_eq!(input, output);
    }

    #[test]
    fn encode_empty_labels_passthrough() {
        let defs: HashMap<String, String> = HashMap::new();
        let input = "See [text][foo] for details.\n";
        let output = encode_ref_links(input, &defs);
        assert_eq!(input, output);
    }

    #[test]
    fn encode_inside_code_fence_unchanged() {
        let defs = refdef_map(&[("foo", "https://example.com")]);
        let input = "```\n[text][foo]\n```\n";
        let output = encode_ref_links(input, &defs);
        assert!(output.contains("[text][foo]"));
    }

    // --- Image reference inlining ---
    // NOTE(review): the expected strings in the five positive tests below were
    // reconstructed as inline-image syntax; confirm against `inline_image_refs`.

    #[test]
    fn inline_image_full_ref() {
        let defs = refdef_map(&[("img", "https://example.com/img.png")]);
        let input = "![alt][img]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, "![alt](https://example.com/img.png)\n");
    }

    #[test]
    fn inline_image_collapsed_ref() {
        let defs = refdef_map(&[("alt", "https://example.com/img.png")]);
        let input = "![alt][]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, "![alt](https://example.com/img.png)\n");
    }

    #[test]
    fn inline_image_shortcut_ref() {
        let defs = refdef_map(&[("alt", "https://example.com/img.png")]);
        let input = "![alt]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, "![alt](https://example.com/img.png)\n");
    }

    #[test]
    fn inline_image_with_title() {
        let defs = refdef_map(&[("img", "https://example.com/img.png \"My title\"")]);
        let input = "![alt][img]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, "![alt](https://example.com/img.png \"My title\")\n");
    }

    #[test]
    fn inline_image_label_lowercased_lookup() {
        let defs = refdef_map(&[("img", "https://example.com/img.png")]);
        let input = "![Alt][IMG]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, "![Alt](https://example.com/img.png)\n");
    }

    #[test]
    fn inline_image_no_def_unchanged() {
        let defs: HashMap<String, String> = HashMap::new();
        let input = "![alt][missing]\n";
        let output = inline_image_refs(input, &defs);
        assert_eq!(output, input);
    }

    #[test]
    fn inline_image_inside_code_fence_unchanged() {
        let defs = refdef_map(&[("img", "https://example.com/img.png")]);
        let input = "```\n![alt][img]\n```\n";
        let output = inline_image_refs(input, &defs);
        assert!(output.contains("![alt][img]"));
    }

    // --- Reference definition label normalization ---

    #[test]
    fn lowercase_refdef_label_basic() {
        assert_eq!(
            lowercase_refdef_label("[Foo]: https://example.com"),
            "[foo]: https://example.com"
        );
    }

    #[test]
    fn lowercase_refdef_label_preserves_title() {
        assert_eq!(
            lowercase_refdef_label("[Foo Bar]: https://example.com \"A Title\""),
            "[foo bar]: https://example.com \"A Title\""
        );
    }

    #[test]
    fn lowercase_refdef_label_passthrough_when_no_match() {
        assert_eq!(lowercase_refdef_label("not a refdef"), "not a refdef");
    }

    // --- Miscellaneous helpers and marker invariants ---

    #[test]
    fn replace_until_stable_multiple_matches() {
        let re = Regex::new(r"ab").unwrap();
        let mut text = "ababab".to_string();
        replace_until_stable(&mut text, &re, |_| "X".to_string());
        assert_eq!(text, "XXX");
    }

    #[test]
    fn markers_contain_pua_char() {
        assert!(REFDEF_MARKER_PREFIX.contains('\u{F002}'));
        assert!(FNDEF_MARKER_START.contains('\u{F002}'));
    }

    #[test]
    fn user_html_comment_not_treated_as_marker() {
        let user_comment = "<!-- REFDEF:see below -->";
        assert!(!user_comment.starts_with(REFDEF_MARKER_PREFIX));
    }
}