use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use std::collections::HashMap;
use super::plugin_markers;
use super::preprocessor;
thread_local! {
    /// Per-thread `math_core` converter shared by every math rendering call.
    ///
    /// Built once per thread from the default `MathCoreConfig`. Holds `None`
    /// when construction fails, so callers have to handle a missing converter.
    static MATH_CONVERTER: std::cell::RefCell<Option<math_core::LatexToMathML>> = {
        let built = math_core::LatexToMathML::new(math_core::MathCoreConfig::default());
        std::cell::RefCell::new(built.ok())
    };
}
/// Escapes the characters that are significant inside HTML text content.
///
/// `&`, `<` and `>` are replaced by their named entities. Quotes are left
/// untouched, so the result is only safe in text context, not inside a
/// quoted attribute value.
fn escape_html_text(input: &str) -> String {
    // `&` has to go first; otherwise the entities produced by the later
    // replacements would themselves be escaped a second time.
    input
        .replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}
/// Splits a comma-separated argument string into trimmed pieces.
///
/// A blank (empty or whitespace-only) input yields an empty vector. Empty
/// pieces between consecutive commas are kept as empty strings.
fn parse_args(args: &str) -> Vec<String> {
    match args.trim() {
        "" => Vec::new(),
        _ => args
            .split(',')
            .map(|piece| piece.trim().to_owned())
            .collect(),
    }
}
/// Renders each parsed argument as a `<data>` element.
///
/// The `value` attribute carries the zero-based argument index and the
/// element body carries the HTML-escaped argument text. The elements are
/// concatenated without a separator.
fn render_args_as_data(args: &str) -> String {
    let mut html = String::new();
    for (index, value) in parse_args(args).iter().enumerate() {
        html.push_str(&format!(
            "<data value=\"{}\">{}</data>",
            index,
            escape_html_text(value)
        ));
    }
    html
}
/// Maps a `@table` plugin option to the CSS class it enables.
///
/// Returns `None` for options that have no class mapping.
fn map_table_plugin_option_to_class(option: &str) -> Option<&'static str> {
    // (option name, CSS class) pairs; lookup is an exact, case-sensitive match.
    const OPTION_CLASSES: [(&str, &str); 6] = [
        ("striped", "table-striped"),
        ("hover", "table-hover"),
        ("dark", "table-dark"),
        ("bordered", "table-bordered"),
        ("borderless", "table-borderless"),
        ("sm", "table-sm"),
    ];
    OPTION_CLASSES
        .iter()
        .find(|(name, _)| *name == option)
        .map(|(_, class)| *class)
}
/// Adds `add_classes` to the `class="…"` attribute of an attribute string.
///
/// * `existing_attrs` – the raw attribute text of a tag (for example
///   ` id="x" class="table"`), returned unchanged when `add_classes` is empty.
/// * `add_classes` – classes to add; ones already present are not duplicated.
///
/// When a `class="…"` attribute exists, the first one is rewritten in place;
/// otherwise a new ` class="…"` attribute is appended.
fn merge_class_attr(existing_attrs: &str, add_classes: &[String]) -> String {
    const CLASS_OPEN: &str = "class=\"";

    if add_classes.is_empty() {
        return existing_attrs.to_string();
    }

    // Locate the value of the first `class="…"` attribute as a byte range.
    // Plain string search is enough here and, unlike a regex replacement
    // string, never interprets `$` inside a class name.
    let value_range = existing_attrs.find(CLASS_OPEN).and_then(|attr_start| {
        let value_start = attr_start + CLASS_OPEN.len();
        existing_attrs[value_start..]
            .find('"')
            .map(|value_len| (value_start, value_start + value_len))
    });

    match value_range {
        Some((value_start, value_end)) => {
            let mut class_list: Vec<&str> = existing_attrs[value_start..value_end]
                .split_whitespace()
                .collect();
            for class_name in add_classes {
                if !class_list.contains(&class_name.as_str()) {
                    class_list.push(class_name.as_str());
                }
            }
            format!(
                "{}{}{}",
                &existing_attrs[..value_start],
                class_list.join(" "),
                &existing_attrs[value_end..]
            )
        }
        // No class attribute yet: append one. The quotes are plain `"`; the
        // previous raw-string literal leaked backslashes into the markup.
        None => format!("{} class=\"{}\"", existing_attrs, add_classes.join(" ")),
    }
}
/// Renders the body of a `@table` plugin and decorates the first table in it.
///
/// `content` is rendered through `crate::parse`. Options in `function_args`
/// that map to a table class are merged into the first `<table>` tag, and the
/// `responsive` option wraps that table in a `table-responsive` `<div>`.
/// When the rendered content has no table, a warning is written to stderr and
/// the rendered content is returned as is.
fn process_table_plugin(function_args: &str, content: &str) -> String {
    let html = crate::parse(content);
    let options = parse_args(function_args);
    let wrap_responsive = options.iter().any(|opt| opt == "responsive");

    // Collect the mapped classes in argument order, without duplicates.
    let mut extra_classes: Vec<String> = Vec::new();
    for class in options
        .iter()
        .filter_map(|opt| map_table_plugin_option_to_class(opt))
    {
        if !extra_classes.iter().any(|known| known == class) {
            extra_classes.push(class.to_string());
        }
    }

    let whole_table = Regex::new(r"(?s)<table[^>]*>.*?</table>").unwrap();
    let opening_tag = Regex::new(r"<table([^>]*)>").unwrap();

    let found = match whole_table.find(&html) {
        Some(found) => found,
        None => {
            eprintln!(
                "[UMD warning] @table plugin requires a table inside its content: {}",
                content.replace('\n', "\\n")
            );
            return html;
        }
    };

    // Only the opening tag of the matched table is rewritten.
    let decorated = opening_tag
        .replace(found.as_str(), |caps: &Captures| {
            let attrs = caps.get(1).map_or("", |m| m.as_str());
            format!("<table{}>", merge_class_attr(attrs, &extra_classes))
        })
        .to_string();

    let replacement = if wrap_responsive {
        format!("<div class=\"table-responsive\">{}</div>", decorated)
    } else {
        decorated
    };

    let mut output = String::with_capacity(html.len() + replacement.len());
    output.push_str(&html[..found.start()]);
    output.push_str(&replacement);
    output.push_str(&html[found.end()..]);
    output
}
/// Translates a font-size argument into either a utility class or a CSS value.
///
/// Returns `(true, class)` when the value is one of the recognised unit-less
/// sizes, and `(false, css_value)` otherwise. Values that already contain a
/// `rem`, `em` or `px` unit are passed through unchanged; any other value gets
/// `rem` appended.
fn map_font_size_value(value: &str) -> (bool, String) {
    let has_unit = ["rem", "em", "px"]
        .iter()
        .any(|unit| value.contains(*unit));
    if has_unit {
        return (false, value.to_string());
    }

    let utility_class = match value {
        "2.5" => Some("fs-1"),
        "2" | "2.0" => Some("fs-2"),
        "1.75" => Some("fs-3"),
        "1.5" => Some("fs-4"),
        "1.25" => Some("fs-5"),
        "0.875" => Some("fs-6"),
        _ => None,
    };

    match utility_class {
        Some(class) => (true, class.to_string()),
        None => (false, format!("{}rem", value)),
    }
}
/// Resolves a colour argument to a utility class or a literal hex colour.
///
/// * `value` – the colour as written by the author; surrounding whitespace is
///   ignored.
/// * `is_background` – selects the `bg-` class prefix instead of `text-`.
///
/// Returns `Some((true, class))` for a known colour name (optionally followed
/// by a `-suffix`), `Some((false, hex))` for `#rgb` / `#rrggbb`, and `None`
/// for anything else.
fn map_color_value(value: &str, is_background: bool) -> Option<(bool, String)> {
    const BOOTSTRAP_COLORS: &[&str] = &[
        "primary",
        "secondary",
        "success",
        "danger",
        "warning",
        "info",
        "light",
        "dark",
        "body",
        "body-secondary",
        "body-tertiary",
        "body-emphasis",
        "blue",
        "indigo",
        "purple",
        "pink",
        "red",
        "orange",
        "yellow",
        "green",
        "teal",
        "cyan",
    ];

    let name = value.trim();

    // A name qualifies when it is exactly a known colour or that colour
    // followed by a dash-separated suffix.
    let is_known_color = BOOTSTRAP_COLORS.iter().any(|base| {
        name.strip_prefix(*base)
            .map_or(false, |rest| rest.is_empty() || rest.starts_with('-'))
    });
    if is_known_color {
        let prefix = if is_background { "bg" } else { "text" };
        return Some((true, format!("{}-{}", prefix, name)));
    }

    // Hex colours: `#` plus exactly three or six ASCII hex digits.
    let digits = name.strip_prefix('#')?;
    if matches!(name.len(), 4 | 7) && digits.chars().all(|c| c.is_ascii_hexdigit()) {
        Some((false, name.to_string()))
    } else {
        None
    }
}
/// Matches a UMD blockquote line: it starts with `>` and ends with `<`.
/// Capture group 1 is the text between the two markers, with surrounding
/// whitespace excluded.
static UMD_BLOCKQUOTE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^>\s*(.+?)\s*<\s*$").unwrap()
});
/// Matches `***text***` where `text` contains no `*`; used by
/// `detect_ambiguous_syntax`.
static TRIPLE_STAR_EMPHASIS: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\*\*\*([^*]+)\*\*\*").unwrap());
/// Matches an ATX heading line that ends in a `{#custom-id}` suffix.
/// Groups: 1 = the `#` run (1–6), 2 = the heading title, 3 = the id
/// (ASCII letters, digits, `_` and `-`).
static CUSTOM_HEADER_ID: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^(#{1,6})\s+(.+?)\s+\{#([a-zA-Z0-9_-]+)\}\s*$").unwrap());
/// State carried from `preprocess_conflicts` to `postprocess_conflicts`.
#[derive(Debug, Clone)]
pub struct HeaderIdMap {
    /// Custom heading ids, keyed by the 1-based heading counter assigned
    /// during preprocessing and looked up again during postprocessing.
    pub ids: HashMap<usize, String>,
    /// `(marker, html)` pairs for extracted UMD tables; each marker is swapped
    /// for its HTML during postprocessing.
    pub tables: Vec<(String, String)>,
}

impl HeaderIdMap {
    /// Creates a map with no custom ids and no extracted tables.
    pub fn new() -> Self {
        let ids = HashMap::new();
        let tables = Vec::new();
        Self { ids, tables }
    }
}
/// Counts the column-0 ATX headings (`#` … `######` followed by a space or tab
/// and some text) in `segment`, skipping lines inside fenced code blocks.
///
/// `in_fence` carries the open/closed fence state across successive segments
/// of the same document.
fn count_atx_headings(segment: &str, in_fence: &mut bool) -> usize {
    let mut count = 0;
    for line in segment.lines() {
        let unindented = line.trim_start();
        if unindented.starts_with("```") || unindented.starts_with("~~~") {
            *in_fence = !*in_fence;
            continue;
        }
        if *in_fence {
            continue;
        }
        let hashes = line.bytes().take_while(|byte| *byte == b'#').count();
        if !(1..=6).contains(&hashes) {
            continue;
        }
        let rest = &line[hashes..];
        if rest.starts_with(|c: char| c == ' ' || c == '\t') && !rest.trim().is_empty() {
            count += 1;
        }
    }
    count
}

/// Rewrites UMD constructs that would clash with Markdown parsing into opaque
/// markers, and records what `postprocess_conflicts` needs to restore them.
///
/// Steps, in order: strip comments, strip `{#custom-id}` heading suffixes
/// (recording them in the returned [`HeaderIdMap`]), protect UMD blockquotes,
/// protect block-decoration prefixes (base64-encoded), protect inline and
/// block plugins, extract UMD tables, and process definition lists.
///
/// Returns the rewritten text together with the collected header ids and
/// table markers.
///
/// Custom ids are keyed by the 1-based position of their heading among *all*
/// column-0 ATX headings outside fenced code, because postprocessing numbers
/// every rendered heading. Setext headings and headings whose rendered HTML
/// contains inline tags are not accounted for and can still shift the
/// numbering.
pub fn preprocess_conflicts(input: &str) -> (String, HeaderIdMap) {
    let without_comments = preprocessor::remove_comments(input);
    let mut header_map = HeaderIdMap::new();

    // Running heading ordinal. Headings without a custom id are counted from
    // the text between consecutive matches so that ids stay attached to the
    // right heading even when earlier headings carry no `{#id}`.
    let mut headings_seen = 0usize;
    let mut scanned_to = 0usize;
    let mut in_fence = false;
    let mut result = CUSTOM_HEADER_ID
        .replace_all(&without_comments, |caps: &Captures| {
            let whole = caps.get(0).expect("capture group 0 always participates");
            headings_seen +=
                count_atx_headings(&without_comments[scanned_to..whole.start()], &mut in_fence);
            headings_seen += 1;
            scanned_to = whole.end();
            header_map.ids.insert(headings_seen, caps[3].to_string());
            format!("{} {}", &caps[1], &caps[2])
        })
        .to_string();

    result = UMD_BLOCKQUOTE
        .replace_all(&result, |caps: &Captures| {
            let content = &caps[1];
            format!("{{{{UMD_BLOCKQUOTE:{}:UMD_BLOCKQUOTE}}}}", content)
        })
        .to_string();

    // A line that begins with one or more decoration prefixes is hidden from
    // the Markdown parser as a base64 payload.
    let block_decoration_prefix = Regex::new(
        r"(?m)^((?:(?:SIZE\([^)]+\)|COLOR\([^)]*\)|TRUNCATE|TOP|MIDDLE|BOTTOM|BASELINE|JUSTIFY|RIGHT|CENTER|LEFT):[ \t]*)+.*)$",
    )
    .unwrap();
    result = block_decoration_prefix
        .replace_all(&result, |caps: &Captures| {
            use base64::{Engine as _, engine::general_purpose};
            let encoded = general_purpose::STANDARD.encode(caps[1].as_bytes());
            format!(
                "{{{{BLOCK_DECORATION_B64:{}:BLOCK_DECORATION_B64}}}}",
                encoded
            )
        })
        .to_string();

    result = plugin_markers::protect_inline_plugins(&result);
    result = plugin_markers::protect_block_plugins(&result);

    let (result, table_map) = crate::extensions::table::umd::extract_umd_tables(&result);
    header_map.tables = table_map;

    let result = preprocessor::process_definition_lists(&result);
    (result, header_map)
}
/// Converts an inline decoration that has both arguments and content to HTML.
///
/// * `function` – the decoration name (for example `kbd`, `ruby`, `badge`).
/// * `args` – the raw argument text; inserted without escaping.
/// * `content` – the decorated content; inserted without escaping.
///
/// Returns `None` when `function` is not a decoration handled here, so the
/// caller can fall back to its generic plugin output.
fn convert_inline_decoration_to_html(function: &str, args: &str, content: &str) -> Option<String> {
    let html = match function {
        // Plain wrappers: the element name equals the function name.
        "dfn" | "kbd" | "samp" | "var" | "cite" | "q" | "small" | "u" | "bdi" => {
            format!("<{tag}>{body}</{tag}>", tag = function, body = content)
        }
        "ruby" => format!(
            "<ruby>{}<rp>(</rp><rt>{}</rt><rp>)</rp></ruby>",
            content, args
        ),
        "time" => format!("<time datetime=\"{}\">{}</time>", args, content),
        "data" => format!("<data value=\"{}\">{}</data>", args, content),
        "bdo" => format!("<bdo dir=\"{}\">{}</bdo>", args, content),
        "lang" => format!("<span lang=\"{}\">{}</span>", args, content),
        // Here the content becomes the title and the arguments the visible text.
        "abbr" => format!("<abbr title=\"{}\">{}</abbr>", content, args),
        // `sup` / `sub` render the arguments; the content is not used.
        "sup" => format!("<sup>{}</sup>", args),
        "sub" => format!("<sub>{}</sub>", args),
        "badge" => {
            let badge_class = if args.ends_with("-pill") {
                format!("badge rounded-pill bg-{}", args.trim_end_matches("-pill"))
            } else {
                format!("badge bg-{}", args)
            };
            // Content written as a Markdown link turns the badge into an anchor.
            let markdown_link = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
            match markdown_link.captures(content) {
                Some(link) => format!(
                    "<a href=\"{}\" class=\"{}\">{}</a>",
                    &link[2], badge_class, &link[1]
                ),
                None => format!("<span class=\"{}\">{}</span>", badge_class, content),
            }
        }
        "color" => {
            let mut requested = args.split(',').map(str::trim);
            let foreground = requested.next().unwrap_or("");
            let background = requested.next().unwrap_or("");

            // Empty and `inherit` entries contribute nothing.
            let resolve = |raw: &str, is_background: bool| {
                if raw.is_empty() || raw == "inherit" {
                    None
                } else {
                    map_color_value(raw, is_background)
                }
            };

            let mut classes: Vec<String> = Vec::new();
            let mut styles: Vec<String> = Vec::new();
            match resolve(foreground, false) {
                Some((true, class)) => classes.push(class),
                Some((false, color)) => styles.push(format!("color: {}", color)),
                None => {}
            }
            match resolve(background, true) {
                Some((true, class)) => classes.push(class),
                Some((false, color)) => styles.push(format!("background-color: {}", color)),
                None => {}
            }

            let mut attrs: Vec<String> = Vec::new();
            if !classes.is_empty() {
                attrs.push(format!("class=\"{}\"", classes.join(" ")));
            }
            if !styles.is_empty() {
                attrs.push(format!("style=\"{}\"", styles.join("; ")));
            }

            if attrs.is_empty() {
                // Nothing usable was requested: emit the content unwrapped.
                content.to_string()
            } else {
                format!("<span {}>{}</span>", attrs.join(" "), content)
            }
        }
        "size" => match map_font_size_value(args) {
            (true, class) => format!("<span class=\"{}\">{}</span>", class, content),
            (false, size) => format!("<span style=\"font-size: {}\">{}</span>", size, content),
        },
        _ => return None,
    };
    Some(html)
}
/// Converts an inline decoration that carries only arguments to HTML.
///
/// Handles `sup`, `sub` and inline `math`; returns `None` for any other
/// function name.
fn convert_inline_decoration_argsonly_to_html(function: &str, args: &str) -> Option<String> {
    if function == "math" {
        return render_math_html(args, false);
    }
    if function == "sup" || function == "sub" {
        return Some(format!("<{tag}>{body}</{tag}>", tag = function, body = args));
    }
    None
}
/// Converts an argument-less inline decoration to its void HTML element.
///
/// Only `wbr` and `br` are recognised; every other name yields `None`.
fn convert_inline_decoration_noargs_to_html(function: &str) -> Option<String> {
    let element = match function {
        "wbr" => "<wbr />",
        "br" => "<br />",
        _ => return None,
    };
    Some(element.to_string())
}
/// Renders a math formula through the thread-local `MATH_CONVERTER`.
///
/// * `formula` – the formula source; surrounding whitespace is ignored.
/// * `block_display` – selects `MathDisplay::Block` instead of
///   `MathDisplay::Inline`.
///
/// Returns `None` for a blank formula. Otherwise returns the converter output,
/// or, when the converter is unavailable or the conversion fails, a
/// `umd-math-error` span that shows the escaped source.
fn render_math_html(formula: &str, block_display: bool) -> Option<String> {
    let formula = formula.trim();
    if formula.is_empty() {
        return None;
    }

    let display = if block_display {
        math_core::MathDisplay::Block
    } else {
        math_core::MathDisplay::Inline
    };

    let converted = MATH_CONVERTER.with(|converter_cell| {
        let mut converter = converter_cell.borrow_mut();
        converter
            .as_mut()
            .and_then(|converter| converter.convert_with_local_counter(formula, display).ok())
    });

    Some(converted.unwrap_or_else(|| {
        let escaped_text = escape_html_text(formula);
        // The source is also placed inside a double-quoted attribute, where a
        // raw `"` would end the value early, so quotes are escaped for it.
        let escaped_attr = escaped_text.replace('"', "&quot;");
        format!(
            "<span class=\"umd-math-error\" data-math-source=\"{}\">{}</span>",
            escaped_attr, escaped_text
        )
    }))
}
/// Renders a popover: a trigger `<button>` plus the `popover` `<div>` it opens.
///
/// * `trigger_text` – button label; trimmed and HTML-escaped.
/// * `raw_content` – popover body source, rendered through `crate::parse`.
///
/// Each call generates a fresh UUID-based element id to link the two elements.
fn render_popover_html(trigger_text: &str, raw_content: &str) -> String {
    let unique = uuid::Uuid::new_v4().simple();
    let popover_id = format!("umd-popover-{}", unique);
    let body_html = crate::parse(raw_content);
    let label = escape_html_text(trigger_text.trim());
    format!(
        "<button command=\"show-popover\" commandfor=\"{id}\">{label}</button><div id=\"{id}\" popover>{body}</div>",
        id = popover_id,
        label = label,
        body = body_html
    )
}
/// Reports whether `token` may be used as an id or class name on a link.
///
/// A valid token is non-empty and consists only of ASCII letters, ASCII
/// digits, `-` and `_`.
fn is_valid_link_attr_token(token: &str) -> bool {
    if token.is_empty() {
        return false;
    }
    token
        .chars()
        .all(|ch| matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
}
fn parse_link_attribute_spec(spec: &str) -> (Option<String>, Vec<String>) {
let mut id = None;
let mut classes = Vec::new();
for raw_token in spec.split_whitespace() {
let token = raw_token.trim();
if token.is_empty() {
continue;
}
if let Some(stripped) = token.strip_prefix('#') {
if is_valid_link_attr_token(stripped) {
id = Some(stripped.to_string());
}
continue;
}
if let Some(stripped) = token.strip_prefix('.') {
if is_valid_link_attr_token(stripped) {
classes.push(stripped.to_string());
}
continue;
}
if id.is_none() {
if is_valid_link_attr_token(token) {
id = Some(token.to_string());
}
} else if is_valid_link_attr_token(token) {
classes.push(token.to_string());
}
}
(id, classes)
}
/// Applies `{id class …}` specs that directly follow an `<a href="…">` element.
///
/// For every anchor followed by a brace spec, the spec is parsed with
/// `parse_link_attribute_spec`. The id is added unless the anchor already has
/// one, classes are merged into an existing `class` attribute (or a new one is
/// appended), and the spec itself is removed from the output.
fn apply_custom_link_attributes(html: &str) -> String {
    let anchor_with_spec =
        Regex::new(r#"(?s)<a\s+([^>]*\bhref=\"[^\"]+\"[^>]*)>(.*?)</a>\s*\{([^}]+)\}"#).unwrap();
    let class_attr = Regex::new(r#"class=\"([^\"]*)\""#).unwrap();
    let id_attr = Regex::new(r#"\bid=\"[^\"]*\""#).unwrap();

    let rewritten = anchor_with_spec.replace_all(html, |caps: &Captures| {
        let (id, extra_classes) = parse_link_attribute_spec(&caps[3]);
        let mut attrs = String::from(&caps[1]);

        // An id already present on the anchor takes precedence.
        match id {
            Some(id_value) if !id_attr.is_match(&attrs) => {
                attrs.push_str(&format!(" id=\"{}\"", id_value));
            }
            _ => {}
        }

        if !extra_classes.is_empty() {
            let current = class_attr
                .captures(&attrs)
                .map(|found| found[1].to_string());
            match current {
                Some(existing) => {
                    let mut merged: Vec<String> =
                        existing.split_whitespace().map(String::from).collect();
                    for class_name in extra_classes {
                        if !merged.contains(&class_name) {
                            merged.push(class_name);
                        }
                    }
                    attrs = class_attr
                        .replace(&attrs, format!("class=\"{}\"", merged.join(" ")))
                        .into_owned();
                }
                None => {
                    attrs.push_str(&format!(" class=\"{}\"", extra_classes.join(" ")));
                }
            }
        }

        format!("<a {}>{}</a>", attrs, &caps[2])
    });

    rewritten.into_owned()
}
pub fn postprocess_conflicts(html: &str, header_map: &HeaderIdMap) -> String {
use crate::extensions::block_decorations;
let result = html.to_string();
let unescape_marker_quotes = |input: &str| -> String {
let marker_patterns = vec![
(
r"\{\{DEFINITION_LIST:([^\}]+):DEFINITION_LIST\}\}",
"{{DEFINITION_LIST:",
),
(
r"\{\{INLINE_PLUGIN:([^\}]+):INLINE_PLUGIN\}\}",
"{{INLINE_PLUGIN:",
),
(
r"\{\{BLOCK_PLUGIN:([^\}]+):BLOCK_PLUGIN\}\}",
"{{BLOCK_PLUGIN:",
),
(
r"\{\{BLOCK_PLUGIN_ARGSONLY:([^\}]+):BLOCK_PLUGIN_ARGSONLY\}\}",
"{{BLOCK_PLUGIN_ARGSONLY:",
),
(
r"\{\{INLINE_PLUGIN_ARGSONLY:([^\}]+):INLINE_PLUGIN_ARGSONLY\}\}",
"{{INLINE_PLUGIN_ARGSONLY:",
),
(
r"\{\{INLINE_PLUGIN_NOARGS:([^\}]+):INLINE_PLUGIN_NOARGS\}\}",
"{{INLINE_PLUGIN_NOARGS:",
),
];
let mut result = input.to_string();
for (pattern, _marker_start) in marker_patterns {
let re = Regex::new(pattern).unwrap();
result = re
.replace_all(&result, |caps: &Captures| {
let content = &caps[0];
content.replace(""", "\"")
})
.to_string();
}
result
};
let mut result = unescape_marker_quotes(&result);
let mut heading_counter = 0;
let header_regex = Regex::new(r"<h([1-6])>([^<]+)</h([1-6])>").unwrap();
result = header_regex
.replace_all(&result, |caps: &Captures| {
heading_counter += 1;
let level = &caps[1];
let title = &caps[2];
let close_level = &caps[3];
let id = if let Some(custom_id) = header_map.ids.get(&heading_counter) {
format!("h-{}", custom_id)
} else {
format!("h-{}", heading_counter)
};
format!(
"<h{}><a href=\"#{}\" aria-hidden=\"true\" class=\"anchor\" id=\"{}\"></a>{}</h{}>",
level, id, id, title, close_level
)
})
.to_string();
let umd_blockquote_marker = Regex::new(r"\{\{UMD_BLOCKQUOTE:(.+?):UMD_BLOCKQUOTE\}\}").unwrap();
result = umd_blockquote_marker
.replace_all(&result, |caps: &Captures| {
let content = &caps[1];
format!(
"<blockquote class=\"umd-blockquote\">{}</blockquote>",
content
)
})
.to_string();
let block_decoration_marker =
Regex::new(r"\{\{BLOCK_DECORATION_B64:([A-Za-z0-9+/=]+):BLOCK_DECORATION_B64\}\}").unwrap();
result = block_decoration_marker
.replace_all(&result, |caps: &Captures| {
use base64::{Engine as _, engine::general_purpose};
let encoded = &caps[1];
let decoration = general_purpose::STANDARD
.decode(encoded.as_bytes())
.ok()
.and_then(|bytes| String::from_utf8(bytes).ok())
.unwrap_or_else(|| encoded.to_string());
let placement_only = Regex::new(r"^(LEFT|CENTER|RIGHT|JUSTIFY):\s*$")
.unwrap()
.is_match(decoration.trim());
if decoration.contains('\n') || placement_only {
decoration
} else {
block_decorations::apply_block_decorations(&decoration)
}
})
.to_string();
let inline_plugin_marker =
Regex::new(r"\{\{INLINE_PLUGIN:(\w+):([\s\S]*?):([\s\S]*?):INLINE_PLUGIN\}\}").unwrap();
result = inline_plugin_marker
.replace_all(&result, |caps: &Captures| {
use base64::{Engine as _, engine::general_purpose};
let function = &caps[1];
let args = &caps[2];
let encoded_content = &caps[3];
let content = general_purpose::STANDARD
.decode(encoded_content.as_bytes())
.ok()
.and_then(|bytes| String::from_utf8(bytes).ok())
.unwrap_or_else(|| encoded_content.to_string());
if function == "math" {
let formula = if content.trim().is_empty() {
args
} else {
&content
};
if let Some(mathml) = render_math_html(formula, false) {
return mathml;
}
}
if function == "popover" {
return render_popover_html(args, &content);
}
if let Some(html) = convert_inline_decoration_to_html(function, args, &content) {
return html;
}
let args_html = render_args_as_data(args);
let escaped_content = escape_html_text(&content);
if escaped_content.is_empty() {
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}</template>",
function, args_html
)
} else {
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}{}</template>",
function, args_html, escaped_content
)
}
})
.to_string();
let inline_plugin_argsonly_marker =
Regex::new(r"\{\{INLINE_PLUGIN_ARGSONLY:(\w+):([\s\S]*?):INLINE_PLUGIN_ARGSONLY\}\}")
.unwrap();
result = inline_plugin_argsonly_marker
.replace_all(&result, |caps: &Captures| {
let function = &caps[1];
let args = &caps[2];
if let Some(html) = convert_inline_decoration_argsonly_to_html(function, args) {
return html;
}
let args_html = render_args_as_data(args);
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}</template>",
function, args_html
)
})
.to_string();
let inline_plugin_noargs_marker =
Regex::new(r"\{\{INLINE_PLUGIN_NOARGS:(\w+):INLINE_PLUGIN_NOARGS\}\}").unwrap();
result = inline_plugin_noargs_marker
.replace_all(&result, |caps: &Captures| {
let function = &caps[1];
if let Some(html) = convert_inline_decoration_noargs_to_html(function) {
return html;
}
format!(
"<template class=\"umd-plugin umd-plugin-{}\"></template>",
function
)
})
.to_string();
let block_plugin_marker =
Regex::new(r"\{\{BLOCK_PLUGIN:(\w+):([\s\S]*?):([\s\S]*?):BLOCK_PLUGIN\}\}").unwrap();
result = block_plugin_marker
.replace_all(&result, |caps: &Captures| {
use base64::{Engine as _, engine::general_purpose};
let function = &caps[1];
let args = &caps[2];
let encoded_content = &caps[3];
let content = general_purpose::STANDARD
.decode(encoded_content.as_bytes())
.ok()
.and_then(|bytes| String::from_utf8(bytes).ok())
.unwrap_or_else(|| encoded_content.to_string());
if function == "table" {
return process_table_plugin(args, &content);
}
if function == "math" {
let formula = if content.trim().is_empty() {
args
} else {
&content
};
if let Some(mathml) = render_math_html(formula, true) {
return mathml;
}
}
if function == "popover" {
return render_popover_html(args, &content);
}
let args_html = render_args_as_data(args);
let escaped_content = escape_html_text(&content);
if escaped_content.is_empty() {
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}</template>",
function, args_html
)
} else {
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}{}</template>",
function, args_html, escaped_content
)
}
})
.to_string();
let block_plugin_argsonly_marker =
Regex::new(r"\{\{BLOCK_PLUGIN_ARGSONLY:(\w+):([\s\S]*?):BLOCK_PLUGIN_ARGSONLY\}\}")
.unwrap();
result = block_plugin_argsonly_marker
.replace_all(&result, |caps: &Captures| {
use base64::{Engine as _, engine::general_purpose};
let function = &caps[1];
let encoded_args = &caps[2];
let args = general_purpose::STANDARD
.decode(encoded_args.as_bytes())
.ok()
.and_then(|bytes| String::from_utf8(bytes).ok())
.unwrap_or_else(|| encoded_args.to_string());
if function == "clear" && args.trim().is_empty() {
return "<div class=\"clearfix\"></div>".to_string();
}
if function == "math" {
if let Some(mathml) = render_math_html(&args, true) {
return mathml;
}
}
let args_html = render_args_as_data(&args);
format!(
"<template class=\"umd-plugin umd-plugin-{}\">{}</template>",
function, args_html
)
})
.to_string();
let wrapped_plugin =
Regex::new(r#"<p>\s*(<template class="umd-plugin[^"]*"[^>]*>.*?</template>)\s*</p>"#)
.unwrap();
result = wrapped_plugin.replace_all(&result, "$1").to_string();
let wrapped_clearfix = Regex::new(r#"<p>\s*(<div class="clearfix"></div>)\s*</p>"#).unwrap();
result = wrapped_clearfix.replace_all(&result, "$1").to_string();
let definition_list_marker =
Regex::new(r"\{\{DEFINITION_LIST:([\s\S]*?):DEFINITION_LIST\}\}").unwrap();
result = definition_list_marker
.replace_all(&result, |caps: &Captures| {
let items_json = &caps[1];
let items: Vec<(String, String)> = serde_json::from_str(items_json).unwrap_or_default();
if items.is_empty() {
return String::new();
}
let mut dl_html = String::from("<dl>");
for (term, definition) in items {
dl_html.push_str(&format!("<dt>{}</dt><dd>{}</dd>", term, definition));
}
dl_html.push_str("</dl>");
dl_html
})
.to_string();
let wrapped_dl = Regex::new(r"<p>\s*(<dl>.*?</dl>)\s*</p>").unwrap();
result = wrapped_dl.replace_all(&result, "$1").to_string();
result = apply_custom_link_attributes(&result);
result = apply_tasklist_indeterminate(&result);
result = apply_bootstrap_enhancements(&result, &header_map);
result
}
/// Marks task-list checkboxes that are followed by `{{TASK_INDETERMINATE}}`.
///
/// The marker is removed and the checkbox gains `data-task="indeterminate"`
/// and `aria-checked="mixed"`, unless it already carries those attributes.
/// The checkbox is always re-emitted in self-closing form.
fn apply_tasklist_indeterminate(html: &str) -> String {
    let pattern =
        Regex::new(r#"<input([^>]*\btype=\"checkbox\"[^>]*)/?>\s*\{\{TASK_INDETERMINATE\}\}"#)
            .unwrap();
    pattern
        .replace_all(html, |caps: &Captures| {
            // The greedy `[^>]*` also captures the `/` of a self-closing tag.
            // Strip it so the new attributes are not appended after a stray
            // slash in the middle of the tag.
            let mut attrs = caps[1]
                .trim_end()
                .trim_end_matches('/')
                .trim_end()
                .to_string();
            if !attrs.contains("data-task=") {
                attrs.push_str(" data-task=\"indeterminate\"");
            }
            if !attrs.contains("aria-checked=") {
                attrs.push_str(" aria-checked=\"mixed\"");
            }
            format!("<input{} />", attrs)
        })
        .to_string()
}
/// Applies Bootstrap classes and related rewrites to rendered HTML.
///
/// In order: bare `<table>` and `<blockquote>` tags receive their Bootstrap
/// class, single-paragraph `[!NOTE]`-style blockquotes become alert boxes,
/// extracted UMD table markers (wrapped in `<p>`) are replaced by their HTML,
/// and table cells get their vertical-alignment prefixes processed.
fn apply_bootstrap_enhancements(html: &str, header_map: &HeaderIdMap) -> String {
    // Only attribute-less tags are touched, so a literal replacement suffices.
    let with_classes = html
        .replace("<table>", "<table class=\"table\">")
        .replace("<blockquote>", "<blockquote class=\"blockquote\">");

    let gfm_alert_pattern = Regex::new(
        r#"<blockquote class="blockquote">\s*<p>\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]\s*(.*?)</p>\s*</blockquote>"#
    ).unwrap();
    let mut result = gfm_alert_pattern
        .replace_all(&with_classes, |caps: &Captures| {
            let (alert_class, label) = match &caps[1] {
                "TIP" => ("alert-success", "Tip"),
                "IMPORTANT" => ("alert-primary", "Important"),
                "WARNING" => ("alert-warning", "Warning"),
                "CAUTION" => ("alert-danger", "Caution"),
                // "NOTE" and anything unexpected share the info style.
                _ => ("alert-info", "Note"),
            };
            format!(
                "<div class=\"alert {}\" role=\"alert\"><strong>{}:</strong> {}</div>",
                alert_class, label, &caps[2]
            )
        })
        .to_string();

    // Each table marker reaches this point as its own paragraph.
    for (marker, table_html) in &header_map.tables {
        let wrapped_marker = format!("<p>{}</p>", marker.trim());
        result = result.replace(&wrapped_marker, table_html);
    }

    process_table_cell_alignment(&result)
}
/// Applies vertical-alignment prefixes (`TOP:`, `MIDDLE:`, …) in table cells.
///
/// Every `<td>` and `<th>` whose content fits on one line is passed through
/// `process_cell_content`; `<td>` cells are handled first, then `<th>` cells.
fn process_table_cell_alignment(html: &str) -> String {
    // The `\b` after the tag name keeps `<th` from matching `<thead>`.
    // Without it, a `<thead>` on the same line as its first `<th>` is taken
    // as the cell start, so that cell's prefix is never seen.
    let cell_patterns = [
        ("td", r"<td\b([^>]*)>(.*?)</td>"),
        ("th", r"<th\b([^>]*)>(.*?)</th>"),
    ];

    let mut result = html.to_string();
    for (tag, pattern) in cell_patterns.iter() {
        let cell_pattern = Regex::new(pattern).unwrap();
        result = cell_pattern
            .replace_all(&result, |caps: &Captures| {
                process_cell_content(tag, &caps[1], &caps[2])
            })
            .to_string();
    }
    result
}
/// Rebuilds one table cell, turning a leading alignment prefix into a class.
///
/// * `tag` – the cell element name (`td` or `th`).
/// * `existing_attrs` – the raw attribute text of the opening tag.
/// * `content` – the cell's inner HTML.
///
/// When the content (ignoring leading whitespace) starts with `TOP:`,
/// `MIDDLE:`, `BOTTOM:` or `BASELINE:`, the prefix is removed and the matching
/// `align-*` class is prepended to an existing `class="…"` attribute or added
/// as a new one. Otherwise the cell is re-emitted unchanged.
fn process_cell_content(tag: &str, existing_attrs: &str, content: &str) -> String {
    const ALIGNMENTS: [(&str, &str); 4] = [
        ("TOP:", "align-top"),
        ("MIDDLE:", "align-middle"),
        ("BOTTOM:", "align-bottom"),
        ("BASELINE:", "align-baseline"),
    ];

    let leading = content.trim_start();
    let matched = ALIGNMENTS.iter().find_map(|(prefix, class)| {
        leading
            .strip_prefix(*prefix)
            .map(|rest| (*class, rest.trim_start()))
    });

    let (align_class, body) = match matched {
        Some(found) => found,
        // No prefix: keep the content exactly as it was, whitespace included.
        None => return format!("<{}{}>{}</{}>", tag, existing_attrs, content, tag),
    };

    if existing_attrs.contains("class=") {
        let attrs = existing_attrs.replace("class=\"", &format!("class=\"{} ", align_class));
        format!("<{}{}>{}</{}>", tag, attrs, body, tag)
    } else {
        format!(
            "<{} class=\"{}\"{}>{}</{}>",
            tag, align_class, existing_attrs, body, tag
        )
    }
}
/// Scans the source text for constructs that are ambiguous between Markdown
/// and UMD and returns one warning message per finding.
///
/// Two checks are made: `***text***` appearing in a document that also uses
/// `'''`, and `COLOR(` appearing in a document that also has a line starting
/// with `:`. The result is empty when neither applies.
pub fn detect_ambiguous_syntax(input: &str) -> Vec<String> {
    let mixes_emphasis_styles = input.contains("'''") && TRIPLE_STAR_EMPHASIS.is_match(input);
    let color_near_definition = input.contains("COLOR(") && input.contains("\n:");

    let checks = [
        (
            mixes_emphasis_styles,
            "Detected both ***text*** (Markdown) and '''text''' (UMD). \
             Consider using **text** for Markdown bold-italic.",
        ),
        (
            color_near_definition,
            "Detected COLOR() syntax near Markdown definition list. \
             Ensure proper spacing to avoid ambiguity.",
        ),
    ];

    checks
        .iter()
        .filter(|(triggered, _)| *triggered)
        .map(|(_, message)| message.to_string())
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- UMD blockquotes -------------------------------------------------

    #[test]
    fn test_umd_blockquote_preprocessing() {
        let input = "> This is a UMD quote <";
        let (output, _) = preprocess_conflicts(input);
        assert!(output.contains("{{UMD_BLOCKQUOTE:"));
        assert!(!output.starts_with(">"));
    }

    #[test]
    fn test_umd_blockquote_postprocessing() {
        let header_map = HeaderIdMap::new();
        let input = "{{UMD_BLOCKQUOTE:Test content:UMD_BLOCKQUOTE}}";
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains("<blockquote class=\"umd-blockquote\">Test content</blockquote>"));
    }

    #[test]
    fn test_markdown_blockquote_unchanged() {
        let input = "> Standard Markdown quote\n> Second line";
        let (output, _) = preprocess_conflicts(input);
        assert_eq!(output, input);
    }

    #[test]
    fn test_roundtrip_blockquote() {
        let header_map = HeaderIdMap::new();
        let input = "> UMD style <";
        let (preprocessed, _) = preprocess_conflicts(input);
        let postprocessed = postprocess_conflicts(&preprocessed, &header_map);
        assert!(postprocessed.contains("<blockquote class=\"umd-blockquote\">"));
    }

    // --- Heading ids -----------------------------------------------------

    #[test]
    fn test_custom_header_id() {
        let input = "# My Header {#custom-id}\n\nContent";
        let (output, header_map) = preprocess_conflicts(input);
        assert_eq!(header_map.ids.get(&1), Some(&"custom-id".to_string()));
        assert!(!output.contains("{#custom-id}"));
        assert!(output.contains("# My Header"));
    }

    #[test]
    fn test_multiple_custom_header_ids() {
        let input = "# First {#first}\n\n## Second {#second}\n\n### Third";
        let (_output, header_map) = preprocess_conflicts(input);
        assert_eq!(header_map.ids.get(&1), Some(&"first".to_string()));
        assert_eq!(header_map.ids.get(&2), Some(&"second".to_string()));
        assert_eq!(header_map.ids.get(&3), None);
    }

    #[test]
    fn test_apply_custom_header_ids() {
        let mut header_map = HeaderIdMap::new();
        header_map.ids.insert(1, "my-custom-id".to_string());
        let html = "<h1>Header</h1>";
        let output = postprocess_conflicts(html, &header_map);
        assert!(output.contains("id=\"h-my-custom-id\""));
        assert!(output.contains("href=\"#h-my-custom-id\""));
        // The positional fallback id is `h-<n>`; it must not be used when a
        // custom id is registered for the heading.
        assert!(!output.contains("id=\"h-1\""));
    }

    #[test]
    fn test_sequential_header_ids() {
        let header_map = HeaderIdMap::new();
        let html = "<h1>First</h1><h2>Second</h2>";
        let output = postprocess_conflicts(html, &header_map);
        assert!(output.contains("id=\"h-1\""));
        assert!(output.contains("id=\"h-2\""));
    }

    // --- Ambiguity warnings ----------------------------------------------

    #[test]
    fn test_detect_triple_emphasis_conflict() {
        let input = "***Markdown*** and '''UMD'''";
        let warnings = detect_ambiguous_syntax(input);
        assert!(!warnings.is_empty());
        assert!(warnings[0].contains("***text***"));
    }

    #[test]
    fn test_detect_color_definition_conflict() {
        let input = "COLOR(red): text\n: definition";
        let warnings = detect_ambiguous_syntax(input);
        assert!(!warnings.is_empty());
        assert!(warnings[0].contains("COLOR()"));
    }

    #[test]
    fn test_no_warnings_for_clean_syntax() {
        let input = "# Heading\n\n**Bold** and ''UMD bold''";
        let warnings = detect_ambiguous_syntax(input);
        assert!(warnings.is_empty());
    }

    // --- Bootstrap enhancements ------------------------------------------

    #[test]
    fn test_bootstrap_table_class() {
        let header_map = HeaderIdMap::new();
        let input = "<table><tr><td>Cell</td></tr></table>";
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"<table class="table">"#));
    }

    #[test]
    fn test_bootstrap_blockquote_class() {
        let header_map = HeaderIdMap::new();
        let input = "<blockquote><p>Quote</p></blockquote>";
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"<blockquote class="blockquote">"#));
    }

    #[test]
    fn test_gfm_alert_note() {
        let header_map = HeaderIdMap::new();
        let input = r#"<blockquote class="blockquote"><p>[!NOTE] This is a note</p></blockquote>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"<div class="alert alert-info" role="alert">"#));
        assert!(output.contains("<strong>Note:</strong>"));
        assert!(output.contains("This is a note"));
    }

    #[test]
    fn test_gfm_alert_warning() {
        let header_map = HeaderIdMap::new();
        let input = r#"<blockquote class="blockquote"><p>[!WARNING] Be careful</p></blockquote>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"<div class="alert alert-warning" role="alert">"#));
        assert!(output.contains("<strong>Warning:</strong>"));
    }

    #[test]
    fn test_umd_blockquote_no_bootstrap_class() {
        let header_map = HeaderIdMap::new();
        let input = "{{UMD_BLOCKQUOTE:Test content:UMD_BLOCKQUOTE}}";
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"<blockquote class="umd-blockquote">"#));
        assert!(!output.contains(r#"class="blockquote""#));
    }

    // --- Definition lists ------------------------------------------------

    #[test]
    fn test_definition_list() {
        let input = ":Term 1|Definition 1\n:Term 2|Definition 2";
        let (preprocessed, _) = preprocess_conflicts(input);
        assert!(preprocessed.contains("{{DEFINITION_LIST:"));
    }

    #[test]
    fn test_definition_list_html_output() {
        let header_map = HeaderIdMap::new();
        let input = ":HTML|HyperText Markup Language\n:CSS|Cascading Style Sheets";
        let (preprocessed, _) = preprocess_conflicts(input);
        let output = postprocess_conflicts(&preprocessed, &header_map);
        assert!(output.contains("<dl>"));
        assert!(output.contains("<dt>HTML</dt>"));
        assert!(output.contains("<dd>HyperText Markup Language</dd>"));
        assert!(output.contains("<dt>CSS</dt>"));
        assert!(output.contains("<dd>Cascading Style Sheets</dd>"));
        assert!(output.contains("</dl>"));
    }

    // --- Table cell alignment --------------------------------------------

    #[test]
    fn test_table_cell_vertical_alignment() {
        let header_map = HeaderIdMap::new();
        let input =
            r#"<table class="table"><tr><td>TOP: Cell1</td><td>MIDDLE: Cell2</td></tr></table>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"class="align-top""#));
        assert!(output.contains("Cell1"));
        assert!(output.contains(r#"class="align-middle""#));
        assert!(output.contains("Cell2"));
    }

    #[test]
    fn test_table_cell_multiple_alignments() {
        let header_map = HeaderIdMap::new();
        let input = r#"<table><tr><th>BASELINE: Header</th><td>BOTTOM: Data</td></tr></table>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"class="align-baseline""#));
        assert!(output.contains(r#"class="align-bottom""#));
    }

    // --- Task lists and link attributes ----------------------------------

    #[test]
    fn test_tasklist_indeterminate_marker() {
        let header_map = HeaderIdMap::new();
        let input = r#"<li><input type="checkbox" disabled="" /> {{TASK_INDETERMINATE}}Item</li>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"data-task="indeterminate""#));
        assert!(output.contains(r#"aria-checked="mixed""#));
        assert!(!output.contains("{{TASK_INDETERMINATE}}"));
    }

    #[test]
    fn test_custom_link_attributes_id_and_class() {
        let header_map = HeaderIdMap::new();
        let input = r#"<p><a href="/docs">Docs</a>{docs-link btn btn-primary}</p>"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(
            output.contains(r#"<a href="/docs" id="docs-link" class="btn btn-primary">Docs</a>"#)
        );
        assert!(!output.contains("{docs-link btn btn-primary}"));
    }

    #[test]
    fn test_custom_link_attributes_merge_class() {
        let header_map = HeaderIdMap::new();
        let input = r#"<a href="/home" class="existing">Home</a>{home-link new}"#;
        let output = postprocess_conflicts(input, &header_map);
        assert!(output.contains(r#"id="home-link""#));
        assert!(output.contains(r#"class="existing new""#));
    }
}
/// Prefixes root-relative URLs in `href`, `src` and `srcset` attributes with
/// `base_url`.
///
/// * `html` – the HTML to rewrite.
/// * `base_url` – the prefix; a single trailing `/` is ignored so the joined
///   URL never contains a doubled slash. A base of `""` or `"/"` leaves the
///   HTML unchanged.
///
/// Only values that start with a single `/` are rewritten. Protocol-relative
/// URLs (`//host/…`), absolute URLs and relative paths are left alone. For
/// `srcset`, only a leading root-relative candidate is rewritten.
pub fn apply_base_url_to_links(html: &str, base_url: &str) -> String {
    let normalized_base = base_url.strip_suffix('/').unwrap_or(base_url);
    if normalized_base.is_empty() {
        // Prefixing with nothing is a no-op. This also keeps a base of "/"
        // from turning "/path" into the protocol-relative "//path".
        return html.to_string();
    }

    // `/(?:[^/"][^"]*)?` accepts "/" and "/x…" but rejects "//…", which is a
    // protocol-relative URL that points at another host.
    let double_quoted =
        Regex::new(r#"((?:href|src|srcset)\s*=\s*)"(/(?:[^/"][^"]*)?)""#).unwrap();
    let single_quoted =
        Regex::new(r"((?:href|src|srcset)\s*=\s*)'(/(?:[^/'][^']*)?)'").unwrap();

    let result = double_quoted
        .replace_all(html, |caps: &Captures| {
            format!("{}\"{}{}\"", &caps[1], normalized_base, &caps[2])
        })
        .to_string();

    single_quoted
        .replace_all(&result, |caps: &Captures| {
            format!("{}'{}{}'", &caps[1], normalized_base, &caps[2])
        })
        .to_string()
}