use once_cell::sync::Lazy;
use regex::Regex;
use std::borrow::Cow;
use crate::parser::core::CoreRule;
use crate::parser::inline::Text;
use crate::{MarkdownIt, Node};
/// Ordered table of `(pattern, replacement)` pairs applied to text content.
///
/// The replacement strings use the `regex` crate's `$name` / `$1` expansion
/// syntax. The three dash patterns capture the character before (`pre`) and
/// after (`post`) the dashes and re-insert them in the replacement, so only
/// the dashes themselves are swapped for an em dash (U+2014) or en dash
/// (U+2013).
static REPLACEMENTS: Lazy<Box<[(Regex, &'static str)]>> = Lazy::new(|| {
    // Source text for each rule; compiled once, on first access.
    const RULES: &[(&str, &str)] = &[
        // Plus-minus sign.
        (r"\+-", "±"),
        // Two or more dots collapse to an ellipsis ...
        (r"\.{2,}", "…"),
        // ... except directly after `?` or `!`, where two dots are restored.
        (r"([?!])…", "$1.."),
        // Runs of four or more `?` / `!` are cut down to three.
        (r"([?!]){4,}", "$1$1$1"),
        // Repeated commas collapse to one.
        (r",{2,}", ","),
        // Em dash: exactly three hyphens not adjacent to another hyphen.
        (
            r"(?m)(?P<pre>^|[^-])(?P<dash>---)(?P<post>[^-]|$)",
            "$pre\u{2014}$post",
        ),
        // En dash: two hyphens delimited by whitespace or line boundaries.
        (
            r"(?m)(?P<pre>^|\s)(?P<dash>--)(?P<post>\s|$)",
            "$pre\u{2013}$post",
        ),
        // En dash: two hyphens delimited by non-hyphen, non-space characters.
        (
            r"(?m)(?P<pre>^|[^-\s])(?P<dash>--)(?P<post>[^-\s]|$)",
            "$pre\u{2013}$post",
        ),
    ];

    RULES
        .iter()
        .map(|&(pattern, replacement)| (Regex::new(pattern).unwrap(), replacement))
        .collect()
});
/// Case-insensitive matcher for the parenthesised abbreviations `(c)`, `(tm)`
/// and `(r)`. Each match is handed to `replace_abbreviation`.
static SCOPED_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\((c|tm|r)\)").unwrap());
/// Pre-check for the `REPLACEMENTS` table: matches `+-`, `..`, `????`, `!!!!`,
/// `,,` or `--`. `TypographerRule::run` only iterates `REPLACEMENTS` for text
/// that this pattern matches.
static RARE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--").unwrap());
/// Maps a parenthesised abbreviation to its typographic symbol.
///
/// Recognises `(c)` → `©`, `(r)` → `®` and `(tm)` → `™`, ignoring ASCII letter
/// case. The comparison is done in place, without allocating a lowercased
/// copy of `input`.
///
/// # Panics
///
/// Panics if `input` is not one of the three abbreviations above. The only
/// caller passes whole matches of `SCOPED_RE`, which cannot be anything else.
fn replace_abbreviation(input: &str) -> &'static str {
    if input.eq_ignore_ascii_case("(c)") {
        "©"
    } else if input.eq_ignore_ascii_case("(r)") {
        "®"
    } else if input.eq_ignore_ascii_case("(tm)") {
        "™"
    } else {
        unreachable!("Got invalid abbreviation '{}'", input)
    }
}
/// Adds [`TypographerRule`] to the given parser via `add_rule`.
pub fn add(md: &mut MarkdownIt) {
md.add_rule::<TypographerRule>();
}
/// Core rule that rewrites the content of `Text` nodes with typographic
/// replacements (©, ®, ™, ±, …, en/em dashes, collapsed punctuation runs).
pub struct TypographerRule;
impl CoreRule for TypographerRule {
fn run(root: &mut Node, _: &MarkdownIt) {
root.walk_mut(|node, _| {
let Some(text_node) = node.cast_mut::<Text>() else { return; };
if SCOPED_RE.is_match(&text_node.content) {
text_node.content = SCOPED_RE
.replace_all(&text_node.content, |caps: ®ex::Captures| {
replace_abbreviation(caps.get(0).unwrap().as_str())
})
.to_string();
}
if RARE_RE.is_match(&text_node.content) {
let mut result = Cow::Borrowed(text_node.content.as_str());
for (pattern, replacement) in REPLACEMENTS.iter() {
if let Cow::Owned(s) = pattern.replace_all(&result, *replacement) {
result = Cow::Owned(s);
if let Cow::Owned(s) = pattern.replace_all(&result, *replacement) {
result = Cow::Owned(s);
}
}
}
if let Cow::Owned(s) = result {
text_node.content = s;
}
}
});
}
}