use std::collections::HashMap;
use crate::common::utils::is_punct_char;
use crate::parser::core::CoreRule;
use crate::parser::inline::Text;
use crate::plugins::cmark::block::paragraph::Paragraph;
use crate::plugins::cmark::inline::newline::{Hardbreak, Softbreak};
use crate::plugins::html::html_inline::HtmlInline;
use crate::{MarkdownIt, Node};
/// Right single quotation mark (U+2019). Written in place of a straight single
/// quote that ends up neither opening nor closing a quotation (for example an
/// apostrophe in the middle of a word).
const APOSTROPHE: char = '\u{2019}';
/// Straight single quote; one of the two characters this rule searches for.
const SINGLE_QUOTE: char = '\'';
/// Straight double quote; one of the two characters this rule searches for.
const DOUBLE_QUOTE: char = '"';
/// Neighbor character reported when a quote sits next to a line break or when
/// there are no more tokens to look at in that direction.
const SPACE: char = ' ';
/// Registers the smart-quotes rule with the default replacement characters:
/// `‘` / `’` for single quotes and `“` / `”` for double quotes.
pub fn add(md: &mut MarkdownIt) {
    // U+2018, U+2019, U+201C, U+201D: the same characters as the literal
    // curly quotes, spelled as escapes so they cannot be mixed up visually.
    add_with::<'\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}'>(md);
}
/// Registers the smart-quotes rule with caller-chosen replacement characters.
///
/// The four const parameters are, in order: the opening and closing character
/// used for single quotes, then the opening and closing character used for
/// double quotes. They are forwarded unchanged to [`SmartQuotesRule`].
pub fn add_with<
    const OPEN_SINGLE_QUOTE: char,
    const CLOSE_SINGLE_QUOTE: char,
    const OPEN_DOUBLE_QUOTE: char,
    const CLOSE_DOUBLE_QUOTE: char,
>(
    md: &mut MarkdownIt,
) {
    md.add_rule::<
        SmartQuotesRule<
            OPEN_SINGLE_QUOTE,
            CLOSE_SINGLE_QUOTE,
            OPEN_DOUBLE_QUOTE,
            CLOSE_DOUBLE_QUOTE,
        >,
    >();
}
/// Flattened view of one node of the document tree.
///
/// `all_text_tokens` pushes exactly one `FlatToken` per visited node, so the
/// position of a token in the resulting vector is the node's index in walk
/// order. The string slices borrow from the nodes themselves.
enum FlatToken<'a> {
/// A `Paragraph`, `Hardbreak` or `Softbreak` node. Neighbor-character lookups
/// stop here and report a space.
LineBreak,
/// A `Text` node; the only kind of token in which quotes are searched for.
Text {
/// The node's text content.
content: &'a str,
/// The nesting level the tree walk reported for this node.
nesting_level: u32,
},
/// An `HtmlInline` node. Its content is consulted when looking for the
/// characters around a quote, but quotes inside it are never collected.
HtmlInline {
/// The node's raw content.
content: &'a str,
},
/// Any other node. Kept only so that vector indices stay aligned with the
/// walk order; neighbor-character lookups skip over it.
Irrelevant,
}
/// Which of the two straight quote characters was found in the text.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum QuoteType {
/// The straight single quote (`SINGLE_QUOTE`).
Single,
/// The straight double quote (`DOUBLE_QUOTE`).
Double,
}
/// A quote that may open a quotation, kept on a stack until a matching closing
/// quote is found.
struct QuoteMarker {
/// Index (in walk order) of the text token that contains the quote.
walk_index: usize,
/// Position of the quote inside that token, counted in `char`s (not bytes).
quote_position: usize,
/// Whether the quote is a single or a double quote.
quote_type: QuoteType,
/// Nesting level of the token that contains the quote.
level: u32,
}
/// A single pending substitution: replace one character of one text token.
struct ReplacementOp {
/// Index (in walk order) of the text token to modify.
walk_index: usize,
/// Position of the character to replace, counted in `char`s (not bytes).
quote_position: usize,
/// The character to write at that position.
quote: char,
}
/// Core rule that replaces straight quotes in text nodes with typographic
/// ones.
///
/// The const parameters select the replacement characters: opening and closing
/// character for single quotes, then opening and closing character for double
/// quotes. The type carries no data; it only exists to be registered as a rule.
pub struct SmartQuotesRule<
const OPEN_SINGLE_QUOTE: char,
const CLOSE_SINGLE_QUOTE: char,
const OPEN_DOUBLE_QUOTE: char,
const CLOSE_DOUBLE_QUOTE: char,
>;
impl<
        const OPEN_SINGLE_QUOTE: char,
        const CLOSE_SINGLE_QUOTE: char,
        const OPEN_DOUBLE_QUOTE: char,
        const CLOSE_DOUBLE_QUOTE: char,
    > CoreRule
    for SmartQuotesRule<
        OPEN_SINGLE_QUOTE,
        CLOSE_SINGLE_QUOTE,
        OPEN_DOUBLE_QUOTE,
        CLOSE_DOUBLE_QUOTE,
    >
{
    /// Rewrites the straight quotes found in the text nodes below `root`.
    ///
    /// Works in two passes: a read-only pass flattens the tree and computes,
    /// per node index, which character positions must be replaced; a mutable
    /// pass then applies those replacements.
    ///
    /// # Panics
    ///
    /// Panics if a node scheduled for replacement is not a `Text` node. That
    /// can only happen if the mutable walk visits nodes in a different order
    /// than the read-only walk used to build the replacement table.
    fn run(root: &mut Node, _: &MarkdownIt) {
        let text_tokens = all_text_tokens(root);
        let replacement_ops = Self::compute_replacements(text_tokens);

        // Counts visited nodes so the position in this walk can be matched
        // against the indices recorded during the read-only walk.
        let mut current_index: usize = 0;
        root.walk_mut(|node, _| {
            if let Some(current_replacements) = replacement_ops.get(&current_index) {
                let text_node = node.cast_mut::<Text>()
                    .expect("Expected to find a text node at this index because we constructed our replacements HashMap accordingly.");
                text_node.content = execute_replacements(current_replacements, &text_node.content);
            }
            current_index += 1;
        });
    }
}
impl<
const OPEN_SINGLE_QUOTE: char,
const CLOSE_SINGLE_QUOTE: char,
const OPEN_DOUBLE_QUOTE: char,
const CLOSE_DOUBLE_QUOTE: char,
>
SmartQuotesRule<OPEN_SINGLE_QUOTE, CLOSE_SINGLE_QUOTE, OPEN_DOUBLE_QUOTE, CLOSE_DOUBLE_QUOTE>
{
fn compute_replacements(text_tokens: Vec<FlatToken>) -> HashMap<usize, HashMap<usize, char>> {
let mut quote_stack: Vec<QuoteMarker> = Vec::new();
let mut replacement_ops: HashMap<usize, HashMap<usize, char>> = HashMap::new();
for (walk_index, token) in text_tokens.iter().enumerate() {
if let FlatToken::Text {
content,
nesting_level,
} = token
{
for op in Self::replace_smartquotes(
content,
walk_index,
*nesting_level,
&text_tokens,
&mut quote_stack,
) {
replacement_ops
.entry(op.walk_index)
.or_default()
.insert(op.quote_position, op.quote);
}
}
}
replacement_ops
}
fn replace_smartquotes(
content: &str,
walk_index: usize,
level: u32,
text_tokens: &[FlatToken],
quote_stack: &mut Vec<QuoteMarker>,
) -> Vec<ReplacementOp> {
truncate_stack(quote_stack, level);
let mut result: Vec<_> = Vec::new();
for (quote_position, quote_type) in find_quotes(content) {
let last_char = find_last_char_before(text_tokens, walk_index, quote_position);
let next_char = find_first_char_after(text_tokens, walk_index, quote_position);
let (can_open, can_close): (bool, bool) =
can_open_or_close("e_type, last_char, next_char);
if !can_open && !can_close {
if quote_type == QuoteType::Single {
result.push(ReplacementOp {
walk_index,
quote_position,
quote: APOSTROPHE,
});
}
continue;
}
if can_close {
if let Some((opening_op, closing_op, new_stack_len)) =
Self::try_close(quote_stack, walk_index, level, quote_type, quote_position)
{
quote_stack.truncate(new_stack_len);
result.push(opening_op);
result.push(closing_op);
continue;
}
}
if can_open {
quote_stack.push(QuoteMarker {
walk_index,
quote_position,
quote_type,
level,
});
} else if can_close && quote_type == QuoteType::Single {
result.push(ReplacementOp {
walk_index,
quote_position,
quote: APOSTROPHE,
});
}
}
result
}
fn try_close(
quote_stack: &[QuoteMarker],
walk_index: usize,
level: u32,
quote_type: QuoteType,
quote_position: usize,
) -> Option<(ReplacementOp, ReplacementOp, usize)> {
for (j, other_item) in quote_stack.iter().enumerate().rev() {
if other_item.level < level {
return None;
}
if other_item.quote_type == quote_type && other_item.level == level {
return Some((
ReplacementOp {
walk_index: other_item.walk_index,
quote_position: other_item.quote_position,
quote: if quote_type == QuoteType::Single {
OPEN_SINGLE_QUOTE
} else {
OPEN_DOUBLE_QUOTE
},
},
ReplacementOp {
walk_index,
quote_position,
quote: if quote_type == QuoteType::Single {
CLOSE_SINGLE_QUOTE
} else {
CLOSE_DOUBLE_QUOTE
},
},
j,
));
}
}
None
}
}
/// Flattens the tree below `root` into one [`FlatToken`] per visited node.
///
/// Exactly one token is pushed for every node the walk visits, including
/// nodes this rule does not care about (`FlatToken::Irrelevant`). That keeps
/// the index of a token in the returned vector equal to the node's position
/// in walk order, which the caller relies on to find the node again later.
fn all_text_tokens(root: &Node) -> Vec<FlatToken> {
    let mut result = Vec::new();
    root.walk(|node, nesting_level| {
        let token = if let Some(text_node) = node.cast::<Text>() {
            FlatToken::Text {
                content: &text_node.content,
                nesting_level,
            }
        } else if let Some(html_node) = node.cast::<HtmlInline>() {
            FlatToken::HtmlInline {
                content: &html_node.content,
            }
        } else if node.is::<Paragraph>() || node.is::<Hardbreak>() || node.is::<Softbreak>() {
            FlatToken::LineBreak
        } else {
            FlatToken::Irrelevant
        };
        result.push(token);
    });
    result
}
/// Decides whether a quote may open and/or close a quotation, judging only by
/// the characters directly before (`last_char`) and after (`next_char`) it.
///
/// Returns `(can_open, can_close)`.
fn can_open_or_close(quote_type: &QuoteType, last_char: char, next_char: char) -> (bool, bool) {
    // A double quote after a digit and directly before another double quote
    // is left alone entirely; this case is settled before the general rules.
    if *quote_type == QuoteType::Double && next_char == DOUBLE_QUOTE && last_char.is_ascii_digit()
    {
        return (false, false);
    }

    let punct = |c: char| c.is_ascii_punctuation() || is_punct_char(c);
    let (last_punct, next_punct) = (punct(last_char), punct(next_char));
    let (last_space, next_space) = (last_char.is_whitespace(), next_char.is_whitespace());

    let opens = !next_space && (!next_punct || last_space || last_punct);
    let closes = !last_space && (!last_punct || next_space || next_punct);

    match (opens, closes) {
        // Ambiguous position: only punctuation on a side lets the quote take
        // the corresponding role, so a quote squeezed between two word
        // characters gets neither.
        (true, true) => (last_punct, next_punct),
        decided => decided,
    }
}
/// Builds a copy of `content` in which every character whose position (counted
/// in `char`s) is a key of `replacement_ops` is swapped for the mapped
/// character. Positions beyond the end of `content` are ignored.
fn execute_replacements(replacement_ops: &HashMap<usize, char>, content: &str) -> String {
    let mut rewritten = String::with_capacity(content.len());
    for (position, original) in content.chars().enumerate() {
        let replacement = replacement_ops.get(&position).copied();
        rewritten.push(replacement.unwrap_or(original));
    }
    rewritten
}
/// Pops every marker at the top of `quote_stack` whose level is greater than
/// `level`. Popping stops at the first marker (from the top) whose level is
/// `level` or lower; markers underneath it are kept regardless of their level.
fn truncate_stack(quote_stack: &mut Vec<QuoteMarker>, level: u32) {
    // Index of the topmost marker that survives, plus one, is the new length.
    let keep = quote_stack
        .iter()
        .rposition(|marker| marker.level <= level)
        .map_or(0, |topmost| topmost + 1);
    quote_stack.truncate(keep);
}
/// Lazily yields every straight quote in `content` as
/// `(position, quote type)`, where the position counts `char`s, not bytes.
fn find_quotes(content: &str) -> impl Iterator<Item = (usize, QuoteType)> + '_ {
    content
        .chars()
        .enumerate()
        .filter_map(|(position, ch)| match ch {
            SINGLE_QUOTE => Some((position, QuoteType::Single)),
            DOUBLE_QUOTE => Some((position, QuoteType::Double)),
            _ => None,
        })
}
/// Returns the character that follows the quote at `quote_position` (counted
/// in `char`s) of the token at `token_index`.
///
/// If the quote is the last character of its token, the search continues in
/// the following tokens: irrelevant tokens and empty text/HTML tokens are
/// skipped, while a line break, or running out of tokens, yields `SPACE`.
fn find_first_char_after(
    text_tokens: &[FlatToken],
    token_index: usize,
    quote_position: usize,
) -> char {
    for (offset, token) in text_tokens.iter().skip(token_index).enumerate() {
        let text: &str = match token {
            FlatToken::LineBreak => return SPACE,
            FlatToken::Irrelevant => continue,
            FlatToken::Text { content, .. } | FlatToken::HtmlInline { content } => *content,
        };

        // Only in the quote's own token do we have to step past the quote;
        // later tokens are read from their first character.
        let skipped = if offset == 0 { quote_position + 1 } else { 0 };
        if let Some(found) = text.chars().nth(skipped) {
            return found;
        }
    }
    SPACE
}
/// Returns the character that precedes the quote at `quote_position` (counted
/// in `char`s) of the token at `token_index`.
///
/// If the quote is the first character of its token, the search continues in
/// the preceding tokens: irrelevant tokens and empty text/HTML tokens are
/// skipped, while a line break, or running out of tokens, yields `SPACE`.
///
/// # Panics
///
/// Panics if `token_index` is out of bounds, or if `quote_position` is larger
/// than the number of characters in the token at `token_index`.
fn find_last_char_before(
    text_tokens: &[FlatToken],
    token_index: usize,
    quote_position: usize,
) -> char {
    for (index, token) in text_tokens[..=token_index].iter().enumerate().rev() {
        let text: &str = match token {
            FlatToken::LineBreak => return SPACE,
            FlatToken::Irrelevant => continue,
            FlatToken::Text { content, .. } | FlatToken::HtmlInline { content } => *content,
        };

        let preceding = if index == token_index {
            // In the quote's own token, take the character just before it.
            match quote_position {
                0 => None,
                position => Some(text.chars().nth(position - 1).unwrap()),
            }
        } else {
            // In an earlier token, take its final character (if any).
            text.chars().next_back()
        };

        if let Some(found) = preceding {
            return found;
        }
    }
    SPACE
}
#[cfg(test)]
mod tests {
    use crate::plugins::cmark;
    use crate::plugins::extra::{smartquotes, typographer};
    use crate::plugins::html::html_inline;
    use crate::MarkdownIt;

    /// Straight quotes wrapped around words become the default curly quotes.
    #[test]
    fn smartquotes_basics() {
        let mut parser = MarkdownIt::new();
        cmark::add(&mut parser);
        smartquotes::add(&mut parser);

        let rendered = parser.parse(r#"'hello' "world""#).render();
        assert_eq!(rendered.trim(), r#"<p>‘hello’ “world”</p>"#);
    }

    /// Quotes that are part of inline HTML must come out unchanged.
    #[test]
    fn smartquotes_shouldnt_affect_html() {
        let mut parser = MarkdownIt::new();
        cmark::add(&mut parser);
        html_inline::add(&mut parser);
        smartquotes::add(&mut parser);

        let rendered = parser.parse(r#"<a href="hello"></a>"#).render();
        assert_eq!(rendered.trim(), r#"<p><a href="hello"></a></p>"#);
    }

    /// Quotes are still paired when the typographer rule is also registered
    /// and the quoted text sits inside emphasis.
    #[test]
    fn smartquotes_should_work_with_typographer() {
        let mut parser = MarkdownIt::new();
        cmark::add(&mut parser);
        html_inline::add(&mut parser);
        typographer::add(&mut parser);
        smartquotes::add(&mut parser);

        let rendered = parser.parse("\"**...**\"").render();
        assert_eq!(rendered.trim(), "<p>“<strong>…</strong>”</p>");
    }
}