use regex::Regex;
use std::sync::LazyLock;
use crate::wrapping::tag_handling::TEMPLATE_TAG_PATTERN;
/// Matches a paragraph break: a newline, any amount of whitespace (possibly
/// none), then another newline. Compiled once on first use.
static PARAGRAPH_BREAK_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\n\s*\n").expect("valid PARAGRAPH_BREAK_PATTERN regex"));
/// Matches a straight-quoted span together with the delimiters around it.
///
/// Capture groups:
/// 1. Prefix: start of a line (multi-line mode is on), one whitespace
///    character, or an em dash (U+2014).
/// 2. Content between straight double quotes; the content may not contain a
///    straight double quote or a curly double quote (U+201C / U+201D).
/// 3. Content between straight single quotes; the content may not contain a
///    straight single quote or a curly single quote (U+2018 / U+2019).
/// 4. Suffix: one whitespace character, end of a line, one of `. , ; : ? !`,
///    an em dash (U+2014), or `)`.
///
/// Exactly one of groups 2 and 3 participates in any match. Both the prefix
/// and the suffix are part of the match, so a replacement must re-emit them.
static QUOTE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r#"(?m)(^|\s|\u{2014})(?:"([^"\u{201c}\u{201d}]*)"|'([^'\u{2018}\u{2019}]*)')(\s|$|\.|,|;|:|\?|!|\u{2014}|\))"#,
)
.expect("valid QUOTE_PATTERN regex")
});
/// Matches a straight apostrophe that sits directly between two word
/// characters (for example the one in `I'm`).
static APOSTROPHE_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(\w)'(\w)").expect("valid APOSTROPHE_PATTERN regex"));
/// Matches a whole string made of zero or more word characters, then `s` or
/// `S`, then a single trailing straight apostrophe (for example `James'`).
/// The pattern is anchored at both ends, so surrounding punctuation prevents
/// a match.
static POSSESSIVE_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^\w*[sS]'$").expect("valid POSSESSIVE_PATTERN regex"));
/// Reports whether `text` contains at least one paragraph break, i.e. any
/// location where `PARAGRAPH_BREAK_PATTERN` matches.
fn is_multi_paragraph(text: &str) -> bool {
    PARAGRAPH_BREAK_PATTERN.find(text).is_some()
}
fn apply_smart_quotes_to_text(text: &str) -> String {
let result = QUOTE_PATTERN
.replace_all(text, |caps: ®ex::Captures<'_>| {
let prefix = caps.get(1).map_or("", |m| m.as_str());
let double_content = caps.get(2);
let single_content = caps.get(3);
let suffix = caps.get(4).map_or("", |m| m.as_str());
let content = double_content.or(single_content).map_or("", |m| m.as_str());
if is_multi_paragraph(content) {
return caps.get(0).expect("group 0 always exists").as_str().to_string();
}
if double_content.is_some() {
format!("{prefix}\u{201c}{content}\u{201d}{suffix}")
} else {
format!("{prefix}\u{2018}{content}\u{2019}{suffix}")
}
})
.into_owned();
let mut output = String::new();
let mut remaining = result.as_str();
loop {
let ws_pos = remaining.find(char::is_whitespace);
let (word, rest) = if let Some(pos) = ws_pos {
let after_word = &remaining[pos..];
let ws_end = after_word.find(|c: char| !c.is_whitespace()).unwrap_or(after_word.len());
let word_and_ws = &remaining[..pos + ws_end];
let rest = &remaining[pos + ws_end..];
(word_and_ws, rest)
} else {
(remaining, "")
};
if word.is_empty() && rest.is_empty() {
break;
}
let processed_word = process_word_apostrophe(word);
output.push_str(&processed_word);
if rest.is_empty() {
break;
}
remaining = rest;
}
output
}
/// Replaces the apostrophe in a single word with a right single quotation
/// mark (U+2019) when it looks like a contraction or an s-possessive.
///
/// `word` may carry trailing whitespace, which is preserved. The word is
/// returned unchanged unless, after trimming trailing whitespace, it contains
/// exactly one straight apostrophe and matches either `APOSTROPHE_PATTERN`
/// or `POSSESSIVE_PATTERN`.
fn process_word_apostrophe(word: &str) -> String {
    let core = word.trim_end();

    let has_single_apostrophe = core.chars().filter(|&c| c == '\'').count() == 1;
    let convertible = has_single_apostrophe
        && (APOSTROPHE_PATTERN.is_match(core) || POSSESSIVE_PATTERN.is_match(core));
    if !convertible {
        return word.to_string();
    }

    // Re-attach whatever whitespace followed the word.
    let (_, trailing) = word.split_at(core.len());
    let mut converted = core.replace('\'', "\u{2019}");
    converted.push_str(trailing);
    converted
}
/// Converts straight quotes and apostrophes in `text` to typographic ones
/// while leaving template tags untouched.
///
/// Every span matched by `TEMPLATE_TAG_PATTERN` is copied to the output
/// verbatim; the text before, between and after those spans is passed through
/// `apply_smart_quotes_to_text`.
pub fn smart_quotes(text: &str) -> String {
    let mut converted = String::with_capacity(text.len());
    let mut cursor = 0;

    for tag in TEMPLATE_TAG_PATTERN.find_iter(text) {
        let plain = &text[cursor..tag.start()];
        if !plain.is_empty() {
            converted.push_str(&apply_smart_quotes_to_text(plain));
        }
        converted.push_str(tag.as_str());
        cursor = tag.end();
    }

    // Plain text after the last tag (or the whole input when there is none).
    let tail = &text[cursor..];
    if !tail.is_empty() {
        converted.push_str(&apply_smart_quotes_to_text(tail));
    }

    converted
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A double-quoted word surrounded by spaces gets curly double quotes.
    #[test]
    fn test_double_quotes() {
        let converted = smart_quotes(r#"He said "hello" there"#);
        assert_eq!(converted, "He said \u{201c}hello\u{201d} there");
    }

    /// An apostrophe inside a contraction becomes U+2019.
    #[test]
    fn test_apostrophe() {
        let converted = smart_quotes("I'm here");
        assert_eq!(converted, "I\u{2019}m here");
    }

    /// A trailing apostrophe after a word ending in `s` becomes U+2019.
    #[test]
    fn test_possessive_s() {
        let converted = smart_quotes("James' book");
        assert_eq!(converted, "James\u{2019} book");
    }

    /// Quotes inside a template tag are expected to come back unchanged.
    #[test]
    fn test_template_tag_preserved() {
        let tag = r#"{% field kind="string" %}"#;
        assert_eq!(smart_quotes(tag), tag);
    }

    /// A quote directly after `=` is expected to come back unchanged.
    #[test]
    fn test_code_like_unchanged() {
        let snippet = r#"x="foo""#;
        assert_eq!(smart_quotes(snippet), snippet);
    }
}