use regex::Regex;
/// Flattens `input` into a single normalized, backslash-escaped line.
///
/// Every line of `input` is processed on its own:
///
/// 1. Runs of whitespace (tabs included) collapse to a single space and
///    leading/trailing whitespace is dropped.
/// 2. Typographic punctuation is folded to ASCII: U+2018/U+2019 become `'`,
///    U+201C/U+201D become `"`, and U+2014 (em dash) becomes `-`.
/// 3. Spaces next to a hyphen are removed, so `a - b` becomes `a-b`.
/// 4. Backslashes are doubled and double quotes are prefixed with a
///    backslash (this also applies to quotes produced in step 2).
///
/// Lines that are empty after processing are discarded; the remaining lines
/// are joined with a single space. Braces and brackets pass through unchanged.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(sanitize_markdown("Line one\n\n  Line\ttwo "), "Line one Line two");
/// assert_eq!(sanitize_markdown("Path\\to"), "Path\\\\to");
/// ```
pub fn sanitize_markdown(input: &str) -> String {
    input
        .lines()
        .map(|raw| {
            // `split_whitespace` uses the Unicode White_Space property, which is
            // the same set used by `str::trim` and by a `\s+` pattern, so this is
            // "trim, then collapse every whitespace run to one space".
            let collapsed = raw.split_whitespace().collect::<Vec<_>>().join(" ");

            // Fold curly quotes and em dashes to their ASCII counterparts.
            let folded: String = collapsed
                .chars()
                .map(|c| match c {
                    '\u{2018}' | '\u{2019}' => '\'',
                    '\u{201C}' | '\u{201D}' => '"',
                    '\u{2014}' => '-',
                    other => other,
                })
                .collect();

            folded
                // Tighten hyphens. The order of these three passes matters for
                // inputs such as "a - - b", so it is kept as three passes.
                .replace(" - ", "-")
                .replace("- ", "-")
                .replace(" -", "-")
                // Escape backslashes first so the backslashes introduced for
                // quotes are not doubled afterwards.
                .replace('\\', "\\\\")
                .replace('"', "\\\"")
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
#[cfg(test)]
mod tests {
    //! Unit tests for `sanitize_markdown`, one behaviour per test so a failure
    //! points at the rule that broke.

    use super::*;

    /// Plain ASCII double quotes are backslash-escaped.
    #[test]
    fn escapes_ascii_double_quotes() {
        assert_eq!(
            sanitize_markdown("This is a \"smart quote\" example"),
            "This is a \\\"smart quote\\\" example"
        );
    }

    /// Curly quotes are folded to ASCII; folded double quotes are then escaped.
    #[test]
    fn folds_curly_quotes_to_ascii() {
        assert_eq!(
            sanitize_markdown("\u{201C}hi\u{201D} it\u{2019}s \u{2018}ok"),
            "\\\"hi\\\" it's 'ok"
        );
    }

    /// Em dashes become hyphens, and spaces around hyphens are removed.
    #[test]
    fn folds_em_dash_and_tightens_hyphens() {
        assert_eq!(sanitize_markdown("This\u{2014}is an em-dash"), "This-is an em-dash");
        assert_eq!(sanitize_markdown("a \u{2014} b"), "a-b");
        assert_eq!(sanitize_markdown("a - - b"), "a--b");
    }

    /// Blank and whitespace-only lines vanish; the rest are joined by one space.
    #[test]
    fn joins_lines_and_drops_blank_ones() {
        assert_eq!(
            sanitize_markdown("Line one\n\nLine two\n \nLine three"),
            "Line one Line two Line three"
        );
        assert_eq!(sanitize_markdown(""), "");
    }

    /// Tabs and repeated spaces collapse to a single space; edges are trimmed.
    #[test]
    fn collapses_tabs_and_whitespace_runs() {
        assert_eq!(sanitize_markdown("  a\t\tb   c  "), "a b c");
    }

    /// Braces and brackets are left untouched.
    #[test]
    fn keeps_braces_and_brackets() {
        assert_eq!(
            sanitize_markdown("This has {braces} and [brackets]"),
            "This has {braces} and [brackets]"
        );
    }

    /// Each backslash is doubled.
    #[test]
    fn doubles_backslashes() {
        assert_eq!(sanitize_markdown("Path\\to\\file"), "Path\\\\to\\\\file");
    }
}