use crate::md_elem::elem::*;
use crate::md_elem::inline_regex_replace::{regex_replace_inlines, RegexReplaceError, Replaced};
use crate::md_elem::*;
use crate::select::{MatchReplace, Matcher, SelectError};
use fancy_regex::Regex;
use std::borrow::Cow;
/// A regex-backed matcher over text, with an optional replacement.
///
/// Every matcher kind is represented as a single regex; see the `From<MatchReplace>` impl for
/// how each kind is converted.
#[derive(Debug)]
pub(crate) struct StringMatcher {
/// The pattern that haystacks are tested against.
re: Regex,
/// Replacement text to apply to matches; `None` means match-only.
replacement: Option<String>,
}
/// Errors that can occur while matching or replacing with a [`StringMatcher`].
#[derive(Clone, Debug)]
pub(crate) enum StringMatchError {
/// The regex engine reported an error while evaluating the pattern.
RegexError(Box<fancy_regex::Error>),
/// Replacing within inline elements failed.
ReplaceError(RegexReplaceError),
}
/// The outcome of [`StringMatcher::match_replace`]. Either variant hands the haystack back to
/// the caller.
#[must_use]
pub(crate) enum StringMatch<'a> {
/// The pattern did not match; carries the original haystack.
NoMatch(String),
/// The pattern matched; carries the original haystack and, when the matcher has a replacement
/// configured, the regex and replacement text to apply.
Match(String, Option<(&'a Regex, &'a str)>),
}
impl StringMatchError {
    /// Converts this error into a [`SelectError`] whose message names the selector in which the
    /// error occurred.
    ///
    /// `selector_name` is interpolated verbatim into the message.
    pub(crate) fn to_select_error(&self, selector_name: &str) -> SelectError {
        SelectError::new(match self {
            Self::RegexError(err) => {
                format!("regex evaluation error in {selector_name} selector: {err}")
            }
            Self::ReplaceError(err) => {
                format!("regex replacement error in {selector_name} selector: {err}")
            }
        })
    }
}
impl PartialEq for StringMatcher {
    /// Two matchers are equal when their regex source text and their replacements are equal.
    fn eq(&self, other: &Self) -> bool {
        let same_pattern = self.re.as_str() == other.re.as_str();
        same_pattern && self.replacement == other.replacement
    }
}
impl StringMatcher {
/// Tests `haystack` against this matcher's regex, handing the haystack back in either case.
///
/// On a match, the result also carries the regex and replacement text if a replacement is
/// configured. The replacement itself is not applied here.
///
/// # Errors
/// Returns [`StringMatchError::RegexError`] if the regex engine fails while matching.
pub(crate) fn match_replace(&self, haystack: String) -> Result<StringMatch<'_>, StringMatchError> {
    let is_match = self
        .re
        .is_match(&haystack)
        .map_err(|e| StringMatchError::RegexError(Box::new(e)))?;
    if !is_match {
        return Ok(StringMatch::NoMatch(haystack));
    }
    let replacement = self.replacement.as_deref().map(|text| (&self.re, text));
    Ok(StringMatch::Match(haystack, replacement))
}
pub(crate) fn match_replace_string(&self, haystack: String) -> Result<Replaced<String>, StringMatchError> {
let ok = match self.match_replace(haystack)? {
StringMatch::NoMatch(orig) => Replaced {
item: orig,
matched_any: false,
},
StringMatch::Match(orig, None) => Replaced {
item: orig,
matched_any: true,
},
StringMatch::Match(orig, Some((pattern, replace_str))) => {
let replaced_str = match pattern.replace_all(&orig, replace_str) {
Cow::Borrowed(_) => orig, Cow::Owned(s) => s,
};
Replaced {
item: replaced_str,
matched_any: true,
}
}
};
Ok(ok)
}
/// Runs this matcher's regex (and replacement, if any) over a sequence of inlines.
///
/// Delegates to [`regex_replace_inlines`].
///
/// # Errors
/// Returns [`StringMatchError::ReplaceError`] wrapping any error from the delegate.
pub(crate) fn match_replace_inlines(
    &self,
    haystack: Vec<Inline>,
) -> Result<Replaced<Vec<Inline>>, StringMatchError> {
    let replacement = self.replacement.as_deref();
    regex_replace_inlines(haystack, &self.re, replacement).map_err(StringMatchError::ReplaceError)
}
/// Applies this matcher to every element of `haystack`, recursing into nested elements.
///
/// Returns the elements after replacement, along with whether anything matched.
///
/// # Errors
/// Stops at, and returns, the first error encountered.
pub(crate) fn match_replace_any(
    &self,
    mut haystack: Vec<MdElem>,
) -> Result<Replaced<Vec<MdElem>>, StringMatchError> {
    let mut matched_any = false;
    for slot in haystack.iter_mut() {
        // Move the element out of the vec, leaving an empty doc as a placeholder.
        let taken = std::mem::replace(slot, MdElem::Doc(Vec::new()));
        let Replaced {
            item,
            matched_any: matched,
        } = self.match_replace_node(taken)?;
        matched_any |= matched;
        *slot = item;
    }
    Ok(Replaced {
        item: haystack,
        matched_any,
    })
}
/// Applies this matcher to a single element, dispatching on the element's kind.
///
/// Container elements are rebuilt around their recursively processed children; text-bearing
/// elements are processed as inlines or as plain strings. Thematic breaks, code blocks and
/// front matter are returned untouched and never count as a match.
///
/// # Panics
/// Panics if replacing a single inline does not yield exactly one inline.
fn match_replace_node(&self, node: MdElem) -> Result<Replaced<MdElem>, StringMatchError> {
    let (item, matched_any) = match node {
        MdElem::Doc(elems) => {
            let Replaced { item, matched_any } = self.match_replace_any(elems)?;
            (MdElem::Doc(item), matched_any)
        }
        MdElem::BlockQuote(BlockQuote { body }) => {
            let Replaced { item, matched_any } = self.match_replace_any(body)?;
            (MdElem::BlockQuote(BlockQuote { body: item }), matched_any)
        }
        MdElem::List(mut list) => {
            let mut any_item_matched = false;
            for list_item in &mut list.items {
                let children = std::mem::take(&mut list_item.item);
                let Replaced { item, matched_any } = self.match_replace_any(children)?;
                any_item_matched |= matched_any;
                list_item.item = item;
            }
            (MdElem::List(list), any_item_matched)
        }
        MdElem::Section(Section { title, body, depth }) => {
            // The title is processed before the body, so a title error is reported first.
            let title = self.match_replace_inlines(title)?;
            let body = self.match_replace_any(body)?;
            let matched_any = title.matched_any || body.matched_any;
            let section = Section {
                title: title.item,
                body: body.item,
                depth,
            };
            (MdElem::Section(section), matched_any)
        }
        MdElem::Paragraph(Paragraph { body }) => {
            let Replaced { item, matched_any } = self.match_replace_inlines(body)?;
            (MdElem::Paragraph(Paragraph { body: item }), matched_any)
        }
        MdElem::Table(mut table) => {
            let mut any_cell_matched = false;
            for row in &mut table.rows {
                for cell in row {
                    let Replaced { item, matched_any } = self.match_replace_inlines(std::mem::take(cell))?;
                    any_cell_matched |= matched_any;
                    *cell = item;
                }
            }
            (MdElem::Table(table), any_cell_matched)
        }
        MdElem::Inline(inline) => {
            let Replaced {
                item: mut inlines,
                matched_any,
            } = self.match_replace_inlines(vec![inline])?;
            let replaced_inline = inlines
                .pop()
                .expect("while taking first element from replacement");
            assert!(
                inlines.is_empty(),
                "unexpected extra element(s) after replacing inlines: {:?}",
                inlines
            );
            (MdElem::Inline(replaced_inline), matched_any)
        }
        MdElem::BlockHtml(BlockHtml { value }) => {
            let Replaced { item, matched_any } = self.match_replace_string(value)?;
            (MdElem::BlockHtml(BlockHtml { value: item }), matched_any)
        }
        leaf @ (MdElem::ThematicBreak | MdElem::CodeBlock(_) | MdElem::FrontMatter(_)) => (leaf, false),
    };
    Ok(Replaced { item, matched_any })
}
/// Builds the regex used for the "any" matcher: the pattern `.*`.
///
/// Panics with "internal error" if the pattern fails to compile.
fn re_for_any() -> Regex {
Regex::new(".*").expect("internal error")
}
/// Returns the given regex unchanged.
fn re_for_regex(re: Regex) -> Regex {
re
}
/// Builds a regex that matches `text` literally.
///
/// - `case_sensitive`: when `false` and `text` is non-empty, the pattern is prefixed with `(?i)`.
/// - `anchor_start` / `anchor_end`: add `^` before / `$` after the escaped text.
///
/// # Panics
/// Panics with "internal error" if the assembled pattern fails to compile.
fn re_from_text(text: String, case_sensitive: bool, anchor_start: bool, anchor_end: bool) -> Regex {
    let ignore_case = !(case_sensitive || text.is_empty());
    let flags = if ignore_case { "(?i)" } else { "" };
    let start = if anchor_start { "^" } else { "" };
    let end = if anchor_end { "$" } else { "" };
    let escaped = fancy_regex::escape(&text);
    let pattern = [flags, start, &*escaped, end].concat();
    Regex::new(&pattern).expect("internal error")
}
}
impl From<MatchReplace> for StringMatcher {
    /// Compiles a [`MatchReplace`] into a [`StringMatcher`]. Every matcher kind becomes a regex,
    /// and the replacement is carried over as-is.
    fn from(value: MatchReplace) -> Self {
        let re = match value.matcher {
            Matcher::Any { .. } => Self::re_for_any(),
            Matcher::Regex(parsed) => Self::re_for_regex(parsed.re),
            Matcher::Text {
                text,
                case_sensitive,
                anchor_start,
                anchor_end,
            } => Self::re_from_text(text, case_sensitive, anchor_start, anchor_end),
        };
        Self {
            re,
            replacement: value.replacement,
        }
    }
}
#[cfg(test)]
mod test {
use super::*;
use crate::query::{ParseError, StringVariant};
use std::str::FromStr;
impl PartialEq for StringMatchError {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(StringMatchError::RegexError(_), StringMatchError::RegexError(_)) => {
true
}
(StringMatchError::ReplaceError(left), StringMatchError::ReplaceError(right)) => left == right,
_ => false,
}
}
}
#[test]
fn bareword() {
    // (input, expected case-insensitive pattern, expected unparsed remainder)
    let pipe_cases = [
        ("hello", "hello", ""),
        ("hello ", "hello", ""),
        ("hello / goodbye", "hello / goodbye", ""),
        ("hello| goodbye", "hello", "| goodbye"),
        ("hello | goodbye", "hello", "| goodbye"),
    ];
    for (text, pattern, remaining) in pipe_cases {
        parse_and_check(text, re_insensitive(pattern), remaining);
    }

    parse_and_check_with(
        StringVariant::AngleBracket,
        "foo> rest",
        re_insensitive("foo"),
        "> rest",
    );
}
#[test]
fn bareword_anchor_start() {
    let matcher = parse_and_check("^ hello |after", re_insensitive("^hello"), "|after");
    for (haystack, expected) in [("pre hello", false), ("hello", true), ("hello post", true)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn bareword_anchor_end() {
    let matcher = parse_and_check(" hello $ |after", re_insensitive("hello$"), "|after");
    for (haystack, expected) in [("pre hello", true), ("hello", true), ("hello post", false)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn only_starting_anchor() {
    // A lone `^` yields the "any" matcher, and the whole input is left unparsed.
    for text in ["^ |", "^"] {
        parse_and_check(text, StringMatcher::re_for_any(), text);
    }
}
#[test]
fn only_ending_anchor() {
    // A lone `$` yields the "any" matcher, and the whole input is left unparsed.
    for text in ["$ |", "$"] {
        parse_and_check(text, StringMatcher::re_for_any(), text);
    }
}
#[test]
fn only_both_anchors() {
    let empty_only = parse_and_check("^$ |after", re("^$"), "|after");
    for (haystack, expected) in [("", true), ("x", false), ("\n", false)] {
        assert_eq!(empty_only.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }

    parse_and_check("^ $ |after", re("^$"), "|after");
}
#[test]
fn bareword_case_sensitivity() {
    // Barewords match case-insensitively.
    let matcher = parse_and_check("hello", re_insensitive("hello"), "");
    for haystack in ["hello", "HELLO"] {
        assert!(matcher.matches(haystack).unwrap(), "haystack: {haystack:?}");
    }
}
#[test]
fn quoted_case_sensitivity() {
    // Quoted strings match case-sensitively.
    let matcher = parse_and_check("'hello'", re("hello"), "");
    for (haystack, expected) in [("hello", true), ("HELLO", false)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn quoted_anchor_start() {
    let matcher = parse_and_check("^'hello'", re("^hello"), "");
    for (haystack, expected) in [("pre hello", false), ("hello", true), ("hello post", true)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn quoted_anchor_end() {
    let matcher = parse_and_check("'hello'$", re("hello$"), "");
    for (haystack, expected) in [("pre hello", true), ("hello", true), ("hello post", false)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn anchor_whitespace() {
    // (input, expected pattern); every case is expected to consume the whole input.
    let cases = [
        ("^foo", "(?i)^foo"),
        ("^ foo", "(?i)^foo"),
        ("^ 'foo'", "^foo"),
        ("bar$", "(?i)bar$"),
        ("bar $", "(?i)bar$"),
        ("'bar' $", "bar$"),
        ("^ foobar $ ", "(?i)^foobar$"),
    ];
    for (text, pattern) in cases {
        parse_and_check(text, re(pattern), "");
    }
}
#[test]
fn bareword_regex_char() {
    // The `.` in a bareword is escaped, so it only matches a literal dot.
    let matcher = parse_and_check("hello.world", re_insensitive("hello\\.world"), "");
    for (haystack, expected) in [("hello.world", true), ("hello world", false)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
#[test]
fn bareword_end_delimiters() {
    // (input, expected case-insensitive pattern, expected unparsed remainder)
    let cases = [
        ("hello>world", "hello", ">world"),
        ("hello$world", "hello$", "world"),
    ];
    for (text, pattern, remaining) in cases {
        parse_and_check_with(
            StringVariant::AngleBracket,
            text,
            re_insensitive(pattern),
            remaining,
        );
    }
}
/// A double-quoted string containing the escapes `\'`, `\"`, `` \` ``, `\r`, `\n`, `\t` and
/// `\u{2603}` parses to the expected literal pattern; parsing stops after the closing quote,
/// leaving `_` unparsed.
#[test]
fn double_quoted_string() {
parse_and_check(
r#" "hello world's ☃ \' \" \` \r \n \t says \"\u{2603}\" to me"_"#,
re("hello world's ☃ ' \" ' \r \n \t says \"☃\" to me"),
"_",
);
}
/// A single-quoted string containing the escapes `\'`, `\"`, `` \` ``, `\r`, `\n`, `\t` and
/// `\u{2603}` (plus unescaped double quotes) parses to the expected literal pattern; parsing
/// stops after the closing quote, leaving `_` unparsed.
#[test]
fn single_quoted_string() {
parse_and_check(
r#" 'hello world\'s ☃ \' \" \` \r \n \t says "\u{2603}" to me'_"#,
re("hello world's ☃ ' \" ' \r \n \t says \"☃\" to me"),
"_",
);
}
#[test]
fn quote_errs() {
    // Each malformed quoted string is expected to yield the "any" matcher with nothing consumed.
    let malformed = [
        r#"" "#,
        r#"' "#,
        r#"'\"#,
        r#""\x" "#,
        r#""\u2603" "#,
        r#""\u{}" "#,
        r#""\u{12345678}" "#,
        r#""\u{snowman}" "#,
        r#""\u{2603"#,
    ];
    for text in malformed {
        expect_empty(text);
    }
}
#[test]
fn regex() {
    // (input, expected regex source, expected unparsed remainder)
    let cases = [
        (r#"/foo/"#, "foo", ""),
        (r#"/foo /"#, "foo ", ""),
        (r#"/foo/bar"#, "foo", "bar"),
        (r#"//"#, "", ""),
        (r#"/(a|b)/"#, "(a|b)", ""),
        (r#"/\d/"#, "\\d", ""),
        (r#"/fizz\/buzz/"#, "fizz/buzz", ""),
    ];
    for (text, pattern, remaining) in cases {
        parse_and_check(
            text,
            StringMatcher::re_for_regex(Regex::new(pattern).unwrap()),
            remaining,
        );
    }

    expect_empty(r#"/unclosed"#);
    expect_err(r#"/(unclosed paren/"#);
}
#[test]
fn any() {
    let any_re = StringMatcher::re_for_any;

    let first = parse_and_check("| rest", any_re(), "| rest");
    assert!(first.matches("").unwrap());

    // (input, expected unparsed remainder)
    for (text, remaining) in [("| rest", "| rest"), ("*| rest", "| rest"), ("* | rest", "| rest")] {
        parse_and_check(text, any_re(), remaining);
    }

    parse_and_check_with(StringVariant::AngleBracket, "> rest", any_re(), "> rest");
}
#[test]
fn fancy_regex_lookahead() {
    // Exercises a lookahead pattern: `foo` only when followed by `bar`.
    let matcher = StringMatcher {
        re: re(r#"foo(?=bar)"#),
        replacement: None,
    };
    for (haystack, expected) in [("foobar", true), ("foo", false), ("foobaz", false)] {
        assert_eq!(matcher.matches(haystack).unwrap(), expected, "haystack: {haystack:?}");
    }
}
/// Parses `text` using `string_variant`, then asserts on both the resulting matcher and the
/// unparsed remainder.
///
/// Returns the expected matcher so callers can run further assertions against it.
///
/// # Panics
/// Panics (with the debug form of the `ParseError`) if parsing fails, or if either assertion fails.
fn parse_and_check_with(
    string_variant: StringVariant,
    text: &str,
    expect_re: Regex,
    expect_remaining: &str,
) -> StringMatcher {
    let (parsed, remaining) =
        Matcher::parse(string_variant, text).unwrap_or_else(|err| panic!("{:?}", ParseError::new(err)));

    let expected = StringMatcher {
        re: expect_re,
        replacement: None,
    };
    let actual: StringMatcher = parsed.into();
    assert_eq!(actual, expected);
    assert_eq!(remaining, expect_remaining);
    expected
}
/// Shorthand for `parse_and_check_with` using `StringVariant::Pipe`.
fn parse_and_check(text: &str, expect: Regex, expect_remaining: &str) -> StringMatcher {
parse_and_check_with(StringVariant::Pipe, text, expect, expect_remaining)
}
/// Asserts that parsing `text` yields the "any" matcher and leaves all of `text` unparsed.
fn expect_empty(text: &str) {
parse_and_check(text, StringMatcher::re_for_any(), text);
}
/// Asserts that parsing `text` (with `StringVariant::Pipe`) fails.
fn expect_err(text: &str) {
    match Matcher::parse(StringVariant::Pipe, text) {
        Err(_) => {}
        Ok(unexpected) => panic!("unexpected success: {unexpected:?}"),
    }
}
/// Compiles `value` as a regex, panicking with "test error" if it is invalid.
fn re(value: &str) -> Regex {
Regex::new(value).expect("test error")
}
/// Compiles `value` as a regex prefixed with the case-insensitive flag `(?i)`.
fn re_insensitive(value: &str) -> Regex {
    re(&format!("(?i){value}"))
}
impl From<&str> for StringMatcher {
    /// Test convenience: compiles `value` as a regex with no replacement, panicking if the
    /// pattern is invalid.
    fn from(value: &str) -> Self {
        let re = Regex::from_str(value).unwrap();
        Self { re, replacement: None }
    }
}
impl StringMatcher {
    /// Test convenience: reports whether `haystack` matches, discarding any replaced text.
    pub(crate) fn matches(&self, haystack: &str) -> Result<bool, StringMatchError> {
        self.match_replace_string(haystack.to_owned())
            .map(|replaced| replaced.matched_any)
    }
}
}