pub(crate) trait RegexEngine: Sized + Send + Sync {
type Error: RegexError;
fn is_match(&self, text: &str) -> Result<bool, Self::Error>;
fn pattern(&self) -> &str;
}
#[derive(Debug)]
pub(crate) enum RegexFailureReason {
FancyRegex(fancy_regex::Error),
Panicked,
}
pub(crate) trait RegexError {
fn into_failure_reason(self) -> RegexFailureReason;
}
#[derive(Debug)]
pub(crate) enum FancyRegexError {
Engine(fancy_regex::Error),
Panicked,
}
impl RegexError for FancyRegexError {
fn into_failure_reason(self) -> RegexFailureReason {
match self {
Self::Engine(e) => RegexFailureReason::FancyRegex(e),
Self::Panicked => RegexFailureReason::Panicked,
}
}
}
impl RegexEngine for fancy_regex::Regex {
type Error = FancyRegexError;
fn is_match(&self, text: &str) -> Result<bool, Self::Error> {
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
fancy_regex::Regex::is_match(self, text)
})) {
Ok(Ok(matched)) => Ok(matched),
Ok(Err(e)) => Err(FancyRegexError::Engine(e)),
Err(_) => Err(FancyRegexError::Panicked),
}
}
fn pattern(&self) -> &str {
self.as_str()
}
}
#[derive(Debug)]
pub(crate) struct RegexBackendPanic;
impl RegexError for RegexBackendPanic {
fn into_failure_reason(self) -> RegexFailureReason {
RegexFailureReason::Panicked
}
}
impl RegexEngine for regex::Regex {
type Error = RegexBackendPanic;
fn is_match(&self, text: &str) -> Result<bool, Self::Error> {
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
regex::Regex::is_match(self, text)
}))
.map_err(|_| RegexBackendPanic)
}
fn pattern(&self) -> &str {
self.as_str()
}
}
#[derive(Debug)]
pub(crate) enum LiteralMatchError {}
impl RegexError for LiteralMatchError {
fn into_failure_reason(self) -> RegexFailureReason {
match self {}
}
}
pub(crate) enum LiteralMatcher {
Prefix {
literal: String,
original: String,
},
Exact {
exact: String,
original: String,
},
Alternation {
alternatives: Vec<String>,
original: String,
},
NoWhitespace {
original: String,
},
}
impl RegexEngine for LiteralMatcher {
type Error = LiteralMatchError;
#[inline]
fn is_match(&self, text: &str) -> Result<bool, Self::Error> {
match self {
Self::Prefix { literal, .. } => Ok(text.starts_with(literal.as_str())),
Self::Exact { exact, .. } => Ok(text == exact.as_str()),
Self::Alternation { alternatives, .. } => {
Ok(alternatives.iter().any(|a| a.as_str() == text))
}
Self::NoWhitespace { .. } => Ok(!text.chars().any(is_ecma_whitespace)),
}
}
fn pattern(&self) -> &str {
match self {
Self::Prefix { original, .. }
| Self::Exact { original, .. }
| Self::Alternation { original, .. }
| Self::NoWhitespace { original } => original.as_str(),
}
}
}
#[derive(Debug, PartialEq)]
pub(crate) enum PatternOptimization {
Prefix(String),
Exact(String),
Alternation(Vec<String>),
NoWhitespace,
}
#[inline]
pub(crate) fn is_ecma_whitespace(c: char) -> bool {
matches!(
c,
'\t' | '\n' | '\x0b' | '\x0c' | '\r' | ' ' | '\u{00a0}' | '\u{1680}' | '\u{2000}'
..='\u{200a}'
| '\u{2028}'
| '\u{2029}'
| '\u{202f}'
| '\u{205f}'
| '\u{3000}'
| '\u{feff}'
)
}
fn parse_literal_part(s: &str) -> Option<String> {
let mut result = String::new();
let mut chars = s.chars().peekable();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next()? {
c @ ('/' | '-' | '_' | '$' | '.') => result.push(c),
_ => return None,
}
} else if c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '/') {
result.push(c);
} else {
return None;
}
}
Some(result)
}
fn parse_literal_alternation(inner: &str) -> Option<Vec<String>> {
let mut alternatives: Vec<String> = inner
.split('|')
.map(parse_literal_part)
.collect::<Option<Vec<_>>>()?;
alternatives.sort();
Some(alternatives)
}
pub(crate) fn analyze_pattern(pattern: &str) -> Option<PatternOptimization> {
if pattern == r"^\S*$" {
return Some(PatternOptimization::NoWhitespace);
}
if let Some(inner) = pattern
.strip_prefix("^(")
.and_then(|s| s.strip_suffix(")$"))
{
let alternatives = parse_literal_alternation(inner)?;
return Some(PatternOptimization::Alternation(alternatives));
}
let suffix = pattern.strip_prefix('^')?;
let mut literal = String::new();
let mut chars = suffix.chars().peekable();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next()? {
c @ ('/' | '-' | '_' | '$' | '.') => literal.push(c),
_ => return None,
}
} else if c == '$' {
if chars.peek().is_none() {
return Some(PatternOptimization::Exact(literal));
}
return None;
} else if c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '/') {
literal.push(c);
} else {
return None;
}
}
Some(PatternOptimization::Prefix(literal))
}
#[cfg(test)]
mod tests {
use super::{analyze_pattern, PatternOptimization};
use test_case::test_case;
#[test_case(r"^\S*$", Some(PatternOptimization::NoWhitespace) ; "no whitespace sentinel")]
#[test_case(
r"^(get|put|post)$",
Some(PatternOptimization::Alternation(vec!["get".into(), "post".into(), "put".into()])) ;
"sorted alternation"
)]
#[test_case(r"^(a|b|c^)$", None ; "invalid char in alternative")]
#[test_case(r"^(x-foo|x-bar)$", Some(PatternOptimization::Alternation(vec!["x-bar".into(), "x-foo".into()])) ; "alternation with dash")]
#[test_case(r"^(single)$", Some(PatternOptimization::Alternation(vec!["single".into()])) ; "single alternative")]
#[allow(clippy::needless_pass_by_value)]
fn test_analyze_pattern_new(pattern: &str, expected: Option<PatternOptimization>) {
assert_eq!(analyze_pattern(pattern), expected);
}
}