use std::fmt;
use std::sync::Arc;
use crate::error::{FerriError, Result};
/// Shared, thread-safe callback that receives a URL string and returns
/// whether it matches.
pub type UrlPredicate = Arc<dyn Fn(&str) -> bool + Send + Sync>;
/// A strategy for deciding whether a URL string matches.
///
/// Cloning is cheap for `Predicate`, which only clones the `Arc` handle.
#[derive(Clone)]
pub enum UrlMatcher {
/// Matches every URL, including the empty string.
Any,
/// A glob pattern together with the regex used to evaluate it.
Glob {
/// The glob text; this is what `identifier` returns and what
/// `equivalent` compares for this variant.
pattern: String,
/// The regex that `matches` runs against the URL.
regex: regex::Regex,
},
/// A regular expression tested with `is_match`, so an unanchored pattern
/// matches anywhere inside the URL.
Regex(regex::Regex),
/// An arbitrary caller-supplied predicate.
Predicate(UrlPredicate),
}
impl UrlMatcher {
    /// Returns a matcher that accepts every URL.
    #[must_use]
    pub fn any() -> Self {
        Self::Any
    }

    /// Builds a matcher from a glob pattern.
    ///
    /// An empty pattern collapses to [`UrlMatcher::Any`]. Any other pattern is
    /// translated with [`glob_to_regex_pattern`] and compiled; the original
    /// glob text is kept alongside the compiled regex.
    ///
    /// # Errors
    ///
    /// Returns an invalid-argument error for `"url"` when the translated
    /// pattern is not a valid regex (for example an unbalanced `{` or `}`).
    pub fn glob(pattern: impl Into<String>) -> Result<Self> {
        let pattern = pattern.into();
        if pattern.is_empty() {
            return Ok(Self::Any);
        }
        let regex_source = glob_to_regex_pattern(&pattern);
        let regex = regex::Regex::new(&regex_source).map_err(|e| {
            FerriError::invalid_argument(
                "url",
                format!("glob {pattern:?} (compiled as {regex_source:?}) is not a valid regex: {e}"),
            )
        })?;
        Ok(Self::Glob { pattern, regex })
    }

    /// Wraps an already-compiled regex.
    #[must_use]
    pub fn regex(re: regex::Regex) -> Self {
        Self::Regex(re)
    }

    /// Builds a regex matcher from a JavaScript-style `source` / `flags` pair.
    ///
    /// The flags `i`, `m` and `s` are forwarded as an inline `(?…)` group
    /// prepended to `source`. The flags `g` and `u` are accepted and ignored.
    ///
    /// # Errors
    ///
    /// Returns an invalid-argument error for `"url"` when `flags` contains any
    /// other character, or when the resulting pattern fails to compile.
    pub fn regex_from_source(source: &str, flags: &str) -> Result<Self> {
        let mut inline_flags = String::new();
        for c in flags.chars() {
            match c {
                'i' | 'm' | 's' => inline_flags.push(c),
                // Accepted for compatibility; they do not alter the pattern.
                'g' | 'u' => {},
                other => {
                    return Err(FerriError::invalid_argument(
                        "url",
                        format!("unsupported JS regex flag {other:?} (supported: i, m, s, g, u)"),
                    ));
                },
            }
        }
        let pattern = if inline_flags.is_empty() {
            source.to_string()
        } else {
            format!("(?{inline_flags}){source}")
        };
        let regex = regex::Regex::new(&pattern)
            .map_err(|e| FerriError::invalid_argument("url", format!("regex {source:?} failed to compile: {e}")))?;
        Ok(Self::Regex(regex))
    }

    /// Wraps a closure as a matcher.
    pub fn predicate<F>(f: F) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        Self::Predicate(Arc::new(f))
    }

    /// Returns `true` when `url` satisfies this matcher.
    ///
    /// Glob and regex variants use `Regex::is_match`; the predicate variant
    /// calls the stored closure; `Any` always returns `true`.
    #[must_use]
    pub fn matches(&self, url: &str) -> bool {
        match self {
            Self::Any => true,
            Self::Glob { regex, .. } | Self::Regex(regex) => regex.is_match(url),
            Self::Predicate(f) => f(url),
        }
    }

    /// Returns a textual identifier for this matcher.
    ///
    /// This is the empty string for `Any`, the original glob text for `Glob`,
    /// the regex source for `Regex`, and the fixed text `"<predicate>"` for
    /// `Predicate`.
    #[must_use]
    pub fn identifier(&self) -> String {
        match self {
            Self::Any => String::new(),
            Self::Glob { pattern, .. } => pattern.clone(),
            Self::Regex(r) => r.as_str().to_string(),
            Self::Predicate(_) => "<predicate>".to_string(),
        }
    }

    /// Returns a regex source string describing this matcher.
    ///
    /// `Glob` and `Regex` return the source of their compiled regex. `Any`
    /// and `Predicate` return the catch-all `".*"`, since a predicate cannot
    /// be expressed as a pattern.
    #[must_use]
    pub fn regex_source_for_prefilter(&self) -> String {
        match self {
            Self::Any | Self::Predicate(_) => ".*".to_string(),
            Self::Glob { regex, .. } | Self::Regex(regex) => regex.as_str().to_string(),
        }
    }

    /// Reports whether two matchers are considered the same.
    ///
    /// Globs compare by original glob text and regexes by source string.
    /// Predicates compare by `Arc` pointer identity, so two separately
    /// created closures are never equivalent even if their code is the same.
    /// Matchers of different variants are never equivalent.
    #[must_use]
    pub fn equivalent(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Any, Self::Any) => true,
            (Self::Glob { pattern: a, .. }, Self::Glob { pattern: b, .. }) => a == b,
            (Self::Regex(a), Self::Regex(b)) => a.as_str() == b.as_str(),
            (Self::Predicate(a), Self::Predicate(b)) => Arc::ptr_eq(a, b),
            _ => false,
        }
    }
}
impl fmt::Debug for UrlMatcher {
    /// Formats the matcher without exposing the compiled regex internals or
    /// the predicate closure: globs show their pattern text, regexes show
    /// their source string, and predicates show a fixed placeholder.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Variants with a fixed rendering.
            Self::Any => f.write_str("UrlMatcher::Any"),
            Self::Predicate(_) => f.write_str("UrlMatcher::Predicate(<fn>)"),
            // Variants that delegate to the standard debug builders.
            Self::Glob { pattern, .. } => {
                let mut builder = f.debug_struct("UrlMatcher::Glob");
                builder.field("pattern", pattern);
                builder.finish()
            }
            Self::Regex(compiled) => {
                let source = compiled.as_str();
                f.debug_tuple("UrlMatcher::Regex").field(&source).finish()
            }
        }
    }
}
/// Translates a URL glob into the source text of an anchored regex.
///
/// Translation rules:
/// * `\x` emits `x` literally, regex-escaped if `x` is a regex metacharacter.
///   A trailing lone backslash is emitted as an escaped backslash.
/// * `*` becomes `([^/]*)`.
/// * A run of two or more `*` becomes `(.*)`. If the run is followed by `/`,
///   the slash is consumed and the pair becomes `((.+/)|)` when the run was
///   also preceded by `/`, or `(.*/)` otherwise.
/// * `{`, `}` open and close a group; `,` inside a group becomes `|`, and
///   outside a group it is emitted as `\,`.
/// * Any other character is emitted as-is, escaped if it is a metacharacter.
///
/// The result always starts with `^` and ends with `$`.
#[must_use]
pub fn glob_to_regex_pattern(glob: &str) -> String {
    const ESCAPED: &[char] = &['$', '^', '+', '.', '*', '(', ')', '|', '\\', '?', '{', '}', '[', ']'];
    let needs_escape = |ch: char| ESCAPED.contains(&ch);

    let mut pattern = String::with_capacity(glob.len() * 2 + 2);
    pattern.push('^');

    let mut rest = glob.chars().peekable();
    // Last raw glob character consumed before the current one.
    let mut previous: Option<char> = None;
    let mut inside_braces = false;

    while let Some(current) = rest.next() {
        let before = previous;
        previous = Some(current);

        // Backslash escape: only when another character follows.
        if current == '\\' {
            if let Some(literal) = rest.next() {
                if needs_escape(literal) {
                    pattern.push('\\');
                }
                pattern.push(literal);
                previous = Some(literal);
                continue;
            }
        }

        match current {
            '*' => {
                // Swallow the remainder of the star run.
                let mut run_len = 1usize;
                while rest.next_if_eq(&'*').is_some() {
                    run_len += 1;
                }
                if run_len == 1 {
                    pattern.push_str("([^/]*)");
                } else if rest.next_if_eq(&'/').is_some() {
                    // `**/` also eats the slash that follows it.
                    previous = Some('/');
                    pattern.push_str(if before == Some('/') { "((.+/)|)" } else { "(.*/)" });
                } else {
                    pattern.push_str("(.*)");
                }
            }
            '{' => {
                inside_braces = true;
                pattern.push('(');
            }
            '}' => {
                inside_braces = false;
                pattern.push(')');
            }
            ',' if inside_braces => pattern.push('|'),
            ',' => pattern.push_str("\\,"),
            other => {
                if needs_escape(other) {
                    pattern.push('\\');
                }
                pattern.push(other);
            }
        }
    }

    pattern.push('$');
    pattern
}
#[cfg(test)]
mod tests {
    //! Unit tests for glob translation and for each `UrlMatcher` variant.

    use super::*;

    // ---- glob_to_regex_pattern: exact translations -------------------------

    #[test]
    fn empty_glob_yields_anchor_only() {
        assert_eq!(glob_to_regex_pattern(""), "^$");
    }

    #[test]
    fn single_star_is_segment_wildcard() {
        assert_eq!(glob_to_regex_pattern("*"), "^([^/]*)$");
    }

    #[test]
    fn double_star_standalone_is_any() {
        assert_eq!(glob_to_regex_pattern("**"), "^(.*)$");
    }

    #[test]
    fn slash_double_star_slash_collapses() {
        assert_eq!(glob_to_regex_pattern("/**/"), "^/((.+/)|)$");
    }

    // A leading `**/` becomes `(.*/)`: the prefix may be empty, but the slash
    // itself is mandatory, so a bare `api` does not match.
    #[test]
    fn leading_double_star_slash_requires_a_slash_before_suffix() {
        assert_eq!(glob_to_regex_pattern("**/api"), "^(.*/)api$");
        let m = UrlMatcher::glob("**/api").expect("valid glob");
        assert!(m.matches("/api"));
        assert!(m.matches("https://example.com/api"));
        assert!(!m.matches("api"));
    }

    #[test]
    fn regex_metachars_are_escaped() {
        assert_eq!(glob_to_regex_pattern("a.b+c"), "^a\\.b\\+c$");
    }

    #[test]
    fn brace_group_becomes_alternation() {
        assert_eq!(glob_to_regex_pattern("*.{png,jpg}"), "^([^/]*)\\.(png|jpg)$");
    }

    #[test]
    fn comma_outside_group_is_literal() {
        assert_eq!(glob_to_regex_pattern("a,b"), "^a\\,b$");
    }

    #[test]
    fn backslash_escape_of_metachar_produces_escaped_literal() {
        assert_eq!(glob_to_regex_pattern(r"\*"), "^\\*$");
    }

    #[test]
    fn backslash_escape_of_plain_char_produces_plain() {
        assert_eq!(glob_to_regex_pattern(r"\a"), "^a$");
    }

    // A backslash with nothing after it is not an escape; it is emitted as a
    // literal (regex-escaped) backslash.
    #[test]
    fn trailing_backslash_is_a_literal_backslash() {
        assert_eq!(glob_to_regex_pattern("a\\"), "^a\\\\$");
    }

    // ---- UrlMatcher::glob ---------------------------------------------------

    #[test]
    fn playwright_canonical_api_glob_compiles_and_matches() {
        let m = UrlMatcher::glob("**/api/*").expect("valid glob");
        assert!(m.matches("https://example.com/api/users"));
        assert!(m.matches("https://example.com/v1/api/users"));
        // A single `*` does not cross a `/`.
        assert!(!m.matches("https://example.com/api/users/123"));
        assert!(m.matches("/api/x"));
    }

    #[test]
    fn empty_glob_collapses_to_any() {
        let m = UrlMatcher::glob("").unwrap();
        assert!(matches!(m, UrlMatcher::Any));
    }

    // An unmatched `}` translates to an unopened `)`, which must surface as
    // an error rather than a panic.
    #[test]
    fn unbalanced_brace_glob_is_rejected() {
        let err = UrlMatcher::glob("a}").unwrap_err();
        assert!(matches!(err, FerriError::InvalidArgument { .. }));
    }

    // ---- UrlMatcher::any / regex / predicate -------------------------------

    #[test]
    fn any_matches_all() {
        let m = UrlMatcher::any();
        assert!(m.matches(""));
        assert!(m.matches("https://example.com"));
    }

    #[test]
    fn regex_substring_match_when_unanchored() {
        let re = regex::Regex::new(r"/api/").unwrap();
        let m = UrlMatcher::regex(re);
        assert!(m.matches("https://example.com/api/users"));
        assert!(!m.matches("https://example.com/rest/users"));
    }

    #[test]
    fn regex_from_source_with_case_insensitive_flag() {
        let m = UrlMatcher::regex_from_source(r"/API/", "i").unwrap();
        assert!(m.matches("https://example.com/api/x"));
        assert!(m.matches("https://example.com/API/x"));
    }

    #[test]
    fn regex_from_source_global_flag_is_accepted_and_ignored() {
        let m = UrlMatcher::regex_from_source(r"/api/", "g").unwrap();
        assert!(m.matches("https://example.com/api/x"));
    }

    #[test]
    fn regex_from_source_unknown_flag_rejected() {
        let err = UrlMatcher::regex_from_source(r"/api/", "x").unwrap_err();
        assert!(matches!(err, FerriError::InvalidArgument { .. }));
    }

    #[test]
    fn predicate_matcher_invokes_closure() {
        let m = UrlMatcher::predicate(|url| url.contains("/api/"));
        assert!(m.matches("https://example.com/api/users"));
        assert!(!m.matches("https://example.com/static/users"));
    }

    // ---- identifier / equivalent --------------------------------------------

    #[test]
    fn identifier_of_each_variant() {
        assert_eq!(UrlMatcher::any().identifier(), "");
        assert_eq!(UrlMatcher::glob("**/api").unwrap().identifier(), "**/api");
        assert_eq!(UrlMatcher::regex(regex::Regex::new("x").unwrap()).identifier(), "x");
        assert_eq!(UrlMatcher::predicate(|_| true).identifier(), "<predicate>");
    }

    #[test]
    fn equivalent_same_glob_source() {
        let a = UrlMatcher::glob("**/api").unwrap();
        let b = UrlMatcher::glob("**/api").unwrap();
        assert!(a.equivalent(&b));
    }

    #[test]
    fn equivalent_different_glob_source() {
        let a = UrlMatcher::glob("**/api").unwrap();
        let b = UrlMatcher::glob("**/v2").unwrap();
        assert!(!a.equivalent(&b));
    }

    #[test]
    fn equivalent_same_regex_source() {
        let a = UrlMatcher::regex(regex::Regex::new("x").unwrap());
        let b = UrlMatcher::regex(regex::Regex::new("x").unwrap());
        assert!(a.equivalent(&b));
    }

    #[test]
    fn equivalent_predicate_is_pointer_identity() {
        let a = UrlMatcher::predicate(|u| u.contains("/api/"));
        let b = a.clone();
        assert!(a.equivalent(&b));
        // Same closure body, but a distinct allocation.
        let c = UrlMatcher::predicate(|u| u.contains("/api/"));
        assert!(!a.equivalent(&c));
    }

    #[test]
    fn cross_variant_not_equivalent() {
        let g = UrlMatcher::glob("x").unwrap();
        let r = UrlMatcher::regex(regex::Regex::new("x").unwrap());
        assert!(!g.equivalent(&r));
    }
}