zendriver_interception/url_pattern.rs
1//! CDP-style URL pattern matching.
2//!
3//! Compiles a CDP wildcard pattern (`*` matches any sequence, `?` matches a
4//! single character) into a [`regex::Regex`]. All other regex metacharacters
5//! in the input are escaped so the user-facing surface stays the simple CDP
6//! syntax used by `Fetch.RequestPattern.urlPattern`.
7
8use regex::Regex;
9
10use crate::error::InterceptionError;
11
12/// A compiled CDP URL pattern.
13///
14/// Constructed via [`UrlPattern::new`] from a string using CDP wildcard
15/// syntax. Use [`UrlPattern::matches`] to test URLs against it, and
16/// [`UrlPattern::pattern_str`] to recover the original pattern (e.g. when
17/// forwarding to `Fetch.enable { patterns }`).
18#[derive(Debug, Clone)]
19pub struct UrlPattern {
20 pattern: String,
21 regex: Regex,
22}
23
24impl UrlPattern {
25 /// Compile a CDP-style URL pattern.
26 ///
27 /// `*` matches any sequence of characters (including empty), `?` matches
28 /// exactly one character, and every other regex metacharacter is escaped
29 /// so the user sees the simple wildcard surface.
30 ///
31 /// Returns [`InterceptionError::InvalidPattern`] if the resulting regex
32 /// fails to compile.
33 #[allow(clippy::result_large_err)] // InterceptionError shape fixed by error.rs; boxing is a cross-cutting decision tracked separately.
34 pub fn new(pattern: impl Into<String>) -> Result<Self, InterceptionError> {
35 let pattern = pattern.into();
36 let regex_src = compile_to_regex(&pattern);
37 let regex = Regex::new(®ex_src)
38 .map_err(|e| InterceptionError::InvalidPattern(format!("{pattern}: {e}")))?;
39 Ok(Self { pattern, regex })
40 }
41
42 /// Test whether `url` matches this pattern.
43 pub fn matches(&self, url: &str) -> bool {
44 self.regex.is_match(url)
45 }
46
47 /// The original pattern string passed to [`UrlPattern::new`].
48 ///
49 /// Useful when forwarding the pattern to CDP via
50 /// `Fetch.enable { patterns: [{ urlPattern: ... }] }`.
51 pub fn pattern_str(&self) -> &str {
52 &self.pattern
53 }
54}
55
56/// Convert a CDP wildcard pattern into an anchored regex source.
57///
58/// `*` → `.*`, `?` → `.`, all other regex metacharacters are escaped via
59/// [`regex::escape`]. The result is anchored at both ends so `matches`
60/// behaves as a full-URL match.
61fn compile_to_regex(pattern: &str) -> String {
62 let mut out = String::with_capacity(pattern.len() + 4);
63 out.push('^');
64 for ch in pattern.chars() {
65 match ch {
66 '*' => out.push_str(".*"),
67 '?' => out.push('.'),
68 other => out.push_str(®ex::escape(&other.to_string())),
69 }
70 }
71 out.push('$');
72 out
73}
74
#[cfg(test)]
#[allow(clippy::panic, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A lone `*` accepts every input, the empty string included, and the
    /// original pattern text is preserved.
    #[test]
    fn wildcard_star_matches_all() {
        let any = UrlPattern::new("*").unwrap();
        for url in ["https://example.com/", "http://foo.bar/baz?qux=1", ""] {
            assert!(any.matches(url));
        }
        assert_eq!(any.pattern_str(), "*");
    }

    /// Leading and trailing wildcards match any scheme/subdomain and path,
    /// while the literal host part still has to line up.
    #[test]
    fn subdomain_wildcard_matches() {
        let hosts = UrlPattern::new("*.example.com/*").unwrap();
        for url in ["https://api.example.com/foo", "ws://cdn.example.com/socket"] {
            assert!(hosts.matches(url));
        }
        assert!(!hosts.matches("https://example.org/foo"));
    }

    /// Exercises the only failure path reachable through user input.
    #[test]
    fn invalid_pattern_errors() {
        // The wildcard expansion escapes the regex metacharacters in the
        // input, so syntax-level failures (`[`, `\`, etc.) cannot be
        // triggered by a caller. What remains is `regex`'s default 10 MiB
        // compiled-size limit: a long enough run of `*` expands into enough
        // `.*` repetitions to exceed it.
        let oversized = "*".repeat(50_000);
        match UrlPattern::new(oversized).expect_err("expected size-limit failure") {
            InterceptionError::InvalidPattern(msg) => {
                // "exceeds size limit" itself contains "size", so a single
                // substring check covers both phrasings.
                assert!(
                    msg.contains("size"),
                    "expected size-limit message, got: {msg}",
                );
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
    }
}
116}