Skip to main content

halter_hooks/
matcher.rs

1// pattern: Functional Core
2
3//! Typed, once-compiled matcher for hook event-matcher strings.
4//!
5//! Patterns are compiled at config-load time via `CompiledMatcher::compile`.
6//! Invalid patterns fail at that boundary; the engine consumes the compiled
7//! matcher and never sees a raw string (H22/H27, defense-in-depth). Glob
8//! patterns use `globset` with subtree semantics: `*.example.com` matches
9//! `api.example.com` and `api.prod.example.com` alike (H14). The hand-rolled
10//! `wildcard_match_impl` is retired in favor of the same globset path (M30).
11
12use globset::{Glob, GlobBuilder, GlobMatcher};
13use regex::Regex;
14use thiserror::Error;
15
/// A matcher pattern compiled once at config-parse time.
///
/// Each variant stores an already-built matcher, so `CompiledMatcher::is_match`
/// only dispatches on the variant and never re-parses a pattern string.
#[derive(Debug, Clone)]
pub enum CompiledMatcher {
    /// Literal match. Compared with `eq_ignore_ascii_case`, so case folding
    /// applies to ASCII letters only. `compile` picks this variant for patterns
    /// with no glob wildcards and no regex metacharacters, and for the empty
    /// pattern.
    Exact(String),
    /// `globset`-backed glob match. `*.example.com` matches subtree. Patterns
    /// other than the bare `*` are built case-insensitively with
    /// `literal_separator(false)` by `compile_glob`.
    Glob(GlobMatcher),
    /// Regular-expression match, compiled with `Regex::new`; case sensitivity
    /// is whatever the pattern itself specifies.
    Regex(Regex),
}
26
/// Error produced when a matcher pattern cannot be compiled.
///
/// Both variants carry the offending pattern (as trimmed by the compile
/// functions) together with the underlying parser error, which is marked
/// `#[source]` so it is reachable through the standard error-source chain.
#[derive(Debug, Error)]
pub enum MatcherCompileError {
    /// The pattern was rejected by the `regex` crate.
    #[error("invalid regex pattern '{pattern}': {source}")]
    Regex {
        /// Pattern text that failed to compile.
        pattern: String,
        /// Parse error reported by `regex`.
        #[source]
        source: regex::Error,
    },
    /// The pattern was rejected by the `globset` crate.
    #[error("invalid glob pattern '{pattern}': {source}")]
    Glob {
        /// Pattern text as supplied by the caller (before any internal
        /// rewrite applied by `compile_glob`).
        pattern: String,
        /// Parse error reported by `globset`.
        #[source]
        source: globset::Error,
    },
}
42
43impl CompiledMatcher {
44    /// Compile an event-matcher pattern. Patterns containing regex
45    /// metacharacters are treated as regex; patterns containing only glob
46    /// wildcards (`*`, `?`) are compiled as globs; everything else is a
47    /// case-insensitive literal.
48    pub fn compile(pattern: &str) -> Result<Self, MatcherCompileError> {
49        let trimmed = pattern.trim();
50        if trimmed.is_empty() {
51            // Empty matcher compiles to an "Exact empty" marker. Callers
52            // generally filter empty matchers before reaching here, but we
53            // accept it gracefully.
54            return Ok(Self::Exact(String::new()));
55        }
56        if looks_like_regex(trimmed) {
57            let regex = Regex::new(trimmed).map_err(|source| MatcherCompileError::Regex {
58                pattern: trimmed.to_owned(),
59                source,
60            })?;
61            return Ok(Self::Regex(regex));
62        }
63        if looks_like_glob(trimmed) {
64            return Self::compile_glob(trimmed);
65        }
66        Ok(Self::Exact(trimmed.to_owned()))
67    }
68
69    /// Force-compile a pattern as a regex, regardless of shape. Used when
70    /// the caller wants regex semantics explicitly (event matcher strings in
71    /// `hooks.json`).
72    pub fn compile_regex(pattern: &str) -> Result<Self, MatcherCompileError> {
73        let trimmed = pattern.trim();
74        let regex = Regex::new(trimmed).map_err(|source| MatcherCompileError::Regex {
75            pattern: trimmed.to_owned(),
76            source,
77        })?;
78        Ok(Self::Regex(regex))
79    }
80
81    /// Force-compile a pattern as a glob. `*.example.com` becomes a subtree
82    /// match. The bare `*` is retained as a universal match.
83    pub fn compile_glob(pattern: &str) -> Result<Self, MatcherCompileError> {
84        let trimmed = pattern.trim();
85        if trimmed == "*" {
86            // Any input matches. Express as a glob that matches any sequence,
87            // including the empty string.
88            let glob = Glob::new("*").map_err(|source| MatcherCompileError::Glob {
89                pattern: trimmed.to_owned(),
90                source,
91            })?;
92            return Ok(Self::Glob(glob.compile_matcher()));
93        }
94        let rewritten = rewrite_glob_for_subtree(trimmed);
95        let glob = GlobBuilder::new(&rewritten)
96            .case_insensitive(true)
97            .literal_separator(false)
98            .build()
99            .map_err(|source| MatcherCompileError::Glob {
100                pattern: trimmed.to_owned(),
101                source,
102            })?;
103        Ok(Self::Glob(glob.compile_matcher()))
104    }
105
106    pub fn is_match(&self, input: &str) -> bool {
107        match self {
108            Self::Exact(literal) => literal.eq_ignore_ascii_case(input),
109            Self::Glob(matcher) => matcher.is_match(input),
110            Self::Regex(regex) => regex.is_match(input),
111        }
112    }
113}
114
/// Turn a leading `*.` into `**.` and leave every other pattern untouched.
///
/// The goal is that `*.example.com` behaves as a subtree match, accepting
/// `a.b.example.com` as well as `a.example.com`. Only the very first two
/// characters are inspected; a `*.` appearing later in the pattern is not
/// rewritten.
///
/// NOTE(review): `globset` documents `/` (not `.`) as its separator and gives
/// `**` special meaning only as a whole path component. Confirm against the
/// pinned `globset` version whether this rewrite is still needed or is merely
/// harmless alongside `literal_separator(false)`.
fn rewrite_glob_for_subtree(pattern: &str) -> String {
    pattern
        .strip_prefix("*.")
        .map_or_else(|| pattern.to_owned(), |suffix| format!("**.{suffix}"))
}
125
/// Heuristic: does `pattern` contain a character that marks it as a regex?
///
/// The trigger set is brackets, parentheses, braces, `+`, `^`, `$`, `\` and
/// `|`. Note that `.`, `*` and `?` are deliberately absent, so dotted names
/// such as `api.example.com` and plain globs are not classified as regex.
fn looks_like_regex(pattern: &str) -> bool {
    const REGEX_METACHARS: &[char] = &['[', ']', '(', ')', '{', '}', '+', '^', '$', '\\', '|'];
    pattern.contains(REGEX_METACHARS)
}
134
/// Heuristic: does `pattern` contain a glob wildcard (`*` or `?`)?
fn looks_like_glob(pattern: &str) -> bool {
    const GLOB_WILDCARDS: &[char] = &['*', '?'];
    pattern.contains(GLOB_WILDCARDS)
}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// AC3.1: a `*.example.com` glob accepts direct subdomains as well as
    /// arbitrarily nested ones.
    #[test]
    fn review_hook_runtime_ac3_1_glob_matches_subtree() {
        let subtree = CompiledMatcher::compile_glob("*.example.com").expect("compile");
        assert!(subtree.is_match("api.example.com"));
        assert!(subtree.is_match("api.prod.example.com"));
        assert!(subtree.is_match("deeply.nested.example.com"));
    }

    /// AC3.2: the bare `*` glob accepts every host/event string, including
    /// the empty one.
    #[test]
    fn review_hook_runtime_ac3_2_bare_star_matches_anything() {
        let universal = CompiledMatcher::compile_glob("*").expect("compile");
        assert!(universal.is_match(""));
        assert!(universal.is_match("literally anything"));
        assert!(universal.is_match("api.example.com"));
    }

    /// AC3.3: a literal pattern accepts itself and nothing that merely
    /// resembles it.
    #[test]
    fn review_hook_runtime_ac3_3_literal_matches_only_exact() {
        let literal = CompiledMatcher::compile("api.example.com").expect("compile");
        assert!(literal.is_match("api.example.com"));
        assert!(!literal.is_match("api2.example.com"));
        assert!(!literal.is_match("api.example.com.evil.com"));
        // Case differences are ignored on purpose: HTTP hosts and hook event
        // identifiers are both case-insensitive.
        assert!(literal.is_match("API.Example.Com"));
    }

    /// AC3.4: a malformed regex is rejected when compiled, surfacing as a
    /// `MatcherCompileError::Regex`.
    #[test]
    fn review_hook_runtime_ac3_4_invalid_regex_rejected_at_compile() {
        let outcome = CompiledMatcher::compile_regex("(");
        let error = outcome.expect_err("invalid regex must reject");
        assert!(matches!(error, MatcherCompileError::Regex { .. }));
    }

    /// AC3.6: the glob path behaves like the retired `wildcard_match_impl`.
    #[test]
    fn review_hook_runtime_ac3_6_glob_parity_with_legacy_matcher() {
        // Legacy contract: matching ignores case, `*` stands for any run of
        // characters, and literal tokens must match exactly.
        // Columns: (pattern, candidate, expected verdict).
        const LEGACY_CASES: &[(&str, &str, bool)] = &[
            ("git *", "git status", true),
            ("git *", "cargo test", false),
            ("shell", "Shell", true),
            ("*", "anything", true),
            ("read*", "readtokens", true),
            ("read*", "write", false),
        ];
        for &(pattern, candidate, expected) in LEGACY_CASES {
            let compiled = CompiledMatcher::compile(pattern).expect("compile");
            assert_eq!(
                compiled.is_match(candidate),
                expected,
                "pattern {pattern} vs {candidate}",
            );
        }
    }
}