Skip to main content

gatekpr_patterns/
registry.rs

//! Pattern registry for managing compiled regex patterns.
//!
//! Provides a centralized way to register named patterns once and match them
//! across the codebase.
4
5use regex::Regex;
6use std::collections::HashMap;
7use thiserror::Error;
8
9/// Errors from pattern operations
10#[derive(Error, Debug)]
11pub enum PatternError {
12    /// Invalid regex pattern
13    #[error("Invalid regex pattern '{pattern}': {message}")]
14    InvalidPattern { pattern: String, message: String },
15
16    /// Pattern not found in registry
17    #[error("Pattern not found: {0}")]
18    NotFound(String),
19}
20
21/// Result type for pattern operations
22pub type Result<T> = std::result::Result<T, PatternError>;
23
/// A single occurrence of a registered pattern inside a searched text.
///
/// The value owns its data (`pattern_key` and `text` are copies), so it can
/// outlive both the registry and the text it was found in.
///
/// `PartialEq`, `Eq` and `Hash` are derived so matches can be compared in
/// assertions and de-duplicated in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PatternMatch {
    /// The registry key of the pattern that produced this match.
    pub pattern_key: String,
    /// Start position of the match (as reported by the regex engine).
    pub start: usize,
    /// End position of the match (as reported by the regex engine).
    pub end: usize,
    /// The matched text.
    pub text: String,
}
36
37/// Registry for managing compiled regex patterns
38///
39/// Patterns are compiled once and stored for efficient repeated matching.
40///
41/// # Example
42///
43/// ```
44/// use gatekpr_patterns::PatternRegistry;
45///
46/// let mut registry = PatternRegistry::new();
47/// registry.register("email", r"[\w\.-]+@[\w\.-]+\.\w+").unwrap();
48///
49/// assert!(registry.is_match("email", "test@example.com"));
50/// ```
51#[derive(Default)]
52pub struct PatternRegistry {
53    patterns: HashMap<String, Regex>,
54}
55
56impl PatternRegistry {
57    /// Create a new empty pattern registry
58    pub fn new() -> Self {
59        Self::default()
60    }
61
62    /// Register a new pattern
63    ///
64    /// Returns an error if the pattern is invalid regex.
65    pub fn register(&mut self, key: &str, pattern: &str) -> Result<()> {
66        let regex = Regex::new(pattern).map_err(|e| PatternError::InvalidPattern {
67            pattern: pattern.to_string(),
68            message: e.to_string(),
69        })?;
70        self.patterns.insert(key.to_string(), regex);
71        Ok(())
72    }
73
74    /// Register multiple patterns at once
75    pub fn register_many(&mut self, patterns: &[(&str, &str)]) -> Result<()> {
76        for (key, pattern) in patterns {
77            self.register(key, pattern)?;
78        }
79        Ok(())
80    }
81
82    /// Check if a pattern matches the given text
83    pub fn is_match(&self, key: &str, text: &str) -> bool {
84        self.patterns
85            .get(key)
86            .map(|regex| regex.is_match(text))
87            .unwrap_or(false)
88    }
89
90    /// Check if any of the given patterns match
91    pub fn any_match(&self, keys: &[&str], text: &str) -> bool {
92        keys.iter().any(|key| self.is_match(key, text))
93    }
94
95    /// Check if all of the given patterns match
96    pub fn all_match(&self, keys: &[&str], text: &str) -> bool {
97        keys.iter().all(|key| self.is_match(key, text))
98    }
99
100    /// Find all matches for a pattern in text
101    pub fn find_all(&self, key: &str, text: &str) -> Vec<PatternMatch> {
102        self.patterns
103            .get(key)
104            .map(|regex| {
105                regex
106                    .find_iter(text)
107                    .map(|m| PatternMatch {
108                        pattern_key: key.to_string(),
109                        start: m.start(),
110                        end: m.end(),
111                        text: m.as_str().to_string(),
112                    })
113                    .collect()
114            })
115            .unwrap_or_default()
116    }
117
118    /// Find matches from any of the given patterns
119    pub fn find_any(&self, keys: &[&str], text: &str) -> Vec<PatternMatch> {
120        keys.iter()
121            .flat_map(|key| self.find_all(key, text))
122            .collect()
123    }
124
125    /// Get the first match for a pattern
126    pub fn find_first(&self, key: &str, text: &str) -> Option<PatternMatch> {
127        self.patterns.get(key).and_then(|regex| {
128            regex.find(text).map(|m| PatternMatch {
129                pattern_key: key.to_string(),
130                start: m.start(),
131                end: m.end(),
132                text: m.as_str().to_string(),
133            })
134        })
135    }
136
137    /// Get pattern by key
138    pub fn get(&self, key: &str) -> Option<&Regex> {
139        self.patterns.get(key)
140    }
141
142    /// Check if a pattern key exists
143    pub fn contains(&self, key: &str) -> bool {
144        self.patterns.contains_key(key)
145    }
146
147    /// Get the number of registered patterns
148    pub fn len(&self) -> usize {
149        self.patterns.len()
150    }
151
152    /// Check if the registry is empty
153    pub fn is_empty(&self) -> bool {
154        self.patterns.is_empty()
155    }
156
157    /// Get all pattern keys
158    pub fn keys(&self) -> impl Iterator<Item = &str> {
159        self.patterns.keys().map(|s| s.as_str())
160    }
161
162    /// Merge another registry into this one
163    pub fn merge(&mut self, other: PatternRegistry) {
164        self.patterns.extend(other.patterns);
165    }
166
167    /// Create a new registry by merging multiple registries
168    pub fn merged(registries: Vec<PatternRegistry>) -> Self {
169        let mut result = Self::new();
170        for registry in registries {
171            result.merge(registry);
172        }
173        result
174    }
175}
176
/// A pattern set groups related patterns for a specific domain.
///
/// It is a builder: patterns are collected as uncompiled source text and are
/// only compiled when `build` turns the set into a `PatternRegistry`.
///
/// The type is `Debug` and `Clone` so a partially configured set can be
/// inspected in diagnostics or reused as a template for several registries.
///
/// # Example
///
/// ```
/// use gatekpr_patterns::PatternSet;
///
/// let set = PatternSet::new("webhooks")
///     .with_pattern("gdpr_data_request", r"customers[/_]data[/_]request")
///     .with_pattern("gdpr_redact", r"customers[/_]redact")
///     .build()
///     .unwrap();
///
/// assert!(set.is_match("gdpr_data_request", "customers/data_request"));
/// ```
#[derive(Debug, Clone)]
pub struct PatternSet {
    /// Name given to this set at construction.
    name: String,
    /// `(key, pattern source)` pairs in the order they were added.
    patterns: Vec<(String, String)>,
}
196
197impl PatternSet {
198    /// Create a new pattern set builder
199    pub fn new(name: &str) -> Self {
200        Self {
201            name: name.to_string(),
202            patterns: Vec::new(),
203        }
204    }
205
206    /// Add a pattern to the set
207    pub fn with_pattern(mut self, key: &str, pattern: &str) -> Self {
208        self.patterns.push((key.to_string(), pattern.to_string()));
209        self
210    }
211
212    /// Add multiple patterns at once
213    pub fn with_patterns(mut self, patterns: &[(&str, &str)]) -> Self {
214        for (key, pattern) in patterns {
215            self.patterns.push((key.to_string(), pattern.to_string()));
216        }
217        self
218    }
219
220    /// Build the pattern set into a registry
221    pub fn build(self) -> Result<PatternRegistry> {
222        let mut registry = PatternRegistry::new();
223        for (key, pattern) in self.patterns {
224            registry.register(&key, &pattern)?;
225        }
226        Ok(registry)
227    }
228
229    /// Get the name of this pattern set
230    pub fn name(&self) -> &str {
231        &self.name
232    }
233}
234
#[cfg(test)]
mod tests {
    //! Unit tests for `PatternRegistry` and `PatternSet`.

    use super::*;

    #[test]
    fn test_register_and_match() {
        let mut registry = PatternRegistry::new();
        registry.register("test", r"\d+").unwrap();

        assert!(registry.is_match("test", "abc123"));
        assert!(!registry.is_match("test", "abc"));
        // Unknown keys never match.
        assert!(!registry.is_match("nonexistent", "123"));
    }

    #[test]
    fn test_register_overwrites_existing_key() {
        let mut registry = PatternRegistry::new();
        registry.register("k", r"aaa").unwrap();
        registry.register("k", r"bbb").unwrap();

        assert_eq!(registry.len(), 1);
        assert!(registry.is_match("k", "bbb"));
        assert!(!registry.is_match("k", "aaa"));
    }

    #[test]
    fn test_register_many() {
        let mut registry = PatternRegistry::new();
        registry
            .register_many(&[("digits", r"\d+"), ("word", r"\w+")])
            .unwrap();

        assert_eq!(registry.len(), 2);
        assert!(registry.contains("digits"));
        assert!(registry.contains("word"));

        // An invalid pattern is reported and is not stored.
        assert!(registry.register_many(&[("bad", r"(unclosed")]).is_err());
        assert!(!registry.contains("bad"));
    }

    #[test]
    fn test_find_all() {
        let mut registry = PatternRegistry::new();
        registry.register("digits", r"\d+").unwrap();

        let matches = registry.find_all("digits", "a1b22c333");
        assert_eq!(matches.len(), 3);
        assert_eq!(matches[0].text, "1");
        assert_eq!(matches[1].text, "22");
        assert_eq!(matches[2].text, "333");

        // Positions and key are carried along with the text.
        assert_eq!((matches[0].start, matches[0].end), (1, 2));
        assert_eq!((matches[1].start, matches[1].end), (3, 5));
        assert_eq!((matches[2].start, matches[2].end), (6, 9));
        assert!(matches.iter().all(|m| m.pattern_key == "digits"));

        assert!(registry.find_all("digits", "abc").is_empty());
        assert!(registry.find_all("missing", "123").is_empty());
    }

    #[test]
    fn test_find_first() {
        let mut registry = PatternRegistry::new();
        registry.register("digits", r"\d+").unwrap();

        let first = registry.find_first("digits", "a1b22").unwrap();
        assert_eq!(first.pattern_key, "digits");
        assert_eq!(first.text, "1");
        assert_eq!((first.start, first.end), (1, 2));

        assert!(registry.find_first("digits", "abc").is_none());
        assert!(registry.find_first("missing", "123").is_none());
    }

    #[test]
    fn test_find_any() {
        let mut registry = PatternRegistry::new();
        registry.register("digits", r"\d+").unwrap();
        registry.register("letters", r"[a-z]+").unwrap();

        // Results follow the order of `keys`, not the position in the text.
        let matches = registry.find_any(&["digits", "missing", "letters"], "ab12");
        let summary: Vec<(&str, &str)> = matches
            .iter()
            .map(|m| (m.pattern_key.as_str(), m.text.as_str()))
            .collect();
        assert_eq!(summary, vec![("digits", "12"), ("letters", "ab")]);
    }

    #[test]
    fn test_any_match() {
        let mut registry = PatternRegistry::new();
        registry.register("a", r"aaa").unwrap();
        registry.register("b", r"bbb").unwrap();
        registry.register("c", r"ccc").unwrap();

        assert!(registry.any_match(&["a", "b"], "xxxaaayyy"));
        assert!(!registry.any_match(&["a", "b"], "xxxcccyyy"));
        assert!(!registry.any_match(&[], "xxxaaayyy"));
    }

    #[test]
    fn test_all_match() {
        let mut registry = PatternRegistry::new();
        registry.register("a", r"aaa").unwrap();
        registry.register("b", r"bbb").unwrap();

        assert!(registry.all_match(&["a", "b"], "aaabbb"));
        assert!(!registry.all_match(&["a", "b"], "aaa"));
        // An unknown key counts as a non-match.
        assert!(!registry.all_match(&["a", "missing"], "aaa"));
        // No keys at all is vacuously true.
        assert!(registry.all_match(&[], "anything"));
    }

    #[test]
    fn test_introspection() {
        let mut registry = PatternRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);

        registry.register("b", r"b").unwrap();
        registry.register("a", r"a").unwrap();

        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 2);
        assert!(registry.contains("a"));
        assert!(!registry.contains("z"));
        assert!(registry.get("a").is_some());
        assert!(registry.get("z").is_none());

        // Key order is unspecified, so sort before comparing.
        let mut keys: Vec<&str> = registry.keys().collect();
        keys.sort_unstable();
        assert_eq!(keys, vec!["a", "b"]);
    }

    #[test]
    fn test_merge() {
        let mut left = PatternRegistry::new();
        left.register("shared", r"left").unwrap();
        left.register("only_left", r"l+").unwrap();

        let mut right = PatternRegistry::new();
        right.register("shared", r"right").unwrap();
        right.register("only_right", r"r+").unwrap();

        left.merge(right);

        assert_eq!(left.len(), 3);
        assert!(left.contains("only_left"));
        assert!(left.contains("only_right"));
        // The merged-in registry wins on key collisions.
        assert!(left.is_match("shared", "right"));
        assert!(!left.is_match("shared", "left"));
    }

    #[test]
    fn test_merged() {
        let mut first = PatternRegistry::new();
        first.register("a", r"a").unwrap();
        let mut second = PatternRegistry::new();
        second.register("b", r"b").unwrap();

        let combined = PatternRegistry::merged(vec![first, second]);
        assert_eq!(combined.len(), 2);
        assert!(combined.contains("a"));
        assert!(combined.contains("b"));

        assert!(PatternRegistry::merged(Vec::new()).is_empty());
    }

    #[test]
    fn test_pattern_set() {
        let registry = PatternSet::new("test")
            .with_pattern("email", r"[\w\.-]+@[\w\.-]+")
            .with_pattern("phone", r"\d{3}-\d{4}")
            .build()
            .unwrap();

        assert!(registry.is_match("email", "test@example.com"));
        assert!(registry.is_match("phone", "123-4567"));
    }

    #[test]
    fn test_pattern_set_name_and_batch() {
        let set = PatternSet::new("batch").with_patterns(&[("a", r"a+"), ("b", r"b+")]);
        assert_eq!(set.name(), "batch");

        let registry = set.build().unwrap();
        assert_eq!(registry.len(), 2);
        assert!(registry.is_match("a", "caab"));
        assert!(registry.is_match("b", "caab"));
    }

    #[test]
    fn test_pattern_set_invalid_pattern() {
        let result = PatternSet::new("broken")
            .with_pattern("ok", r"\d+")
            .with_pattern("bad", r"[invalid")
            .build();
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_pattern() {
        let mut registry = PatternRegistry::new();
        let err = registry.register("invalid", r"[invalid").unwrap_err();

        match err {
            PatternError::InvalidPattern { pattern, .. } => assert_eq!(pattern, "[invalid"),
            other => panic!("unexpected error: {}", other),
        }
        // A failed registration stores nothing.
        assert!(registry.is_empty());
    }
}