word_tally/options/
patterns.rs

1//! Regular expression pattern matching for word filtering.
2
3use core::{
4    cmp::Ordering,
5    fmt::{self, Display, Formatter},
6    hash::{Hash, Hasher},
7};
8
9use regex::RegexSet;
10use serde::{Deserialize, Serialize, de::Error};
11
12use crate::WordTallyError;
13
/// Owned list of regular expression source strings, one entry per pattern.
pub type PatternList = Vec<String>;
16
17/// Base struct for regex pattern filtering.
18///
19/// Contains a `Vec` of raw regexp input `String`s and their compiled `RegexSet`.
20#[derive(Clone, Debug)]
21struct Patterns {
22    /// Original pattern strings.
23    input_patterns: PatternList,
24    /// Compiled regex set for matching.
25    regex_set: RegexSet,
26}
27
28impl Default for Patterns {
29    fn default() -> Self {
30        Self {
31            input_patterns: Vec::new(),
32            regex_set: RegexSet::empty(),
33        }
34    }
35}
36
37impl PartialEq for Patterns {
38    fn eq(&self, other: &Self) -> bool {
39        self.input_patterns == other.input_patterns
40    }
41}
42
43impl Eq for Patterns {}
44
45impl PartialOrd for Patterns {
46    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
47        Some(self.cmp(other))
48    }
49}
50
51impl Ord for Patterns {
52    fn cmp(&self, other: &Self) -> Ordering {
53        self.input_patterns.cmp(&other.input_patterns)
54    }
55}
56
57impl Hash for Patterns {
58    fn hash<H: Hasher>(&self, state: &mut H) {
59        self.input_patterns.hash(state);
60    }
61}
62
63impl Display for Patterns {
64    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
65        write!(f, "{}", self.input_patterns.join(","))
66    }
67}
68
69impl AsRef<[String]> for Patterns {
70    fn as_ref(&self) -> &[String] {
71        &self.input_patterns
72    }
73}
74
75impl Patterns {
76    /// Creates a pattern set and compiles the `RegexSet`.
77    fn new(input_patterns: PatternList) -> Result<Self, WordTallyError> {
78        let regex_set = RegexSet::new(&input_patterns).map_err(|e| WordTallyError::Pattern {
79            kind: "regex".to_string(),
80            message: e.to_string(),
81        })?;
82
83        Ok(Self {
84            input_patterns,
85            regex_set,
86        })
87    }
88
89    /// Creates a pattern set from a slice of strings.
90    fn from_slice(input_patterns: &[String]) -> Result<Self, WordTallyError> {
91        Self::new(input_patterns.to_vec())
92    }
93
94    /// Checks if a word matches any pattern in the `RegexSet`.
95    fn matches(&self, word: &str) -> bool {
96        self.regex_set.is_match(word)
97    }
98
99    /// Returns a slice of the original input patterns.
100    #[allow(clippy::missing_const_for_fn)]
101    // Make this const when `const_vec_string_slice` is fully stabilized.
102    // Requires stable `Vec::as_slice` in const contexts (tracked in rust-lang/rust#129041).
103    fn as_patterns(&self) -> &[String] {
104        &self.input_patterns
105    }
106}
107
108/// Regex patterns used to exclude matching words.
109///
110/// # Examples
111///
112/// ```
113/// use word_tally::ExcludeSet;
114///
115/// // Create a pattern to exclude words ending with "ly"
116/// let patterns = ExcludeSet::new(vec!["ly$".to_string()]).unwrap();
117///
118/// // Test matching
119/// assert!(patterns.matches("quickly"));
120/// assert!(!patterns.matches("quick"));
121/// ```
122#[derive(Clone, Debug, Default)]
123pub struct ExcludeSet(Patterns);
124
125impl ExcludeSet {
126    /// Creates patterns from owned pattern strings.
127    ///
128    /// # Examples
129    ///
130    /// ```
131    /// use word_tally::ExcludeSet;
132    ///
133    /// // Create pattern for excluding numeric words
134    /// let patterns = ExcludeSet::new(vec![r"^\d+$".to_string()]).unwrap();
135    /// assert_eq!(patterns.len(), 1);
136    ///
137    /// // Test empty patterns
138    /// let empty = ExcludeSet::default();
139    /// assert!(empty.is_empty());
140    /// ```
141    ///
142    /// # Errors
143    ///
144    /// Returns an error if any pattern cannot be compiled into a valid regular expression.
145    pub fn new(input_patterns: PatternList) -> Result<Self, WordTallyError> {
146        Ok(Self(Patterns::new(input_patterns)?))
147    }
148
149    /// Tests if a word matches any pattern.
150    #[must_use]
151    pub fn matches(&self, word: &str) -> bool {
152        self.0.matches(word)
153    }
154
155    /// Returns a slice of the original pattern strings.
156    #[must_use]
157    pub fn as_patterns(&self) -> &[String] {
158        self.0.as_patterns()
159    }
160
161    /// Returns the number of patterns.
162    #[must_use]
163    pub const fn len(&self) -> usize {
164        self.0.input_patterns.len()
165    }
166
167    /// Returns true if there are no patterns.
168    #[must_use]
169    pub const fn is_empty(&self) -> bool {
170        self.0.input_patterns.is_empty()
171    }
172}
173
174impl<'a> TryFrom<&'a [String]> for ExcludeSet {
175    type Error = WordTallyError;
176
177    fn try_from(input_patterns: &'a [String]) -> Result<Self, Self::Error> {
178        Ok(Self(Patterns::from_slice(input_patterns)?))
179    }
180}
181
182impl AsRef<[String]> for ExcludeSet {
183    fn as_ref(&self) -> &[String] {
184        self.0.as_ref()
185    }
186}
187
188impl Serialize for ExcludeSet {
189    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
190    where
191        S: serde::Serializer,
192    {
193        self.0.input_patterns.serialize(serializer)
194    }
195}
196
197impl<'de> Deserialize<'de> for ExcludeSet {
198    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
199    where
200        D: serde::Deserializer<'de>,
201    {
202        let input_patterns: PatternList = Vec::deserialize(deserializer)?;
203
204        Self::new(input_patterns)
205            .map_err(|e| D::Error::custom(format!("failed to compile exclude regex patterns: {e}")))
206    }
207}
208
209impl PartialEq for ExcludeSet {
210    fn eq(&self, other: &Self) -> bool {
211        self.0 == other.0
212    }
213}
214
215impl Eq for ExcludeSet {}
216
217impl PartialOrd for ExcludeSet {
218    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
219        Some(self.cmp(other))
220    }
221}
222
223impl Ord for ExcludeSet {
224    fn cmp(&self, other: &Self) -> Ordering {
225        self.0.cmp(&other.0)
226    }
227}
228
229impl Hash for ExcludeSet {
230    fn hash<H: Hasher>(&self, state: &mut H) {
231        self.0.hash(state);
232    }
233}
234
235impl Display for ExcludeSet {
236    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
237        self.0.fmt(f)
238    }
239}
240
241/// Regex patterns used to include only matching words.
242///
243/// # Examples
244///
245/// ```
246/// use word_tally::IncludeSet;
247///
248/// // Create a pattern to include only words containing vowels.
249/// let patterns = IncludeSet::new(vec![r"[aeiou]".to_string()]).unwrap();
250///
251/// // Test matching
252/// assert!(patterns.matches("test")); // Contains 'e'
253/// assert!(!patterns.matches("rhythm")); // No vowels
254/// ```
255#[derive(Clone, Debug, Default)]
256pub struct IncludeSet(Patterns);
257
258impl IncludeSet {
259    /// Creates patterns from owned pattern strings.
260    ///
261    /// # Examples
262    ///
263    /// ```
264    /// use word_tally::IncludeSet;
265    ///
266    /// // Create patterns for including words with specific prefixes.
267    /// let patterns = IncludeSet::new(vec![r"^pre".to_string(), r"^un".to_string()]).unwrap();
268    ///
269    /// assert_eq!(patterns.len(), 2);
270    /// assert!(patterns.matches("prevent"));
271    /// assert!(patterns.matches("unlike"));
272    /// assert!(!patterns.matches("likely"));
273    /// ```
274    ///
275    /// # Errors
276    ///
277    /// Returns an error if any pattern cannot be compiled into a valid regular expression.
278    pub fn new(input_patterns: PatternList) -> Result<Self, WordTallyError> {
279        Ok(Self(Patterns::new(input_patterns)?))
280    }
281
282    /// Tests if a word matches any pattern.
283    #[must_use]
284    pub fn matches(&self, word: &str) -> bool {
285        self.0.matches(word)
286    }
287
288    /// Returns a slice of the original pattern strings.
289    #[must_use]
290    pub fn as_patterns(&self) -> &[String] {
291        self.0.as_patterns()
292    }
293
294    /// Returns the number of patterns.
295    #[must_use]
296    pub const fn len(&self) -> usize {
297        self.0.input_patterns.len()
298    }
299
300    /// Returns true if there are no patterns.
301    #[must_use]
302    pub const fn is_empty(&self) -> bool {
303        self.0.input_patterns.is_empty()
304    }
305}
306
307impl<'a> TryFrom<&'a [String]> for IncludeSet {
308    type Error = WordTallyError;
309
310    fn try_from(input_patterns: &'a [String]) -> Result<Self, Self::Error> {
311        Ok(Self(Patterns::from_slice(input_patterns)?))
312    }
313}
314
315impl AsRef<[String]> for IncludeSet {
316    fn as_ref(&self) -> &[String] {
317        self.0.as_ref()
318    }
319}
320
321impl Serialize for IncludeSet {
322    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
323    where
324        S: serde::Serializer,
325    {
326        self.0.input_patterns.serialize(serializer)
327    }
328}
329
330impl<'de> Deserialize<'de> for IncludeSet {
331    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
332    where
333        D: serde::Deserializer<'de>,
334    {
335        let input_patterns: PatternList = Vec::deserialize(deserializer)?;
336
337        Self::new(input_patterns)
338            .map_err(|e| D::Error::custom(format!("failed to compile include regex patterns: {e}")))
339    }
340}
341
342impl PartialEq for IncludeSet {
343    fn eq(&self, other: &Self) -> bool {
344        self.0 == other.0
345    }
346}
347
348impl Eq for IncludeSet {}
349
350impl PartialOrd for IncludeSet {
351    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
352        Some(self.cmp(other))
353    }
354}
355
356impl Ord for IncludeSet {
357    fn cmp(&self, other: &Self) -> Ordering {
358        self.0.cmp(&other.0)
359    }
360}
361
362impl Hash for IncludeSet {
363    fn hash<H: Hasher>(&self, state: &mut H) {
364        self.0.hash(state);
365    }
366}
367
368impl Display for IncludeSet {
369    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
370        self.0.fmt(f)
371    }
372}