scrolling_window_pattern_matcher/
lib.rs

//! # ScrollingWindowPatternMatcher
//!
//! A flexible, ergonomic pattern matcher for slices, arrays, and windows, supporting wildcards, custom predicate logic, and builder-style pattern construction.
//!
//! # Features
//! - Wildcard matching (`PatternElem::Any`)
//! - Flexible matcher signatures: the window may be passed as a `Vec`, slice, or array, and patterns may be passed owned or by reference
//! - Ergonomic builder patterns
//! - Custom matcher logic
//! - Flexible callback and overlap configuration
11
12use std::{borrow::Borrow, fmt};
13
/// A single position in a [`Pattern`]: a literal value, a predicate, or a wildcard.
///
/// Every variant is a struct variant carrying the same three optional settings:
///
/// - `min_repeat` / `max_repeat`: lower and upper bound on how many consecutive
///   window items the element may consume. `None` leaves the bound at the
///   matcher's default.
/// - `capture_name`: when set, the items consumed by this element are reported
///   under this key in the match output.
///
/// `Debug` and `Clone` are implemented by hand because the boxed predicate held
/// by [`PatternElem::Matcher`] supports neither.
pub enum PatternElem<T> {
    /// Matches window items that compare equal to `value`.
    Value {
        /// The value a window item must equal.
        value: T,
        /// Minimum number of consecutive repetitions, if constrained.
        min_repeat: Option<usize>,
        /// Maximum number of consecutive repetitions, if constrained.
        max_repeat: Option<usize>,
        /// Output key under which the consumed items are captured.
        capture_name: Option<String>,
    },
    /// Matches window items for which the boxed predicate returns `true`.
    Matcher {
        /// Predicate applied to each candidate window item.
        matcher: Box<dyn Fn(&T) -> bool>,
        /// Minimum number of consecutive repetitions, if constrained.
        min_repeat: Option<usize>,
        /// Maximum number of consecutive repetitions, if constrained.
        max_repeat: Option<usize>,
        /// Output key under which the consumed items are captured.
        capture_name: Option<String>,
    },
    /// Wildcard: matches every window item.
    Any {
        /// Minimum number of consecutive repetitions, if constrained.
        min_repeat: Option<usize>,
        /// Maximum number of consecutive repetitions, if constrained.
        max_repeat: Option<usize>,
        /// Output key under which the consumed items are captured.
        capture_name: Option<String>,
    },
}
46
47impl<T: fmt::Debug> fmt::Debug for PatternElem<T> {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self {
50            PatternElem::Value { value, min_repeat, max_repeat, capture_name } => {
51                write!(f, "Value({:?}, min_repeat={:?}, max_repeat={:?}, capture_name={:?})", value, min_repeat, max_repeat, capture_name)
52            }
53            PatternElem::Matcher { min_repeat, max_repeat, capture_name, .. } => {
54                write!(f, "Matcher(.., min_repeat={:?}, max_repeat={:?}, capture_name={:?})", min_repeat, max_repeat, capture_name)
55            }
56            PatternElem::Any { min_repeat, max_repeat, capture_name } => {
57                write!(f, "Any(min_repeat={:?}, max_repeat={:?}, capture_name={:?})", min_repeat, max_repeat, capture_name)
58            }
59        }
60    }
61}
62
63/// Manual implementation of Clone for PatternElem.
64///
65/// - `Value(v)`: Clones the value.
66/// - `Any`: Returns Any.
67/// - `Matcher`: Panics (cannot clone closures).
68impl<T: Clone> Clone for PatternElem<T> {
69    fn clone(&self) -> Self {
70        match self {
71            PatternElem::Value { value, min_repeat, max_repeat, capture_name } => PatternElem::Value {
72                value: value.clone(),
73                min_repeat: *min_repeat,
74                max_repeat: *max_repeat,
75                capture_name: capture_name.clone(),
76            },
77            PatternElem::Any { min_repeat, max_repeat, capture_name } => PatternElem::Any {
78                min_repeat: *min_repeat,
79                max_repeat: *max_repeat,
80                capture_name: capture_name.clone(),
81            },
82            PatternElem::Matcher { .. } => panic!("Cannot clone PatternElem::Matcher"),
83        }
84    }
85}
86
87/// Pattern: a sequence of pattern elements, with optional callback and overlap/deduplication settings
88///
89/// - `pattern`: Sequence of pattern elements.
90/// - `callback`: Optional callback invoked on match.
91/// - `overlap`: If false, prevents overlapping matches for this pattern.
92/// - `deduplication`: If true, prevents duplicate matches for this pattern.
93pub struct Pattern<T> {
94    pub pattern: Vec<PatternElem<T>>,
95    pub callback: Option<SliceCallback<T>>,
96    pub overlap: bool,
97    pub deduplication: bool,
98    pub name: Option<String>,
99}
100
101// Allow Pattern<T> to be used as AsRef<Pattern<T>>
102impl<T> AsRef<Pattern<T>> for Pattern<T> {
103    fn as_ref(&self) -> &Pattern<T> {
104        self
105    }
106}
107
108impl<T: Clone> Clone for Pattern<T> {
109    fn clone(&self) -> Self {
110        Pattern {
111            pattern: self.pattern.clone(),
112            callback: None, // Cannot clone callback
113            overlap: self.overlap,
114            deduplication: self.deduplication,
115            name: self.name.clone(),
116        }
117    }
118}
119
120impl<T> Pattern<T> {
121    /// Create a new pattern from a sequence of pattern elements
122    pub fn new(pattern: Vec<PatternElem<T>>) -> Self {
123        Self {
124            pattern,
125            callback: None,
126            overlap: true,
127            deduplication: false,
128            name: None,
129        }
130    }
131    /// Set the name for this pattern (used in named output)
132    pub fn with_name(mut self, name: impl Into<String>) -> Self {
133        self.name = Some(name.into());
134        self
135    }
136    /// Set a callback to be invoked on match
137    pub fn with_callback(mut self, cb: SliceCallback<T>) -> Self {
138        self.callback = Some(cb);
139        self
140    }
141    /// Set overlap behavior
142    pub fn overlap(mut self, allow: bool) -> Self {
143        self.overlap = allow;
144        self
145    }
146    /// Set deduplication behavior
147    pub fn deduplication(mut self, enable: bool) -> Self {
148        self.deduplication = enable;
149        self
150    }
151}
152
/// Boxed callback that is handed a slice of the matched window items.
///
/// The closure must own everything it captures: the trait object carries the
/// default `'static` bound of `Box<dyn Trait>`.
pub type SliceCallback<T> = Box<dyn Fn(&[T])>;
157
158/// Builder for Pattern<T>
159///
160/// Use this builder to construct complex patterns with custom callbacks, overlap, and deduplication settings.
161pub struct PatternBuilderErased {
162    overlap: bool,
163    deduplication: bool,
164    name: Option<String>,
165}
166
167impl PatternBuilderErased {
168    /// Creates a new builder with default settings (overlap: true, deduplication: false, no name).
169    pub fn new() -> Self {
170        Self {
171            overlap: true,
172            deduplication: false,
173            name: None,
174        }
175    }
176
177    /// Starts a new pattern with a value element. Consumes the erased builder and returns a typed builder.
178    /// Usage: PatternBuilderErased::new().value_elem(42)
179    pub fn value_elem<T>(self, value: T) -> PatternBuilder<T> {
180        PatternBuilder {
181            pattern: vec![PatternElem::Value {
182                value,
183                min_repeat: None,
184                max_repeat: None,
185                capture_name: None,
186            }],
187            callback: None,
188            overlap: self.overlap,
189            deduplication: self.deduplication,
190            name: self.name,
191        }
192    }
193
194    /// Starts a new pattern with a matcher element (custom predicate). Consumes the erased builder and returns a typed builder.
195    /// Usage: PatternBuilderErased::new().matcher_elem(|x| *x > 0)
196    pub fn matcher_elem<T, F>(self, matcher: F) -> PatternBuilder<T>
197    where
198        F: Fn(&T) -> bool + 'static,
199    {
200        PatternBuilder {
201            pattern: vec![PatternElem::Matcher {
202                matcher: Box::new(matcher),
203                min_repeat: None,
204                max_repeat: None,
205                capture_name: None,
206            }],
207            callback: None,
208            overlap: self.overlap,
209            deduplication: self.deduplication,
210            name: self.name,
211        }
212    }
213
214    /// Starts a new pattern with an 'any' (wildcard) element. Consumes the erased builder and returns a typed builder.
215    /// Usage: PatternBuilderErased::new().any_elem()
216    pub fn any_elem<T>(self) -> PatternBuilder<T> {
217        PatternBuilder {
218            pattern: vec![PatternElem::Any {
219                min_repeat: None,
220                max_repeat: None,
221                capture_name: None,
222            }],
223            callback: None,
224            overlap: self.overlap,
225            deduplication: self.deduplication,
226            name: self.name,
227        }
228    }
229
230    /// Sets the name for the pattern (used for output keys). Can be chained before adding an element.
231    /// Usage: PatternBuilderErased::new().name("foo").value_elem(1)
232    pub fn name(mut self, name: impl Into<String>) -> Self {
233        self.name = Some(name.into());
234        self
235    }
236
237    /// Sets whether overlapping matches are allowed for this pattern. Can be chained before adding an element.
238    /// Usage: PatternBuilderErased::new().overlap(false).value_elem(1)
239    pub fn overlap(mut self, allow: bool) -> Self {
240        self.overlap = allow;
241        self
242    }
243
244    /// Sets whether deduplication is enabled for this pattern. Can be chained before adding an element.
245    /// Usage: PatternBuilderErased::new().deduplication(true).value_elem(1)
246    pub fn deduplication(mut self, enable: bool) -> Self {
247        self.deduplication = enable;
248        self
249    }
250}
251
252/// Ergonomic builder for Pattern<T>
253///
254/// All fields and methods are public for chaining and ergonomic usage.
255pub struct PatternBuilder<T> {
256    pub pattern: Vec<PatternElem<T>>,
257    pub callback: Option<SliceCallback<T>>,
258    pub overlap: bool,
259    pub deduplication: bool,
260    pub name: Option<String>,
261}
262
263impl<T> PatternBuilder<T> {
264    /// Set the minimum repeat count for the last pattern element
265    pub fn min_repeat(mut self, min: usize) -> Self {
266        if let Some(last) = self.pattern.last_mut() {
267            match last {
268                PatternElem::Value { min_repeat, .. } => *min_repeat = Some(min),
269                PatternElem::Matcher { min_repeat, .. } => *min_repeat = Some(min),
270                PatternElem::Any { min_repeat, .. } => *min_repeat = Some(min),
271            }
272        }
273        self
274    }
275    /// Set the maximum repeat count for the last pattern element
276    pub fn max_repeat(mut self, max: usize) -> Self {
277        if let Some(last) = self.pattern.last_mut() {
278            match last {
279                PatternElem::Value { max_repeat, .. } => *max_repeat = Some(max),
280                PatternElem::Matcher { max_repeat, .. } => *max_repeat = Some(max),
281                PatternElem::Any { max_repeat, .. } => *max_repeat = Some(max),
282            }
283        }
284        self
285    }
286    /// Set the capture name for the last pattern element
287    pub fn capture_name(mut self, name: impl Into<String>) -> Self {
288        if let Some(last) = self.pattern.last_mut() {
289            match last {
290                PatternElem::Value { capture_name, .. } => *capture_name = Some(name.into()),
291                PatternElem::Matcher { capture_name, .. } => *capture_name = Some(name.into()),
292                PatternElem::Any { capture_name, .. } => *capture_name = Some(name.into()),
293            }
294        }
295        self
296    }
297    /// Set a callback for the pattern
298    pub fn callback(mut self, cb: SliceCallback<T>) -> Self {
299        self.callback = Some(cb);
300        self
301    }
302    pub fn value_elem(mut self, value: T) -> Self {
303        self.pattern.push(PatternElem::Value {
304            value,
305            min_repeat: None,
306            max_repeat: None,
307            capture_name: None,
308        });
309        self
310    }
311    pub fn matcher_elem<F>(mut self, matcher: F) -> Self
312    where
313        F: Fn(&T) -> bool + 'static,
314    {
315        self.pattern.push(PatternElem::Matcher {
316            matcher: Box::new(matcher),
317            min_repeat: None,
318            max_repeat: None,
319            capture_name: None,
320        });
321        self
322    }
323    pub fn any_elem(mut self) -> Self {
324        self.pattern.push(PatternElem::Any {
325            min_repeat: None,
326            max_repeat: None,
327            capture_name: None,
328        });
329        self
330    }
331    pub fn build(self) -> Pattern<T> {
332        Pattern {
333            pattern: self.pattern,
334            callback: self.callback,
335            overlap: self.overlap,
336            deduplication: self.deduplication,
337            name: self.name,
338        }
339    }
340    pub fn name(mut self, name: impl Into<String>) -> Self {
341        self.name = Some(name.into());
342        self
343    }
344    pub fn overlap(mut self, allow: bool) -> Self {
345        self.overlap = allow;
346        self
347    }
348    pub fn deduplication(mut self, enable: bool) -> Self {
349        self.deduplication = enable;
350        self
351    }
352}
353
354/// The main matcher struct
355///
356/// Use this struct to perform pattern matching on windows of data. Supports flexible and ergonomic APIs.
357#[derive(Debug, Clone)]
358pub struct ScrollingWindowPatternMatcherRef {
359    pub window_len: usize,
360}
361
362impl ScrollingWindowPatternMatcherRef {
363    /// Find matches and return named captures in a HashMap output format.
364    ///
365    /// Output: `HashMap<pattern_name, Vec<HashMap<capture_name, Vec<T>>>>`
366    ///
367    /// Each pattern's name is used as the key, and each match produces a HashMap of capture names to matched values.
368    ///
369    /// Output: HashMap<pattern_name, Vec<HashMap<capture_name, Vec<T>>>>
370    pub fn find_matches<T>(&self, window: &[T], patterns: &[Pattern<T>]) -> std::collections::HashMap<String, Vec<std::collections::HashMap<String, Vec<T>>>>
371    where
372        T: PartialEq + Clone + std::fmt::Debug,
373    {
374        use std::collections::HashMap;
375        let mut results: HashMap<String, Vec<HashMap<String, Vec<T>>>> = HashMap::new();
376        for (p_idx, pat) in patterns.iter().enumerate() {
377            let pat_name = pat.name.clone().unwrap_or_else(|| format!("pattern_{}", p_idx));
378            let pat_len = pat.pattern.len();
379            if pat_len == 0 || window.is_empty() {
380                continue;
381            }
382            let mut w_idx = 0;
383            while w_idx < window.len() {
384                let mut win_pos = w_idx;
385                let mut captures: HashMap<String, Vec<T>> = HashMap::new();
386                let mut matched = true;
387                let mut match_indices = Vec::new();
388                for elem in pat.pattern.iter() {
389                    // Handle repeats
390                    let min_repeat = match elem {
391                        PatternElem::Value { min_repeat, .. } => min_repeat,
392                        PatternElem::Matcher { min_repeat, .. } => min_repeat,
393                        PatternElem::Any { min_repeat, .. } => min_repeat,
394                    };
395                    let max_repeat = match elem {
396                        PatternElem::Value { max_repeat, .. } => max_repeat,
397                        PatternElem::Matcher { max_repeat, .. } => max_repeat,
398                        PatternElem::Any { max_repeat, .. } => max_repeat,
399                    };
400                    let repeat_min = min_repeat.unwrap_or(1);
401                    let repeat_max = max_repeat.unwrap_or(1);
402                    let mut repeat_count = 0;
403                    let mut repeat_indices = Vec::new();
404                    while repeat_count < repeat_max && win_pos < window.len() {
405                        let elem_match = match elem {
406                            PatternElem::Value { value, .. } => &window[win_pos] == value,
407                            PatternElem::Matcher { matcher, .. } => matcher(&window[win_pos]),
408                            PatternElem::Any { .. } => true,
409                        };
410                        if elem_match {
411                            repeat_indices.push(win_pos);
412                            repeat_count += 1;
413                            win_pos += 1;
414                        } else {
415                            break;
416                        }
417                    }
418                    if repeat_count < repeat_min {
419                        matched = false;
420                        break;
421                    }
422                    match_indices.extend(repeat_indices.iter().copied());
423                    // Handle capture
424                    let capture_name = match elem {
425                        PatternElem::Value { capture_name, .. } => capture_name,
426                        PatternElem::Matcher { capture_name, .. } => capture_name,
427                        PatternElem::Any { capture_name, .. } => capture_name,
428                    };
429                    if let Some(name) = capture_name {
430                        let captured: Vec<T> = repeat_indices.iter().map(|&i| window[i].clone()).collect();
431                        captures.insert(name.clone(), captured);
432                    }
433                }
434                if matched && !match_indices.is_empty() {
435                    // Call the callback if present
436                    if let Some(cb) = &pat.callback {
437                        // Pass the matched slice to the callback
438                        let matched_slice: Vec<T> = match_indices.iter().map(|&i| window[i].clone()).collect();
439                        cb(&matched_slice);
440                    }
441                    results.entry(pat_name.clone()).or_default().push(captures);
442                }
443                w_idx += 1;
444            }
445        }
446        results
447    }
448    /// Create a new matcher for a window of given length
449    pub fn new(window_len: usize) -> Self {
450        Self { window_len }
451    }
452
453    /// Flexible find_matches: accepts Vec, slice, or array for window and patterns
454    /// Flexible pattern matching for windows and patterns.
455    ///
456    /// Accepts any owned or referenced container for the window (e.g., Vec, slice, array) and patterns (owned or referenced).
457    ///
458    /// Trait bounds:
459    /// - `W: IntoIterator, W::Item: Borrow<T>`: Window can be owned or referenced; each item is borrowed for matching.
460    /// - `T: Clone + PartialEq`: Window elements must be cloneable and comparable.
461    /// - `P: IntoIterator, P::Item: AsRef<Pattern<T>>`: Patterns can be owned or referenced.
462    ///
463    /// Performance: Clones all window elements into a new Vec; may use more memory for large windows.
464    ///
465    /// Use this method for ergonomic API and flexibility. For maximum performance, use `find_matches` with slices.
466    ///
467    /// # Examples
468    ///
469    /// ```rust
470    /// use scrolling_window_pattern_matcher::{PatternBuilderErased, ScrollingWindowPatternMatcherRef};
471    /// let window = vec![1, 2, 1, 2, 1];
472    /// let patterns = vec![
473    ///     PatternBuilderErased::new().value_elem(1).value_elem(2).build(),
474    ///     PatternBuilderErased::new().value_elem(2).value_elem(1).build(),
475    /// ];
476    /// let matcher = ScrollingWindowPatternMatcherRef::new(window.len());
477    /// let named = matcher.find_matches_flexible(window, &patterns);
478    /// assert!(named["pattern_0"].iter().any(|m| m.is_empty() || m.contains_key("")));
479    /// assert!(named["pattern_1"].iter().any(|m| m.is_empty() || m.contains_key("")));
480    /// ```
481    pub fn find_matches_flexible<T, W, P>(&self, window: W, patterns: P) -> std::collections::HashMap<String, Vec<std::collections::HashMap<String, Vec<T>>>>
482    where
483        W: IntoIterator,
484        W::Item: Borrow<T>,
485        T: Clone + PartialEq + std::fmt::Debug,
486        P: IntoIterator,
487        P::Item: AsRef<Pattern<T>>,
488        Pattern<T>: Clone,
489    {
490        let window_vec: Vec<T> = window.into_iter()
491            .map(|x| x.borrow().clone())
492            .collect();
493        let patterns_vec: Vec<Pattern<T>> = patterns
494            .into_iter()
495            .map(|p| p.as_ref().clone())
496            .collect();
497        // Use find_matches, which now calls callbacks
498        self.find_matches(&window_vec, &patterns_vec)
499    }
500}
501
502// Re-export major types and builders for crate consumers