hyperscan_tokio/
pattern.rs

1//! Pattern information and capture group structures
2
3use crate::Flags;
4
5/// Information about a compiled pattern
6#[derive(Debug, Clone)]
7pub struct PatternInfo {
8    /// Pattern ID
9    pub id: u32,
10    /// Original PCRE pattern
11    pub original: String,
12    /// Simplified pattern for Hyperscan
13    pub simplified: String,
14    /// Capture group information
15    pub capture_groups: Vec<CaptureGroupInfo>,
16    /// Pattern features
17    pub features: PatternFeatures,
18    /// Compilation flags
19    pub flags: Flags,
20    /// PCRE match limits (if specified)
21    pub match_limit: Option<u32>,
22    /// PCRE recursion limit (if specified)
23    pub match_limit_recursion: Option<u32>,
24}
25
/// Information about a capture group.
///
/// Derives `PartialEq`/`Eq` so descriptions can be compared directly (for
/// example in assertions), in line with `CaptureGroup`, which is comparable
/// as well.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroupInfo {
    /// Group name; `None` for an unnamed group.
    pub name: Option<String>,
    /// Group index (0-based, excluding the full match).
    pub index: usize,
    /// Offset adjustment for the simplified pattern (signed).
    pub offset_adjustment: i32,
}
36
/// Syntactic features detected in a pattern.
///
/// All flags are `false` in the `Default` value.
#[derive(Debug, Clone, Default)]
pub struct PatternFeatures {
    /// Set when the pattern has capture groups.
    pub has_captures: bool,
    /// Set when the pattern has named capture groups.
    pub has_named_captures: bool,
    /// Set when the pattern has backreferences.
    pub has_backrefs: bool,
    /// Set when the pattern has lookarounds.
    pub has_lookarounds: bool,
    /// Set when the pattern has anchors (^, $, \A, \z).
    pub has_anchors: bool,
    /// Set when the pattern has word boundaries (\b, \B).
    pub has_word_boundaries: bool,
}

impl PatternFeatures {
    /// Report whether the pattern must be simplified for Hyperscan.
    ///
    /// This is the case when at least one of capture groups, backreferences
    /// or lookarounds is present; the remaining flags are not consulted.
    pub fn needs_simplification(&self) -> bool {
        let triggers = [self.has_captures, self.has_backrefs, self.has_lookarounds];
        triggers.iter().any(|&present| present)
    }

    /// Report whether a regex is needed for extraction.
    ///
    /// Only the presence of capture groups decides this.
    pub fn needs_extraction(&self) -> bool {
        let Self { has_captures, .. } = *self;
        has_captures
    }
}
65
/// A capture group match, stored as a byte span into the searched data.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptureGroup {
    /// Start position of the capture (inclusive; used as the lower slice bound).
    pub start: usize,
    /// End position of the capture (exclusive; used as the upper slice bound).
    pub end: usize,
    /// Capture group name (if any).
    pub name: Option<String>,
}

impl CaptureGroup {
    /// Get the matched bytes from the source data.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range inside `data`, i.e. when
    /// `start > end` or `end > data.len()`.
    pub fn as_bytes<'a>(&self, data: &'a [u8]) -> &'a [u8] {
        &data[self.start..self.end]
    }

    /// Get the matched text as a string.
    ///
    /// Returns `None` when the span does not lie inside `data` or when the
    /// selected bytes are not valid UTF-8. Unlike [`CaptureGroup::as_bytes`],
    /// this never panics: the return type can already express "no text".
    pub fn as_str<'a>(&self, data: &'a [u8]) -> Option<&'a str> {
        data.get(self.start..self.end)
            .and_then(|bytes| std::str::from_utf8(bytes).ok())
    }

    /// Check if this capture group is empty.
    ///
    /// An inverted span (`start > end`) counts as empty, so this always
    /// agrees with `len() == 0`.
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }

    /// Get the length of this capture in bytes.
    ///
    /// Uses a saturating subtraction so an inverted span yields `0` instead
    /// of panicking (debug builds) or wrapping to a huge value (release builds).
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }
}
98
#[cfg(test)]
mod tests {
    use super::*;

    /// Default features need nothing; capture groups trigger both predicates.
    #[test]
    fn test_pattern_features() {
        let mut features = PatternFeatures::default();
        assert!(!features.needs_simplification());
        assert!(!features.needs_extraction());

        features.has_captures = true;
        assert!(features.needs_simplification());
        assert!(features.needs_extraction());
    }

    /// Backreferences or lookarounds alone require simplification but not
    /// extraction; anchors and word boundaries require neither.
    #[test]
    fn test_pattern_features_without_captures() {
        let backrefs = PatternFeatures {
            has_backrefs: true,
            ..Default::default()
        };
        assert!(backrefs.needs_simplification());
        assert!(!backrefs.needs_extraction());

        let lookarounds = PatternFeatures {
            has_lookarounds: true,
            ..Default::default()
        };
        assert!(lookarounds.needs_simplification());
        assert!(!lookarounds.needs_extraction());

        let passthrough = PatternFeatures {
            has_anchors: true,
            has_word_boundaries: true,
            ..Default::default()
        };
        assert!(!passthrough.needs_simplification());
        assert!(!passthrough.needs_extraction());
    }

    /// A valid span yields the expected bytes and text.
    #[test]
    fn test_capture_group() {
        let data = b"Hello, World!";
        let capture = CaptureGroup {
            start: 7,
            end: 12,
            name: None,
        };

        assert_eq!(capture.as_bytes(data), b"World");
        assert_eq!(capture.as_str(data), Some("World"));
    }

    /// `len` and `is_empty` reflect the span width for well-formed spans.
    #[test]
    fn test_capture_group_len_and_is_empty() {
        let capture = CaptureGroup {
            start: 7,
            end: 12,
            name: Some("word".to_string()),
        };
        assert_eq!(capture.len(), 5);
        assert!(!capture.is_empty());

        let empty = CaptureGroup {
            start: 4,
            end: 4,
            name: None,
        };
        assert_eq!(empty.len(), 0);
        assert!(empty.is_empty());
    }

    /// Bytes that are not valid UTF-8 are still reachable via `as_bytes`,
    /// while `as_str` reports `None`.
    #[test]
    fn test_capture_group_invalid_utf8() {
        let data: &[u8] = &[0xff, 0xfe, b'a'];
        let capture = CaptureGroup {
            start: 0,
            end: 2,
            name: None,
        };

        assert_eq!(capture.as_bytes(data), &[0xff, 0xfe][..]);
        assert_eq!(capture.as_str(data), None);
    }
}
126}