Skip to main content

formatparse_core/parser/
regex.rs

1use crate::error::FormatParseError;
2use fancy_regex::Regex;
3use std::time::Instant;
4
/// Maximum time allowed for regex compilation (in milliseconds).
///
/// This is checked **after** `Regex::new` returns (wall-clock elapsed time). It does not
/// interrupt compilation in progress and does not bound **matching** time. See project
/// security documentation for ReDoS considerations.
///
/// The builders in this module compare the elapsed milliseconds against this value with a
/// strict `>`, so a compilation that takes exactly this long is still accepted.
///
/// Set to 500ms so legitimate patterns still fail closed on extreme stalls while tolerating
/// slow shared CI hosts (e.g. macOS Intel runners) that can exceed a tighter budget sporadically.
const MAX_REGEX_COMPILATION_TIME_MS: u128 = 500;
14
15/// Build a regex from a pattern string with DOTALL flag
16/// Includes timeout protection against ReDoS attacks
17pub fn build_regex(pattern: &str) -> Result<Regex, FormatParseError> {
18    let start = Instant::now();
19
20    // Pre-allocate with estimated capacity
21    let mut regex_with_flags = String::with_capacity(pattern.len() + 4);
22    regex_with_flags.push_str("(?s)");
23    regex_with_flags.push_str(pattern);
24
25    let regex = Regex::new(&regex_with_flags).map_err(|e| {
26        // Sanitize error message - don't expose full regex pattern to prevent information disclosure
27        FormatParseError::RegexError(format!("Invalid regex pattern: {}", e))
28    })?;
29
30    // Check compilation time
31    let elapsed = start.elapsed().as_millis();
32    if elapsed > MAX_REGEX_COMPILATION_TIME_MS {
33        return Err(FormatParseError::RegexError(format!(
34            "Regex compilation took {}ms, exceeding maximum allowed time of {}ms",
35            elapsed, MAX_REGEX_COMPILATION_TIME_MS
36        )));
37    }
38
39    Ok(regex)
40}
41
42/// Build a case-insensitive regex from a pattern string with DOTALL flag
43/// Includes timeout protection against ReDoS attacks
44pub fn build_case_insensitive_regex(pattern: &str) -> Option<Regex> {
45    let start = Instant::now();
46
47    // Pre-allocate with estimated capacity
48    let mut regex_with_flags = String::with_capacity(pattern.len() + 8);
49    regex_with_flags.push_str("(?s)(?i)");
50    regex_with_flags.push_str(pattern);
51
52    let regex = Regex::new(&regex_with_flags).ok()?;
53
54    // Check compilation time
55    let elapsed = start.elapsed().as_millis();
56    if elapsed > MAX_REGEX_COMPILATION_TIME_MS {
57        return None;
58    }
59
60    Some(regex)
61}
62
/// Remove the leading `(?s)` flag and the outer `^` / `$` anchors from a regex string so
/// the pattern can be used for unanchored search.
///
/// Processing order:
/// 1. a leading `(?s)` is dropped if present;
/// 2. a `^` immediately following is dropped if present;
/// 3. a trailing `$` is dropped **only when it is an anchor**, i.e. when it is preceded by an
///    even number (including zero) of backslashes. A trailing `\$` is an escaped literal
///    dollar sign; stripping it would leave a dangling backslash and corrupt the pattern.
///
/// Always returns a newly allocated `String`, even when nothing was removed.
pub fn prepare_search_regex(regex_str: &str) -> String {
    let without_flag = regex_str.strip_prefix("(?s)").unwrap_or(regex_str);
    let without_caret = without_flag.strip_prefix('^').unwrap_or(without_flag);

    let body = match without_caret.strip_suffix('$') {
        Some(head) => {
            // Count the backslashes directly before the `$`. Scanning bytes is safe because
            // `\` is ASCII and never appears inside a multi-byte UTF-8 sequence.
            let trailing_backslashes = head.bytes().rev().take_while(|&b| b == b'\\').count();
            if trailing_backslashes % 2 == 0 {
                // Unescaped `$`: a real end anchor, drop it.
                head
            } else {
                // Escaped `\$`: a literal dollar sign, keep the pattern intact.
                without_caret
            }
        }
        None => without_caret,
    };

    body.to_string()
}
91
92/// Build a search regex (without anchors) with optional case sensitivity
93/// Includes timeout protection against ReDoS attacks
94pub fn build_search_regex(
95    regex_str: &str,
96    case_sensitive: bool,
97) -> Result<Regex, FormatParseError> {
98    let start = Instant::now();
99
100    let search_regex_str = prepare_search_regex(regex_str);
101
102    // Pre-allocate with estimated capacity
103    let capacity = search_regex_str.len() + if case_sensitive { 4 } else { 8 };
104    let mut pattern = String::with_capacity(capacity);
105    pattern.push_str("(?s)");
106    if !case_sensitive {
107        pattern.push_str("(?i)");
108    }
109    pattern.push_str(&search_regex_str);
110
111    let regex = Regex::new(&pattern).map_err(|e| {
112        // Sanitize error message - don't expose full regex pattern to prevent information disclosure
113        FormatParseError::RegexError(format!("Invalid regex pattern: {}", e))
114    })?;
115
116    // Check compilation time
117    let elapsed = start.elapsed().as_millis();
118    if elapsed > MAX_REGEX_COMPILATION_TIME_MS {
119        return Err(FormatParseError::RegexError(format!(
120            "Regex compilation took {}ms, exceeding maximum allowed time of {}ms",
121            elapsed, MAX_REGEX_COMPILATION_TIME_MS
122        )));
123    }
124
125    Ok(regex)
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    // --- build_regex -----------------------------------------------------------------------

    /// Anchored, case-sensitive matching.
    #[test]
    fn test_build_regex() {
        let re = build_regex(r"^test$").unwrap();
        assert!(re.is_match("test").unwrap());
        for rejected in ["TEST", "notest"] {
            assert!(!re.is_match(rejected).unwrap());
        }
    }

    /// `.` must match a newline because DOTALL is always enabled.
    #[test]
    fn test_build_regex_with_dotall() {
        let re = build_regex(r"test.line").unwrap();
        assert!(re.is_match("test\nline").unwrap());
    }

    // --- build_case_insensitive_regex ------------------------------------------------------

    /// Anchored matching that ignores letter case.
    #[test]
    fn test_build_case_insensitive_regex() {
        let re = build_case_insensitive_regex(r"^test$").unwrap();
        for accepted in ["test", "TEST", "Test"] {
            assert!(re.is_match(accepted).unwrap());
        }
        assert!(!re.is_match("notest").unwrap());
    }

    /// DOTALL and case-insensitivity apply together.
    #[test]
    fn test_build_case_insensitive_regex_with_dotall() {
        let re = build_case_insensitive_regex(r"test.line").unwrap();
        assert!(re.is_match("TEST\nLINE").unwrap());
    }

    // --- prepare_search_regex --------------------------------------------------------------

    /// A pattern without anchors comes back unchanged.
    #[test]
    fn test_prepare_search_regex_no_anchors() {
        assert_eq!(prepare_search_regex(r"test"), "test");
    }

    /// Both anchors are removed.
    #[test]
    fn test_prepare_search_regex_with_anchors() {
        assert_eq!(prepare_search_regex(r"^test$"), "test");
    }

    /// The `(?s)` flag is removed together with the anchors.
    #[test]
    fn test_prepare_search_regex_with_dotall() {
        assert_eq!(prepare_search_regex(r"(?s)^test$"), "test");
    }

    /// Only a start anchor is present.
    #[test]
    fn test_prepare_search_regex_start_anchor_only() {
        assert_eq!(prepare_search_regex(r"^test"), "test");
    }

    /// Only an end anchor is present.
    #[test]
    fn test_prepare_search_regex_end_anchor_only() {
        assert_eq!(prepare_search_regex(r"test$"), "test");
    }

    // --- build_search_regex ----------------------------------------------------------------

    /// Case-sensitive search regex: anchors are dropped, case still matters.
    #[test]
    fn test_build_search_regex_case_sensitive() {
        let re = build_search_regex(r"^test$", true).unwrap();
        assert!(re.is_match("test").unwrap());
        assert!(!re.is_match("TEST").unwrap());
        // Should match anywhere in string (no anchors)
        assert!(re.is_match("prefix test suffix").unwrap());
    }

    /// Case-insensitive search regex: anchors are dropped, case is ignored.
    #[test]
    fn test_build_search_regex_case_insensitive() {
        let re = build_search_regex(r"^test$", false).unwrap();
        for accepted in ["test", "TEST", "Test"] {
            assert!(re.is_match(accepted).unwrap());
        }
        // Should match anywhere in string (no anchors)
        assert!(re.is_match("prefix TEST suffix").unwrap());
    }

    /// DOTALL is enabled for search regexes as well.
    #[test]
    fn test_build_search_regex_with_dotall() {
        let re = build_search_regex(r"test.line", true).unwrap();
        for accepted in ["test\nline", "prefix test\nline suffix"] {
            assert!(re.is_match(accepted).unwrap());
        }
    }
}