1pub mod analyzer;
4pub mod error;
5pub mod prelude;
6pub mod validator;
7
8mod syntax;
9
10pub use analyzer::{CaptureGroup, RegexAnalyzer};
11pub use error::RegexError;
12pub use validator::RegexValidator;
13
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validator::RegexValidationConfig;

    /// Wraps `core` in `depth` nested groups, each opened by `opener` and closed by `)`.
    fn nest(opener: &str, core: &str, depth: usize) -> String {
        (0..depth).fold(String::from(core), |inner, _| format!("{opener}{inner})"))
    }

    // ---------------------------------------------------------------------
    // RegexError
    // ---------------------------------------------------------------------

    /// `RegexError::syntax` keeps the message and offset it was given, and both
    /// show up in the rendered error text.
    #[test]
    fn regex_error_syntax_stores_message_and_offset() {
        let error = RegexError::syntax("unexpected char", 7);

        let RegexError::Syntax { message, offset } = &error;
        assert_eq!(message, "unexpected char");
        assert_eq!(*offset, 7);

        let rendered = error.to_string();
        assert!(rendered.contains("7"));
        assert!(rendered.contains("unexpected char"));
    }

    /// A cloned error compares equal to the value it was cloned from.
    #[test]
    fn regex_error_implements_clone_and_partialeq() {
        let original = RegexError::syntax("msg", 3);
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    // ---------------------------------------------------------------------
    // RegexValidator::validate
    // ---------------------------------------------------------------------

    /// A literal, the empty pattern and a quantified alternation all validate.
    #[test]
    fn validate_simple_pattern_ok() {
        let validator = RegexValidator::new();
        assert!(validator.validate("hello", 0).is_ok());
        assert!(validator.validate("", 0).is_ok());
        assert!(validator.validate("(a|b)+", 0).is_ok());
    }

    /// Fifty `\p{L}` escapes are accepted by a validator built with `new()`.
    #[test]
    fn validate_unicode_property_within_limit_ok() {
        let validator = RegexValidator::new();
        let fifty_properties = r"\p{L}".repeat(50);
        assert!(validator.validate(&fifty_properties, 0).is_ok());
    }

    /// Fifty-one `\p{L}` escapes are rejected with an error mentioning "Unicode".
    #[test]
    fn validate_too_many_unicode_properties_errors() {
        let validator = RegexValidator::new();
        let fifty_one_properties = r"\p{L}".repeat(51);
        let error = validator.validate(&fifty_one_properties, 0).unwrap_err();
        assert!(error.to_string().contains("Unicode"));
    }

    /// With `max_unicode_properties: 1`, two properties produce an error whose
    /// text contains "max 1".
    #[test]
    fn validate_unicode_property_error_reports_configured_limit() {
        let validator = RegexValidator::with_config(RegexValidationConfig {
            max_nesting: 10,
            max_unicode_properties: 1,
            max_branch_reset_branches: 50,
        });

        // An unexpected `Ok` yields an empty message, which fails the assertion below.
        let message = match validator.validate(r"\p{L}\p{N}", 0) {
            Err(error) => error.to_string(),
            Ok(_) => String::new(),
        };
        assert!(message.contains("max 1"));
    }

    /// The offset reported for a failure is at least the base offset passed in.
    #[test]
    fn validate_unicode_property_offset_propagated() {
        let validator = RegexValidator::new();

        let mut pattern = String::from("x");
        pattern.push_str(&r"\p{L}".repeat(51));

        let RegexError::Syntax { offset, .. } = validator.validate(&pattern, 10).unwrap_err();
        assert!(offset >= 10);
    }

    /// Nine nested lookbehinds around `foo` are accepted.
    #[test]
    fn validate_lookbehind_within_limit_ok() {
        let validator = RegexValidator::new();
        let pattern = nest("(?<=", "foo", 9);
        assert!(validator.validate(&pattern, 0).is_ok());
    }

    /// Eleven nested lookbehinds are rejected with a lookbehind/nesting error.
    #[test]
    fn validate_lookbehind_nesting_too_deep_errors() {
        let validator = RegexValidator::new();
        let pattern = nest("(?<=", "a", 11);
        let rendered = validator.validate(&pattern, 0).unwrap_err().to_string();
        assert!(rendered.contains("lookbehind") || rendered.contains("nesting"));
    }

    /// Eleven nested branch-reset groups are rejected with a branch-reset/nesting error.
    #[test]
    fn validate_branch_reset_nesting_too_deep_errors() {
        let validator = RegexValidator::new();
        let pattern = nest("(?|", "a", 11);
        let rendered = validator.validate(&pattern, 0).unwrap_err().to_string();
        assert!(rendered.contains("branch reset") || rendered.contains("nesting"));
    }

    /// A branch-reset group with fifty-one alternatives is rejected.
    #[test]
    fn validate_too_many_branches_in_reset_group_errors() {
        let validator = RegexValidator::new();

        let branches: Vec<String> = (0u32..51).map(|n| format!("a{n}")).collect();
        let pattern = format!("(?|{})", branches.join("|"));

        let rendered = validator.validate(&pattern, 0).unwrap_err().to_string();
        assert!(rendered.contains("branch") || rendered.contains("50"));
    }

    /// With `max_branch_reset_branches: 2`, three branches produce an error whose
    /// text contains "max 2".
    #[test]
    fn validate_branch_reset_error_reports_configured_limit() {
        let validator = RegexValidator::with_config(RegexValidationConfig {
            max_nesting: 10,
            max_unicode_properties: 50,
            max_branch_reset_branches: 2,
        });

        // An unexpected `Ok` yields an empty message, which fails the assertion below.
        let message = match validator.validate("(?|a|b|c)", 0) {
            Err(error) => error.to_string(),
            Ok(_) => String::new(),
        };
        assert!(message.contains("max 2"));
    }

    /// Group-like text inside a character class does not make validation fail.
    #[test]
    fn validate_character_class_skipped() {
        let validator = RegexValidator::new();
        assert!(validator.validate("[(?{]", 0).is_ok());
    }

    // ---------------------------------------------------------------------
    // RegexValidator::detects_code_execution
    // ---------------------------------------------------------------------

    /// A `(?{ ... })` block is reported as code execution.
    #[test]
    fn detects_code_execution_with_code_block() {
        let validator = RegexValidator::new();
        assert!(validator.detects_code_execution("(?{ print 'hi' })"));
    }

    /// A `(??{ ... })` block is reported as code execution.
    #[test]
    fn detects_code_execution_with_deferred_code_block() {
        let validator = RegexValidator::new();
        assert!(validator.detects_code_execution("(??{ some_code() })"));
    }

    /// Non-capturing groups and lookaheads are not reported as code execution.
    #[test]
    fn detects_code_execution_false_for_non_capturing() {
        let validator = RegexValidator::new();
        assert!(!validator.detects_code_execution("(?:foo)"));
        assert!(!validator.detects_code_execution("(?=ahead)"));
        assert!(!validator.detects_code_execution("(?!not)"));
    }

    /// An escaped opening parenthesis followed by `?{` is not reported.
    #[test]
    fn detects_code_execution_escaped_paren_not_detected() {
        let validator = RegexValidator::new();
        assert!(!validator.detects_code_execution(r"\(?{"));
    }

    /// `(?{` inside a character class is not reported.
    #[test]
    fn detects_code_execution_in_char_class_not_detected() {
        let validator = RegexValidator::new();
        assert!(!validator.detects_code_execution("[(?{]"));
    }

    /// The empty pattern is not reported.
    #[test]
    fn detects_code_execution_empty_pattern() {
        let validator = RegexValidator::new();
        assert!(!validator.detects_code_execution(""));
    }

    // ---------------------------------------------------------------------
    // RegexValidator::detect_nested_quantifiers
    // ---------------------------------------------------------------------

    /// `+` applied to a group that contains `+` is detected.
    #[test]
    fn detect_nested_quantifiers_finds_plus_plus() {
        let validator = RegexValidator::new();
        assert!(validator.detect_nested_quantifiers("(a+)+"));
    }

    /// `*` applied to a group that contains `*` is detected.
    #[test]
    fn detect_nested_quantifiers_finds_star_star() {
        let validator = RegexValidator::new();
        assert!(validator.detect_nested_quantifiers("(a*)*"));
    }

    /// A brace quantifier applied to a group that contains `+` is detected.
    #[test]
    fn detect_nested_quantifiers_finds_brace_quantifier() {
        let validator = RegexValidator::new();
        assert!(validator.detect_nested_quantifiers("(a+){2,5}"));
    }

    /// A quantified plain group, a quantified class and sequential quantifiers
    /// are not reported.
    #[test]
    fn detect_nested_quantifiers_safe_patterns() {
        let validator = RegexValidator::new();
        assert!(!validator.detect_nested_quantifiers("(abc)+"));
        assert!(!validator.detect_nested_quantifiers("[a-z]+"));
        assert!(!validator.detect_nested_quantifiers("a+b+"));
    }

    // ---------------------------------------------------------------------
    // Default
    // ---------------------------------------------------------------------

    /// A validator obtained through `Default` accepts a simple pattern.
    #[test]
    fn default_is_same_as_new() {
        let validator = RegexValidator::default();
        assert!(validator.validate("simple", 0).is_ok());
    }

    // ---------------------------------------------------------------------
    // RegexAnalyzer::extract_named_captures
    // ---------------------------------------------------------------------

    /// `(?<name>...)` groups are returned in order with indices 1 and 2.
    #[test]
    fn extract_named_captures_angle_bracket_syntax() {
        let captures = RegexAnalyzer::extract_named_captures(r"(?<year>\d{4})-(?<month>\d{2})");
        assert_eq!(captures.len(), 2);

        assert_eq!(captures[0].name, "year");
        assert_eq!(captures[0].index, 1);

        assert_eq!(captures[1].name, "month");
        assert_eq!(captures[1].index, 2);
    }

    /// `(?'name'...)` is recognised as a named capture.
    #[test]
    fn extract_named_captures_single_quote_syntax() {
        let captures = RegexAnalyzer::extract_named_captures(r"(?'name'\w+)");
        assert_eq!(captures.len(), 1);
        assert_eq!(captures[0].name, "name");
        assert_eq!(captures[0].index, 1);
    }

    /// A pattern without groups yields no captures.
    #[test]
    fn extract_named_captures_no_captures() {
        let captures = RegexAnalyzer::extract_named_captures(r"\d+\.\d+");
        assert!(captures.is_empty());
    }

    /// A preceding `(?:...)` group does not consume a capture index.
    #[test]
    fn extract_named_captures_non_capturing_group_not_counted() {
        let captures = RegexAnalyzer::extract_named_captures(r"(?:foo)(?<bar>baz)");
        assert_eq!(captures.len(), 1);
        assert_eq!(captures[0].name, "bar");
        assert_eq!(captures[0].index, 1);
    }

    /// A `(?<=...)` lookbehind is not mistaken for a named capture.
    #[test]
    fn extract_named_captures_lookbehind_not_counted() {
        let captures = RegexAnalyzer::extract_named_captures(r"(?<=foo)(?<word>\w+)");
        assert_eq!(captures.len(), 1);
        assert_eq!(captures[0].name, "word");
    }

    /// Escaped parentheses around a named group do not affect extraction.
    #[test]
    fn extract_named_captures_escaped_paren_skipped() {
        let captures = RegexAnalyzer::extract_named_captures(r"\((?<x>\d)\)");
        assert_eq!(captures.len(), 1);
        assert_eq!(captures[0].name, "x");
    }

    /// The capture records the sub-pattern between the name and the closing paren.
    #[test]
    fn extract_named_captures_stores_subpattern() {
        let captures = RegexAnalyzer::extract_named_captures(r"(?<id>\d+)");
        assert_eq!(captures.len(), 1);
        assert_eq!(captures[0].pattern, r"\d+");
    }

    // ---------------------------------------------------------------------
    // RegexAnalyzer::hover_text_for_regex
    // ---------------------------------------------------------------------

    /// Hover text for a named capture with the `i` modifier mentions "id" and "case".
    #[test]
    fn hover_text_includes_pattern_and_captures() {
        let hover = RegexAnalyzer::hover_text_for_regex(r"(?<id>\d+)", "i");
        assert!(hover.contains("id"));
        assert!(hover.contains("case"));
    }

    /// Each of the `i`, `m`, `s` and `x` modifiers contributes its explanation.
    #[test]
    fn hover_text_modifier_explanations() {
        let hover = RegexAnalyzer::hover_text_for_regex("foo", "imsx");
        assert!(hover.contains("case-insensitive"));
        assert!(hover.contains("multiline"));
        assert!(hover.contains("single-line"));
        assert!(hover.contains("extended"));
    }

    /// The `g` modifier is described as "global".
    #[test]
    fn hover_text_global_modifier() {
        let hover = RegexAnalyzer::hover_text_for_regex("foo", "g");
        assert!(hover.contains("global"));
    }

    /// Without modifiers the pattern is shown and no "Modifiers" text appears.
    #[test]
    fn hover_text_no_modifiers() {
        let hover = RegexAnalyzer::hover_text_for_regex("hello", "");
        assert!(hover.contains("hello"));
        assert!(!hover.contains("Modifiers"));
    }

    /// An empty pattern with no modifiers produces empty hover text.
    #[test]
    fn hover_text_empty_pattern() {
        let hover = RegexAnalyzer::hover_text_for_regex("", "");
        assert!(hover.is_empty());
    }

    /// An unrecognised modifier letter does not produce "Modifiers" text.
    #[test]
    fn hover_text_unknown_modifier_ignored() {
        let hover = RegexAnalyzer::hover_text_for_regex("x", "z");
        assert!(!hover.contains("Modifiers"));
    }
}