1use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
7use shape_value::ValueWord;
8use std::sync::Arc;
9
10fn match_to_nanboxed(m: ®ex::Match, captures: ®ex::Captures) -> ValueWord {
13 let mut keys = Vec::with_capacity(4);
14 let mut values = Vec::with_capacity(4);
15
16 keys.push(ValueWord::from_string(Arc::new("text".to_string())));
17 values.push(ValueWord::from_string(Arc::new(m.as_str().to_string())));
18
19 keys.push(ValueWord::from_string(Arc::new("start".to_string())));
20 values.push(ValueWord::from_f64(m.start() as f64));
21
22 keys.push(ValueWord::from_string(Arc::new("end".to_string())));
23 values.push(ValueWord::from_f64(m.end() as f64));
24
25 let groups: Vec<ValueWord> = captures
26 .iter()
27 .skip(1)
28 .map(|opt| match opt {
29 Some(g) => ValueWord::from_string(Arc::new(g.as_str().to_string())),
30 None => ValueWord::none(),
31 })
32 .collect();
33 keys.push(ValueWord::from_string(Arc::new("groups".to_string())));
34 values.push(ValueWord::from_array(Arc::new(groups)));
35
36 ValueWord::from_hashmap_pairs(keys, values)
37}
38
39pub fn create_regex_module() -> ModuleExports {
41 let mut module = ModuleExports::new("regex");
42 module.description = "Regular expression matching and replacement".to_string();
43
44 module.add_function_with_schema(
46 "is_match",
47 |args: &[ValueWord], _ctx: &ModuleContext| {
48 let text = args
49 .first()
50 .and_then(|a| a.as_str())
51 .ok_or_else(|| "regex.is_match() requires a text string argument".to_string())?;
52
53 let pattern = args
54 .get(1)
55 .and_then(|a| a.as_str())
56 .ok_or_else(|| "regex.is_match() requires a pattern string argument".to_string())?;
57
58 let re = regex::Regex::new(pattern)
59 .map_err(|e| format!("regex.is_match() invalid pattern: {}", e))?;
60
61 Ok(ValueWord::from_bool(re.is_match(text)))
62 },
63 ModuleFunction {
64 description: "Test whether the pattern matches anywhere in the text".to_string(),
65 params: vec![
66 ModuleParam {
67 name: "text".to_string(),
68 type_name: "string".to_string(),
69 required: true,
70 description: "Text to search".to_string(),
71 ..Default::default()
72 },
73 ModuleParam {
74 name: "pattern".to_string(),
75 type_name: "string".to_string(),
76 required: true,
77 description: "Regular expression pattern".to_string(),
78 ..Default::default()
79 },
80 ],
81 return_type: Some("bool".to_string()),
82 },
83 );
84
85 module.add_function_with_schema(
87 "match",
88 |args: &[ValueWord], _ctx: &ModuleContext| {
89 let text = args
90 .first()
91 .and_then(|a| a.as_str())
92 .ok_or_else(|| "regex.match() requires a text string argument".to_string())?;
93
94 let pattern = args
95 .get(1)
96 .and_then(|a| a.as_str())
97 .ok_or_else(|| "regex.match() requires a pattern string argument".to_string())?;
98
99 let re = regex::Regex::new(pattern)
100 .map_err(|e| format!("regex.match() invalid pattern: {}", e))?;
101
102 match re.captures(text) {
103 Some(caps) => {
104 let m = caps.get(0).unwrap();
105 Ok(ValueWord::from_some(match_to_nanboxed(&m, &caps)))
106 }
107 None => Ok(ValueWord::none()),
108 }
109 },
110 ModuleFunction {
111 description: "Find the first match of the pattern, returning a match object or none"
112 .to_string(),
113 params: vec![
114 ModuleParam {
115 name: "text".to_string(),
116 type_name: "string".to_string(),
117 required: true,
118 description: "Text to search".to_string(),
119 ..Default::default()
120 },
121 ModuleParam {
122 name: "pattern".to_string(),
123 type_name: "string".to_string(),
124 required: true,
125 description: "Regular expression pattern".to_string(),
126 ..Default::default()
127 },
128 ],
129 return_type: Some("Option<object>".to_string()),
130 },
131 );
132
133 module.add_function_with_schema(
135 "match_all",
136 |args: &[ValueWord], _ctx: &ModuleContext| {
137 let text = args
138 .first()
139 .and_then(|a| a.as_str())
140 .ok_or_else(|| "regex.match_all() requires a text string argument".to_string())?;
141
142 let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
143 "regex.match_all() requires a pattern string argument".to_string()
144 })?;
145
146 let re = regex::Regex::new(pattern)
147 .map_err(|e| format!("regex.match_all() invalid pattern: {}", e))?;
148
149 let matches: Vec<ValueWord> = re
150 .captures_iter(text)
151 .map(|caps| {
152 let m = caps.get(0).unwrap();
153 match_to_nanboxed(&m, &caps)
154 })
155 .collect();
156
157 Ok(ValueWord::from_array(Arc::new(matches)))
158 },
159 ModuleFunction {
160 description: "Find all non-overlapping matches of the pattern".to_string(),
161 params: vec![
162 ModuleParam {
163 name: "text".to_string(),
164 type_name: "string".to_string(),
165 required: true,
166 description: "Text to search".to_string(),
167 ..Default::default()
168 },
169 ModuleParam {
170 name: "pattern".to_string(),
171 type_name: "string".to_string(),
172 required: true,
173 description: "Regular expression pattern".to_string(),
174 ..Default::default()
175 },
176 ],
177 return_type: Some("Array<object>".to_string()),
178 },
179 );
180
181 module.add_function_with_schema(
183 "replace",
184 |args: &[ValueWord], _ctx: &ModuleContext| {
185 let text = args
186 .first()
187 .and_then(|a| a.as_str())
188 .ok_or_else(|| "regex.replace() requires a text string argument".to_string())?;
189
190 let pattern = args
191 .get(1)
192 .and_then(|a| a.as_str())
193 .ok_or_else(|| "regex.replace() requires a pattern string argument".to_string())?;
194
195 let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
196 "regex.replace() requires a replacement string argument".to_string()
197 })?;
198
199 let re = regex::Regex::new(pattern)
200 .map_err(|e| format!("regex.replace() invalid pattern: {}", e))?;
201
202 let result = re.replace(text, replacement);
203 Ok(ValueWord::from_string(Arc::new(result.into_owned())))
204 },
205 ModuleFunction {
206 description: "Replace the first match of the pattern with the replacement".to_string(),
207 params: vec![
208 ModuleParam {
209 name: "text".to_string(),
210 type_name: "string".to_string(),
211 required: true,
212 description: "Text to search".to_string(),
213 ..Default::default()
214 },
215 ModuleParam {
216 name: "pattern".to_string(),
217 type_name: "string".to_string(),
218 required: true,
219 description: "Regular expression pattern".to_string(),
220 ..Default::default()
221 },
222 ModuleParam {
223 name: "replacement".to_string(),
224 type_name: "string".to_string(),
225 required: true,
226 description: "Replacement string (supports $1, $2 for capture groups)"
227 .to_string(),
228 ..Default::default()
229 },
230 ],
231 return_type: Some("string".to_string()),
232 },
233 );
234
235 module.add_function_with_schema(
237 "replace_all",
238 |args: &[ValueWord], _ctx: &ModuleContext| {
239 let text = args
240 .first()
241 .and_then(|a| a.as_str())
242 .ok_or_else(|| "regex.replace_all() requires a text string argument".to_string())?;
243
244 let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
245 "regex.replace_all() requires a pattern string argument".to_string()
246 })?;
247
248 let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
249 "regex.replace_all() requires a replacement string argument".to_string()
250 })?;
251
252 let re = regex::Regex::new(pattern)
253 .map_err(|e| format!("regex.replace_all() invalid pattern: {}", e))?;
254
255 let result = re.replace_all(text, replacement);
256 Ok(ValueWord::from_string(Arc::new(result.into_owned())))
257 },
258 ModuleFunction {
259 description: "Replace all matches of the pattern with the replacement".to_string(),
260 params: vec![
261 ModuleParam {
262 name: "text".to_string(),
263 type_name: "string".to_string(),
264 required: true,
265 description: "Text to search".to_string(),
266 ..Default::default()
267 },
268 ModuleParam {
269 name: "pattern".to_string(),
270 type_name: "string".to_string(),
271 required: true,
272 description: "Regular expression pattern".to_string(),
273 ..Default::default()
274 },
275 ModuleParam {
276 name: "replacement".to_string(),
277 type_name: "string".to_string(),
278 required: true,
279 description: "Replacement string (supports $1, $2 for capture groups)"
280 .to_string(),
281 ..Default::default()
282 },
283 ],
284 return_type: Some("string".to_string()),
285 },
286 );
287
288 module.add_function_with_schema(
290 "split",
291 |args: &[ValueWord], _ctx: &ModuleContext| {
292 let text = args
293 .first()
294 .and_then(|a| a.as_str())
295 .ok_or_else(|| "regex.split() requires a text string argument".to_string())?;
296
297 let pattern = args
298 .get(1)
299 .and_then(|a| a.as_str())
300 .ok_or_else(|| "regex.split() requires a pattern string argument".to_string())?;
301
302 let re = regex::Regex::new(pattern)
303 .map_err(|e| format!("regex.split() invalid pattern: {}", e))?;
304
305 let parts: Vec<ValueWord> = re
306 .split(text)
307 .map(|s| ValueWord::from_string(Arc::new(s.to_string())))
308 .collect();
309
310 Ok(ValueWord::from_array(Arc::new(parts)))
311 },
312 ModuleFunction {
313 description: "Split the text at each match of the pattern".to_string(),
314 params: vec![
315 ModuleParam {
316 name: "text".to_string(),
317 type_name: "string".to_string(),
318 required: true,
319 description: "Text to split".to_string(),
320 ..Default::default()
321 },
322 ModuleParam {
323 name: "pattern".to_string(),
324 type_name: "string".to_string(),
325 required: true,
326 description: "Regular expression pattern to split on".to_string(),
327 ..Default::default()
328 },
329 ],
330 return_type: Some("Array<string>".to_string()),
331 },
332 );
333
334 module
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a string slice in a string `ValueWord`.
    fn s(val: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(val.to_string()))
    }

    /// Builds a `ModuleContext` with every optional hook unset. The schema
    /// registry is leaked so the context can carry a `'static` reference.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Looks up `export` on a freshly created regex module, invokes it with
    /// `args`, and unwraps the successful result.
    fn call(export: &str, args: &[ValueWord]) -> ValueWord {
        let module = create_regex_module();
        let ctx = test_ctx();
        let func = module.get_export(export).unwrap();
        func(args, &ctx).unwrap()
    }

    #[test]
    fn test_regex_module_creation() {
        let module = create_regex_module();
        assert_eq!(module.name, "regex");
        for export in [
            "is_match",
            "match",
            "match_all",
            "replace",
            "replace_all",
            "split",
        ] {
            assert!(module.has_export(export));
        }
    }

    #[test]
    fn test_is_match_true() {
        let outcome = call("is_match", &[s("hello world"), s(r"\bworld\b")]);
        assert_eq!(outcome.as_bool(), Some(true));
    }

    #[test]
    fn test_is_match_false() {
        let outcome = call("is_match", &[s("hello world"), s(r"^\d+$")]);
        assert_eq!(outcome.as_bool(), Some(false));
    }

    #[test]
    fn test_is_match_invalid_pattern() {
        // Invoked directly rather than through `call`, because the error
        // result is what is under test here.
        let module = create_regex_module();
        let ctx = test_ctx();
        let func = module.get_export("is_match").unwrap();
        let outcome = func(&[s("text"), s("[invalid")], &ctx);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_found() {
        let outcome = call("match", &[s("abc 123 def"), s(r"(\d+)")]);
        let match_obj = outcome.as_some_inner().expect("should be Some");
        let (keys, values, _) = match_obj.as_hashmap().expect("should be hashmap");
        let text_slot = keys
            .iter()
            .position(|key| key.as_str() == Some("text"))
            .unwrap();
        assert_eq!(values[text_slot].as_str(), Some("123"));
    }

    #[test]
    fn test_match_not_found() {
        let outcome = call("match", &[s("abc def"), s(r"\d+")]);
        assert!(outcome.is_none());
    }

    #[test]
    fn test_match_all() {
        let outcome = call("match_all", &[s("a1 b2 c3"), s(r"\d")]);
        let items = outcome
            .as_any_array()
            .expect("should be array")
            .to_generic();
        assert_eq!(items.len(), 3);
    }

    #[test]
    fn test_match_all_no_matches() {
        let outcome = call("match_all", &[s("abc"), s(r"\d+")]);
        let items = outcome
            .as_any_array()
            .expect("should be array")
            .to_generic();
        assert_eq!(items.len(), 0);
    }

    #[test]
    fn test_replace_first() {
        let outcome = call("replace", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(outcome.as_str(), Some("baz bar foo"));
    }

    #[test]
    fn test_replace_all() {
        let outcome = call("replace_all", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(outcome.as_str(), Some("baz bar baz"));
    }

    #[test]
    fn test_replace_with_capture_group() {
        let date = s("2024-01-15");
        let pattern = s(r"(\d{4})-(\d{2})-(\d{2})");
        let template = s("$3/$2/$1");
        let outcome = call("replace_all", &[date, pattern, template]);
        assert_eq!(outcome.as_str(), Some("15/01/2024"));
    }

    #[test]
    fn test_split() {
        let outcome = call("split", &[s("one,two,,three"), s(",")]);
        let items = outcome
            .as_any_array()
            .expect("should be array")
            .to_generic();
        assert_eq!(items.len(), 4);
        assert_eq!(items[0].as_str(), Some("one"));
        assert_eq!(items[1].as_str(), Some("two"));
        // Adjacent delimiters yield an empty piece between them.
        assert_eq!(items[2].as_str(), Some(""));
        assert_eq!(items[3].as_str(), Some("three"));
    }

    #[test]
    fn test_split_by_whitespace() {
        let outcome = call("split", &[s("hello world test"), s(r"\s+")]);
        let items = outcome
            .as_any_array()
            .expect("should be array")
            .to_generic();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str(), Some("hello"));
        assert_eq!(items[1].as_str(), Some("world"));
        assert_eq!(items[2].as_str(), Some("test"));
    }

    #[test]
    fn test_regex_schemas() {
        let module = create_regex_module();

        let match_schema = module.get_schema("match").unwrap();
        assert_eq!(match_schema.params.len(), 2);
        assert_eq!(match_schema.return_type.as_deref(), Some("Option<object>"));

        let replace_schema = module.get_schema("replace").unwrap();
        assert_eq!(replace_schema.params.len(), 3);

        let split_schema = module.get_schema("split").unwrap();
        assert_eq!(split_schema.return_type.as_deref(), Some("Array<string>"));
    }
}