1use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
7use shape_value::ValueWord;
8use std::sync::Arc;
9
10fn match_to_nanboxed(m: ®ex::Match, captures: ®ex::Captures) -> ValueWord {
13 let mut keys = Vec::with_capacity(4);
14 let mut values = Vec::with_capacity(4);
15
16 keys.push(ValueWord::from_string(Arc::new("text".to_string())));
17 values.push(ValueWord::from_string(Arc::new(m.as_str().to_string())));
18
19 keys.push(ValueWord::from_string(Arc::new("start".to_string())));
20 values.push(ValueWord::from_f64(m.start() as f64));
21
22 keys.push(ValueWord::from_string(Arc::new("end".to_string())));
23 values.push(ValueWord::from_f64(m.end() as f64));
24
25 let groups: Vec<ValueWord> = captures
26 .iter()
27 .skip(1)
28 .map(|opt| match opt {
29 Some(g) => ValueWord::from_string(Arc::new(g.as_str().to_string())),
30 None => ValueWord::none(),
31 })
32 .collect();
33 keys.push(ValueWord::from_string(Arc::new("groups".to_string())));
34 values.push(ValueWord::from_array(Arc::new(groups)));
35
36 ValueWord::from_hashmap_pairs(keys, values)
37}
38
39pub fn create_regex_module() -> ModuleExports {
41 let mut module = ModuleExports::new("std::core::regex");
42 module.description = "Regular expression matching and replacement".to_string();
43
44 module.add_function_with_schema(
46 "is_match",
47 |args: &[ValueWord], _ctx: &ModuleContext| {
48 let text = args
49 .first()
50 .and_then(|a| a.as_str())
51 .ok_or_else(|| "regex.is_match() requires a text string argument".to_string())?;
52
53 let pattern = args
54 .get(1)
55 .and_then(|a| a.as_str())
56 .ok_or_else(|| "regex.is_match() requires a pattern string argument".to_string())?;
57
58 let re = regex::Regex::new(pattern)
59 .map_err(|e| format!("regex.is_match() invalid pattern: {}", e))?;
60
61 Ok(ValueWord::from_bool(re.is_match(text)))
62 },
63 ModuleFunction {
64 description: "Test whether the pattern matches anywhere in the text".to_string(),
65 params: vec![
66 ModuleParam {
67 name: "text".to_string(),
68 type_name: "string".to_string(),
69 required: true,
70 description: "Text to search".to_string(),
71 ..Default::default()
72 },
73 ModuleParam {
74 name: "pattern".to_string(),
75 type_name: "string".to_string(),
76 required: true,
77 description: "Regular expression pattern".to_string(),
78 ..Default::default()
79 },
80 ],
81 return_type: Some("bool".to_string()),
82 },
83 );
84
85 module.add_function_with_schema(
87 "match",
88 |args: &[ValueWord], _ctx: &ModuleContext| {
89 let text = args
90 .first()
91 .and_then(|a| a.as_str())
92 .ok_or_else(|| "regex.match() requires a text string argument".to_string())?;
93
94 let pattern = args
95 .get(1)
96 .and_then(|a| a.as_str())
97 .ok_or_else(|| "regex.match() requires a pattern string argument".to_string())?;
98
99 let re = regex::Regex::new(pattern)
100 .map_err(|e| format!("regex.match() invalid pattern: {}", e))?;
101
102 match re.captures(text) {
103 Some(caps) => {
104 let m = caps.get(0).unwrap();
105 Ok(ValueWord::from_some(match_to_nanboxed(&m, &caps)))
106 }
107 None => Ok(ValueWord::none()),
108 }
109 },
110 ModuleFunction {
111 description: "Find the first match of the pattern, returning a match object or none"
112 .to_string(),
113 params: vec![
114 ModuleParam {
115 name: "text".to_string(),
116 type_name: "string".to_string(),
117 required: true,
118 description: "Text to search".to_string(),
119 ..Default::default()
120 },
121 ModuleParam {
122 name: "pattern".to_string(),
123 type_name: "string".to_string(),
124 required: true,
125 description: "Regular expression pattern".to_string(),
126 ..Default::default()
127 },
128 ],
129 return_type: Some("Option<object>".to_string()),
130 },
131 );
132
133 module.add_function(
135 "find",
136 |args: &[ValueWord], _ctx: &ModuleContext| {
137 let text = args
138 .first()
139 .and_then(|a| a.as_str())
140 .ok_or_else(|| "regex.find() requires a text string argument".to_string())?;
141 let pattern = args
142 .get(1)
143 .and_then(|a| a.as_str())
144 .ok_or_else(|| "regex.find() requires a pattern string argument".to_string())?;
145 let re = regex::Regex::new(pattern)
146 .map_err(|e| format!("regex.find() invalid pattern: {}", e))?;
147 match re.captures(text) {
148 Some(caps) => {
149 let m = caps.get(0).unwrap();
150 Ok(ValueWord::from_some(match_to_nanboxed(&m, &caps)))
151 }
152 None => Ok(ValueWord::none()),
153 }
154 },
155 );
156
157 module.add_function_with_schema(
159 "match_all",
160 |args: &[ValueWord], _ctx: &ModuleContext| {
161 let text = args
162 .first()
163 .and_then(|a| a.as_str())
164 .ok_or_else(|| "regex.match_all() requires a text string argument".to_string())?;
165
166 let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
167 "regex.match_all() requires a pattern string argument".to_string()
168 })?;
169
170 let re = regex::Regex::new(pattern)
171 .map_err(|e| format!("regex.match_all() invalid pattern: {}", e))?;
172
173 let matches: Vec<ValueWord> = re
174 .captures_iter(text)
175 .map(|caps| {
176 let m = caps.get(0).unwrap();
177 match_to_nanboxed(&m, &caps)
178 })
179 .collect();
180
181 Ok(ValueWord::from_array(Arc::new(matches)))
182 },
183 ModuleFunction {
184 description: "Find all non-overlapping matches of the pattern".to_string(),
185 params: vec![
186 ModuleParam {
187 name: "text".to_string(),
188 type_name: "string".to_string(),
189 required: true,
190 description: "Text to search".to_string(),
191 ..Default::default()
192 },
193 ModuleParam {
194 name: "pattern".to_string(),
195 type_name: "string".to_string(),
196 required: true,
197 description: "Regular expression pattern".to_string(),
198 ..Default::default()
199 },
200 ],
201 return_type: Some("Array<object>".to_string()),
202 },
203 );
204
205 module.add_function_with_schema(
207 "replace",
208 |args: &[ValueWord], _ctx: &ModuleContext| {
209 let text = args
210 .first()
211 .and_then(|a| a.as_str())
212 .ok_or_else(|| "regex.replace() requires a text string argument".to_string())?;
213
214 let pattern = args
215 .get(1)
216 .and_then(|a| a.as_str())
217 .ok_or_else(|| "regex.replace() requires a pattern string argument".to_string())?;
218
219 let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
220 "regex.replace() requires a replacement string argument".to_string()
221 })?;
222
223 let re = regex::Regex::new(pattern)
224 .map_err(|e| format!("regex.replace() invalid pattern: {}", e))?;
225
226 let result = re.replace(text, replacement);
227 Ok(ValueWord::from_string(Arc::new(result.into_owned())))
228 },
229 ModuleFunction {
230 description: "Replace the first match of the pattern with the replacement".to_string(),
231 params: vec![
232 ModuleParam {
233 name: "text".to_string(),
234 type_name: "string".to_string(),
235 required: true,
236 description: "Text to search".to_string(),
237 ..Default::default()
238 },
239 ModuleParam {
240 name: "pattern".to_string(),
241 type_name: "string".to_string(),
242 required: true,
243 description: "Regular expression pattern".to_string(),
244 ..Default::default()
245 },
246 ModuleParam {
247 name: "replacement".to_string(),
248 type_name: "string".to_string(),
249 required: true,
250 description: "Replacement string (supports $1, $2 for capture groups)"
251 .to_string(),
252 ..Default::default()
253 },
254 ],
255 return_type: Some("string".to_string()),
256 },
257 );
258
259 module.add_function_with_schema(
261 "replace_all",
262 |args: &[ValueWord], _ctx: &ModuleContext| {
263 let text = args
264 .first()
265 .and_then(|a| a.as_str())
266 .ok_or_else(|| "regex.replace_all() requires a text string argument".to_string())?;
267
268 let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
269 "regex.replace_all() requires a pattern string argument".to_string()
270 })?;
271
272 let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
273 "regex.replace_all() requires a replacement string argument".to_string()
274 })?;
275
276 let re = regex::Regex::new(pattern)
277 .map_err(|e| format!("regex.replace_all() invalid pattern: {}", e))?;
278
279 let result = re.replace_all(text, replacement);
280 Ok(ValueWord::from_string(Arc::new(result.into_owned())))
281 },
282 ModuleFunction {
283 description: "Replace all matches of the pattern with the replacement".to_string(),
284 params: vec![
285 ModuleParam {
286 name: "text".to_string(),
287 type_name: "string".to_string(),
288 required: true,
289 description: "Text to search".to_string(),
290 ..Default::default()
291 },
292 ModuleParam {
293 name: "pattern".to_string(),
294 type_name: "string".to_string(),
295 required: true,
296 description: "Regular expression pattern".to_string(),
297 ..Default::default()
298 },
299 ModuleParam {
300 name: "replacement".to_string(),
301 type_name: "string".to_string(),
302 required: true,
303 description: "Replacement string (supports $1, $2 for capture groups)"
304 .to_string(),
305 ..Default::default()
306 },
307 ],
308 return_type: Some("string".to_string()),
309 },
310 );
311
312 module.add_function_with_schema(
314 "split",
315 |args: &[ValueWord], _ctx: &ModuleContext| {
316 let text = args
317 .first()
318 .and_then(|a| a.as_str())
319 .ok_or_else(|| "regex.split() requires a text string argument".to_string())?;
320
321 let pattern = args
322 .get(1)
323 .and_then(|a| a.as_str())
324 .ok_or_else(|| "regex.split() requires a pattern string argument".to_string())?;
325
326 let re = regex::Regex::new(pattern)
327 .map_err(|e| format!("regex.split() invalid pattern: {}", e))?;
328
329 let parts: Vec<ValueWord> = re
330 .split(text)
331 .map(|s| ValueWord::from_string(Arc::new(s.to_string())))
332 .collect();
333
334 Ok(ValueWord::from_array(Arc::new(parts)))
335 },
336 ModuleFunction {
337 description: "Split the text at each match of the pattern".to_string(),
338 params: vec![
339 ModuleParam {
340 name: "text".to_string(),
341 type_name: "string".to_string(),
342 required: true,
343 description: "Text to split".to_string(),
344 ..Default::default()
345 },
346 ModuleParam {
347 name: "pattern".to_string(),
348 type_name: "string".to_string(),
349 required: true,
350 description: "Regular expression pattern to split on".to_string(),
351 ..Default::default()
352 },
353 ],
354 return_type: Some("Array<string>".to_string()),
355 },
356 );
357
358 module
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a string slice in a string `ValueWord`.
    fn s(val: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(val.to_string()))
    }

    /// Builds a `ModuleContext` with every optional hook unset.
    ///
    /// The schema registry is leaked on purpose so the context can borrow it
    /// for `'static`; this is only acceptable because it runs in tests.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Invokes the export `name` with `args` on a fresh module and returns its
    /// value, panicking if the export is missing or the call returns an error.
    fn call(name: &str, args: &[ValueWord]) -> ValueWord {
        let module = create_regex_module();
        let ctx = test_ctx();
        let f = module.get_export(name).unwrap();
        f(args, &ctx).unwrap()
    }

    #[test]
    fn test_regex_module_creation() {
        let module = create_regex_module();
        assert_eq!(module.name, "std::core::regex");
        for name in [
            "is_match",
            "match",
            "find",
            "match_all",
            "replace",
            "replace_all",
            "split",
        ] {
            assert!(module.has_export(name), "missing export `{}`", name);
        }
    }

    #[test]
    fn test_is_match_true() {
        let result = call("is_match", &[s("hello world"), s(r"\bworld\b")]);
        assert_eq!(result.as_bool(), Some(true));
    }

    #[test]
    fn test_is_match_false() {
        let result = call("is_match", &[s("hello world"), s(r"^\d+$")]);
        assert_eq!(result.as_bool(), Some(false));
    }

    #[test]
    fn test_is_match_invalid_pattern() {
        let module = create_regex_module();
        let ctx = test_ctx();
        let f = module.get_export("is_match").unwrap();
        assert!(f(&[s("text"), s("[invalid")], &ctx).is_err());
    }

    #[test]
    fn test_match_found() {
        let result = call("match", &[s("abc 123 def"), s(r"(\d+)")]);
        let inner = result.as_some_inner().expect("should be Some");
        let (keys, values, _) = inner.as_hashmap().expect("should be hashmap");
        let text_idx = keys
            .iter()
            .position(|k| k.as_str() == Some("text"))
            .unwrap();
        assert_eq!(values[text_idx].as_str(), Some("123"));
    }

    #[test]
    fn test_match_not_found() {
        let result = call("match", &[s("abc def"), s(r"\d+")]);
        assert!(result.is_none());
    }

    // `find` is registered separately from `match`, so it gets its own coverage.
    #[test]
    fn test_find_found() {
        let result = call("find", &[s("abc 123 def"), s(r"(\d+)")]);
        let inner = result.as_some_inner().expect("should be Some");
        let (keys, values, _) = inner.as_hashmap().expect("should be hashmap");
        let text_idx = keys
            .iter()
            .position(|k| k.as_str() == Some("text"))
            .unwrap();
        assert_eq!(values[text_idx].as_str(), Some("123"));
    }

    #[test]
    fn test_find_not_found() {
        let result = call("find", &[s("abc def"), s(r"\d+")]);
        assert!(result.is_none());
    }

    #[test]
    fn test_match_all() {
        let result = call("match_all", &[s("a1 b2 c3"), s(r"\d")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 3);
    }

    #[test]
    fn test_match_all_no_matches() {
        let result = call("match_all", &[s("abc"), s(r"\d+")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 0);
    }

    #[test]
    fn test_replace_first() {
        let result = call("replace", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(result.as_str(), Some("baz bar foo"));
    }

    #[test]
    fn test_replace_all() {
        let result = call("replace_all", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(result.as_str(), Some("baz bar baz"));
    }

    #[test]
    fn test_replace_with_capture_group() {
        let result = call(
            "replace_all",
            &[
                s("2024-01-15"),
                s(r"(\d{4})-(\d{2})-(\d{2})"),
                s("$3/$2/$1"),
            ],
        );
        assert_eq!(result.as_str(), Some("15/01/2024"));
    }

    #[test]
    fn test_split() {
        let result = call("split", &[s("one,two,,three"), s(",")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        // Adjacent separators yield an empty string between them.
        assert_eq!(arr.len(), 4);
        assert_eq!(arr[0].as_str(), Some("one"));
        assert_eq!(arr[1].as_str(), Some("two"));
        assert_eq!(arr[2].as_str(), Some(""));
        assert_eq!(arr[3].as_str(), Some("three"));
    }

    #[test]
    fn test_split_by_whitespace() {
        let result = call("split", &[s("hello world test"), s(r"\s+")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_str(), Some("hello"));
        assert_eq!(arr[1].as_str(), Some("world"));
        assert_eq!(arr[2].as_str(), Some("test"));
    }

    #[test]
    fn test_regex_schemas() {
        let module = create_regex_module();

        let match_schema = module.get_schema("match").unwrap();
        assert_eq!(match_schema.params.len(), 2);
        assert_eq!(match_schema.return_type.as_deref(), Some("Option<object>"));

        let replace_schema = module.get_schema("replace").unwrap();
        assert_eq!(replace_schema.params.len(), 3);

        let split_schema = module.get_schema("split").unwrap();
        assert_eq!(split_schema.return_type.as_deref(), Some("Array<string>"));
    }
}