Skip to main content

shape_runtime/stdlib/
regex.rs

//! Native `regex` module for regular expression operations.
//!
//! Exports: regex.match, regex.find, regex.match_all, regex.replace,
//!          regex.replace_all, regex.is_match, regex.split
5
6use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
7use shape_value::ValueWord;
8use std::sync::Arc;
9
10/// Build a match result object as a ValueWord HashMap.
11/// Fields: text (string), start (number), end (number), groups (array of strings).
12fn match_to_nanboxed(m: &regex::Match, captures: &regex::Captures) -> ValueWord {
13    let mut keys = Vec::with_capacity(4);
14    let mut values = Vec::with_capacity(4);
15
16    keys.push(ValueWord::from_string(Arc::new("text".to_string())));
17    values.push(ValueWord::from_string(Arc::new(m.as_str().to_string())));
18
19    keys.push(ValueWord::from_string(Arc::new("start".to_string())));
20    values.push(ValueWord::from_f64(m.start() as f64));
21
22    keys.push(ValueWord::from_string(Arc::new("end".to_string())));
23    values.push(ValueWord::from_f64(m.end() as f64));
24
25    let groups: Vec<ValueWord> = captures
26        .iter()
27        .skip(1)
28        .map(|opt| match opt {
29            Some(g) => ValueWord::from_string(Arc::new(g.as_str().to_string())),
30            None => ValueWord::none(),
31        })
32        .collect();
33    keys.push(ValueWord::from_string(Arc::new("groups".to_string())));
34    values.push(ValueWord::from_array(Arc::new(groups)));
35
36    ValueWord::from_hashmap_pairs(keys, values)
37}
38
39/// Create the `regex` module with regular expression functions.
40pub fn create_regex_module() -> ModuleExports {
41    let mut module = ModuleExports::new("std::core::regex");
42    module.description = "Regular expression matching and replacement".to_string();
43
44    // regex.is_match(text: string, pattern: string) -> bool
45    module.add_function_with_schema(
46        "is_match",
47        |args: &[ValueWord], _ctx: &ModuleContext| {
48            let text = args
49                .first()
50                .and_then(|a| a.as_str())
51                .ok_or_else(|| "regex.is_match() requires a text string argument".to_string())?;
52
53            let pattern = args
54                .get(1)
55                .and_then(|a| a.as_str())
56                .ok_or_else(|| "regex.is_match() requires a pattern string argument".to_string())?;
57
58            let re = regex::Regex::new(pattern)
59                .map_err(|e| format!("regex.is_match() invalid pattern: {}", e))?;
60
61            Ok(ValueWord::from_bool(re.is_match(text)))
62        },
63        ModuleFunction {
64            description: "Test whether the pattern matches anywhere in the text".to_string(),
65            params: vec![
66                ModuleParam {
67                    name: "text".to_string(),
68                    type_name: "string".to_string(),
69                    required: true,
70                    description: "Text to search".to_string(),
71                    ..Default::default()
72                },
73                ModuleParam {
74                    name: "pattern".to_string(),
75                    type_name: "string".to_string(),
76                    required: true,
77                    description: "Regular expression pattern".to_string(),
78                    ..Default::default()
79                },
80            ],
81            return_type: Some("bool".to_string()),
82        },
83    );
84
85    // regex.match(text: string, pattern: string) -> Option<object>
86    module.add_function_with_schema(
87        "match",
88        |args: &[ValueWord], _ctx: &ModuleContext| {
89            let text = args
90                .first()
91                .and_then(|a| a.as_str())
92                .ok_or_else(|| "regex.match() requires a text string argument".to_string())?;
93
94            let pattern = args
95                .get(1)
96                .and_then(|a| a.as_str())
97                .ok_or_else(|| "regex.match() requires a pattern string argument".to_string())?;
98
99            let re = regex::Regex::new(pattern)
100                .map_err(|e| format!("regex.match() invalid pattern: {}", e))?;
101
102            match re.captures(text) {
103                Some(caps) => {
104                    let m = caps.get(0).unwrap();
105                    Ok(ValueWord::from_some(match_to_nanboxed(&m, &caps)))
106                }
107                None => Ok(ValueWord::none()),
108            }
109        },
110        ModuleFunction {
111            description: "Find the first match of the pattern, returning a match object or none"
112                .to_string(),
113            params: vec![
114                ModuleParam {
115                    name: "text".to_string(),
116                    type_name: "string".to_string(),
117                    required: true,
118                    description: "Text to search".to_string(),
119                    ..Default::default()
120                },
121                ModuleParam {
122                    name: "pattern".to_string(),
123                    type_name: "string".to_string(),
124                    required: true,
125                    description: "Regular expression pattern".to_string(),
126                    ..Default::default()
127                },
128            ],
129            return_type: Some("Option<object>".to_string()),
130        },
131    );
132
133    // regex.find(text, pattern) — alias for `match` (since `match` is a keyword in Shape)
134    module.add_function(
135        "find",
136        |args: &[ValueWord], _ctx: &ModuleContext| {
137            let text = args
138                .first()
139                .and_then(|a| a.as_str())
140                .ok_or_else(|| "regex.find() requires a text string argument".to_string())?;
141            let pattern = args
142                .get(1)
143                .and_then(|a| a.as_str())
144                .ok_or_else(|| "regex.find() requires a pattern string argument".to_string())?;
145            let re = regex::Regex::new(pattern)
146                .map_err(|e| format!("regex.find() invalid pattern: {}", e))?;
147            match re.captures(text) {
148                Some(caps) => {
149                    let m = caps.get(0).unwrap();
150                    Ok(ValueWord::from_some(match_to_nanboxed(&m, &caps)))
151                }
152                None => Ok(ValueWord::none()),
153            }
154        },
155    );
156
157    // regex.match_all(text: string, pattern: string) -> Array<object>
158    module.add_function_with_schema(
159        "match_all",
160        |args: &[ValueWord], _ctx: &ModuleContext| {
161            let text = args
162                .first()
163                .and_then(|a| a.as_str())
164                .ok_or_else(|| "regex.match_all() requires a text string argument".to_string())?;
165
166            let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
167                "regex.match_all() requires a pattern string argument".to_string()
168            })?;
169
170            let re = regex::Regex::new(pattern)
171                .map_err(|e| format!("regex.match_all() invalid pattern: {}", e))?;
172
173            let matches: Vec<ValueWord> = re
174                .captures_iter(text)
175                .map(|caps| {
176                    let m = caps.get(0).unwrap();
177                    match_to_nanboxed(&m, &caps)
178                })
179                .collect();
180
181            Ok(ValueWord::from_array(Arc::new(matches)))
182        },
183        ModuleFunction {
184            description: "Find all non-overlapping matches of the pattern".to_string(),
185            params: vec![
186                ModuleParam {
187                    name: "text".to_string(),
188                    type_name: "string".to_string(),
189                    required: true,
190                    description: "Text to search".to_string(),
191                    ..Default::default()
192                },
193                ModuleParam {
194                    name: "pattern".to_string(),
195                    type_name: "string".to_string(),
196                    required: true,
197                    description: "Regular expression pattern".to_string(),
198                    ..Default::default()
199                },
200            ],
201            return_type: Some("Array<object>".to_string()),
202        },
203    );
204
205    // regex.replace(text: string, pattern: string, replacement: string) -> string
206    module.add_function_with_schema(
207        "replace",
208        |args: &[ValueWord], _ctx: &ModuleContext| {
209            let text = args
210                .first()
211                .and_then(|a| a.as_str())
212                .ok_or_else(|| "regex.replace() requires a text string argument".to_string())?;
213
214            let pattern = args
215                .get(1)
216                .and_then(|a| a.as_str())
217                .ok_or_else(|| "regex.replace() requires a pattern string argument".to_string())?;
218
219            let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
220                "regex.replace() requires a replacement string argument".to_string()
221            })?;
222
223            let re = regex::Regex::new(pattern)
224                .map_err(|e| format!("regex.replace() invalid pattern: {}", e))?;
225
226            let result = re.replace(text, replacement);
227            Ok(ValueWord::from_string(Arc::new(result.into_owned())))
228        },
229        ModuleFunction {
230            description: "Replace the first match of the pattern with the replacement".to_string(),
231            params: vec![
232                ModuleParam {
233                    name: "text".to_string(),
234                    type_name: "string".to_string(),
235                    required: true,
236                    description: "Text to search".to_string(),
237                    ..Default::default()
238                },
239                ModuleParam {
240                    name: "pattern".to_string(),
241                    type_name: "string".to_string(),
242                    required: true,
243                    description: "Regular expression pattern".to_string(),
244                    ..Default::default()
245                },
246                ModuleParam {
247                    name: "replacement".to_string(),
248                    type_name: "string".to_string(),
249                    required: true,
250                    description: "Replacement string (supports $1, $2 for capture groups)"
251                        .to_string(),
252                    ..Default::default()
253                },
254            ],
255            return_type: Some("string".to_string()),
256        },
257    );
258
259    // regex.replace_all(text: string, pattern: string, replacement: string) -> string
260    module.add_function_with_schema(
261        "replace_all",
262        |args: &[ValueWord], _ctx: &ModuleContext| {
263            let text = args
264                .first()
265                .and_then(|a| a.as_str())
266                .ok_or_else(|| "regex.replace_all() requires a text string argument".to_string())?;
267
268            let pattern = args.get(1).and_then(|a| a.as_str()).ok_or_else(|| {
269                "regex.replace_all() requires a pattern string argument".to_string()
270            })?;
271
272            let replacement = args.get(2).and_then(|a| a.as_str()).ok_or_else(|| {
273                "regex.replace_all() requires a replacement string argument".to_string()
274            })?;
275
276            let re = regex::Regex::new(pattern)
277                .map_err(|e| format!("regex.replace_all() invalid pattern: {}", e))?;
278
279            let result = re.replace_all(text, replacement);
280            Ok(ValueWord::from_string(Arc::new(result.into_owned())))
281        },
282        ModuleFunction {
283            description: "Replace all matches of the pattern with the replacement".to_string(),
284            params: vec![
285                ModuleParam {
286                    name: "text".to_string(),
287                    type_name: "string".to_string(),
288                    required: true,
289                    description: "Text to search".to_string(),
290                    ..Default::default()
291                },
292                ModuleParam {
293                    name: "pattern".to_string(),
294                    type_name: "string".to_string(),
295                    required: true,
296                    description: "Regular expression pattern".to_string(),
297                    ..Default::default()
298                },
299                ModuleParam {
300                    name: "replacement".to_string(),
301                    type_name: "string".to_string(),
302                    required: true,
303                    description: "Replacement string (supports $1, $2 for capture groups)"
304                        .to_string(),
305                    ..Default::default()
306                },
307            ],
308            return_type: Some("string".to_string()),
309        },
310    );
311
312    // regex.split(text: string, pattern: string) -> Array<string>
313    module.add_function_with_schema(
314        "split",
315        |args: &[ValueWord], _ctx: &ModuleContext| {
316            let text = args
317                .first()
318                .and_then(|a| a.as_str())
319                .ok_or_else(|| "regex.split() requires a text string argument".to_string())?;
320
321            let pattern = args
322                .get(1)
323                .and_then(|a| a.as_str())
324                .ok_or_else(|| "regex.split() requires a pattern string argument".to_string())?;
325
326            let re = regex::Regex::new(pattern)
327                .map_err(|e| format!("regex.split() invalid pattern: {}", e))?;
328
329            let parts: Vec<ValueWord> = re
330                .split(text)
331                .map(|s| ValueWord::from_string(Arc::new(s.to_string())))
332                .collect();
333
334            Ok(ValueWord::from_array(Arc::new(parts)))
335        },
336        ModuleFunction {
337            description: "Split the text at each match of the pattern".to_string(),
338            params: vec![
339                ModuleParam {
340                    name: "text".to_string(),
341                    type_name: "string".to_string(),
342                    required: true,
343                    description: "Text to split".to_string(),
344                    ..Default::default()
345                },
346                ModuleParam {
347                    name: "pattern".to_string(),
348                    type_name: "string".to_string(),
349                    required: true,
350                    description: "Regular expression pattern to split on".to_string(),
351                    ..Default::default()
352                },
353            ],
354            return_type: Some("Array<string>".to_string()),
355        },
356    );
357
358    module
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap a `&str` in a ValueWord string.
    fn s(val: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(val.to_string()))
    }

    /// Build a minimal `ModuleContext` with every optional hook unset.
    ///
    /// The schema registry is intentionally leaked so the context can be
    /// `'static`; this is acceptable in short-lived test processes.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Call the export `name` with `args`, panicking if the export is missing
    /// or the call returns an error.
    fn eval(name: &str, args: &[ValueWord]) -> ValueWord {
        let module = create_regex_module();
        let ctx = test_ctx();
        let f = module.get_export(name).expect("export should exist");
        f(args, &ctx).expect("call should succeed")
    }

    #[test]
    fn test_regex_module_creation() {
        let module = create_regex_module();
        assert_eq!(module.name, "std::core::regex");
        for export in [
            "is_match",
            "match",
            "find",
            "match_all",
            "replace",
            "replace_all",
            "split",
        ] {
            assert!(module.has_export(export), "missing export: {}", export);
        }
    }

    #[test]
    fn test_is_match_true() {
        let result = eval("is_match", &[s("hello world"), s(r"\bworld\b")]);
        assert_eq!(result.as_bool(), Some(true));
    }

    #[test]
    fn test_is_match_false() {
        let result = eval("is_match", &[s("hello world"), s(r"^\d+$")]);
        assert_eq!(result.as_bool(), Some(false));
    }

    #[test]
    fn test_is_match_invalid_pattern() {
        let module = create_regex_module();
        let ctx = test_ctx();
        let f = module.get_export("is_match").unwrap();
        assert!(f(&[s("text"), s("[invalid")], &ctx).is_err());
    }

    #[test]
    fn test_missing_arguments_are_errors() {
        let module = create_regex_module();
        let ctx = test_ctx();

        // No arguments at all: the text argument is reported as missing.
        let is_match = module.get_export("is_match").unwrap();
        assert!(is_match(&[], &ctx).is_err());

        // Text only: the pattern argument is missing.
        let split = module.get_export("split").unwrap();
        assert!(split(&[s("text")], &ctx).is_err());

        // Text and pattern only: the replacement argument is missing.
        let replace = module.get_export("replace").unwrap();
        assert!(replace(&[s("text"), s("t")], &ctx).is_err());
    }

    #[test]
    fn test_match_found() {
        let result = eval("match", &[s("abc 123 def"), s(r"(\d+)")]);
        // Should be Some(match_object)
        let inner = result.as_some_inner().expect("should be Some");
        let (keys, values, _) = inner.as_hashmap().expect("should be hashmap");
        // Find "text" field
        let text_idx = keys
            .iter()
            .position(|k| k.as_str() == Some("text"))
            .unwrap();
        assert_eq!(values[text_idx].as_str(), Some("123"));
    }

    #[test]
    fn test_match_not_found() {
        let result = eval("match", &[s("abc def"), s(r"\d+")]);
        assert!(result.is_none());
    }

    #[test]
    fn test_find_found() {
        // `find` is the alias for `match` and must produce the same object.
        let result = eval("find", &[s("abc 123 def"), s(r"(\d+)")]);
        let inner = result.as_some_inner().expect("should be Some");
        let (keys, values, _) = inner.as_hashmap().expect("should be hashmap");

        let text_idx = keys
            .iter()
            .position(|k| k.as_str() == Some("text"))
            .unwrap();
        assert_eq!(values[text_idx].as_str(), Some("123"));

        let groups_idx = keys
            .iter()
            .position(|k| k.as_str() == Some("groups"))
            .unwrap();
        let groups = values[groups_idx]
            .as_any_array()
            .expect("groups should be array")
            .to_generic();
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].as_str(), Some("123"));
    }

    #[test]
    fn test_find_not_found() {
        let result = eval("find", &[s("abc def"), s(r"\d+")]);
        assert!(result.is_none());
    }

    #[test]
    fn test_find_invalid_pattern() {
        let module = create_regex_module();
        let ctx = test_ctx();
        let f = module.get_export("find").unwrap();
        assert!(f(&[s("text"), s("[invalid")], &ctx).is_err());
    }

    #[test]
    fn test_match_all() {
        let result = eval("match_all", &[s("a1 b2 c3"), s(r"\d")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 3);
    }

    #[test]
    fn test_match_all_no_matches() {
        let result = eval("match_all", &[s("abc"), s(r"\d+")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 0);
    }

    #[test]
    fn test_replace_first() {
        let result = eval("replace", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(result.as_str(), Some("baz bar foo"));
    }

    #[test]
    fn test_replace_all() {
        let result = eval("replace_all", &[s("foo bar foo"), s("foo"), s("baz")]);
        assert_eq!(result.as_str(), Some("baz bar baz"));
    }

    #[test]
    fn test_replace_with_capture_group() {
        let result = eval(
            "replace_all",
            &[
                s("2024-01-15"),
                s(r"(\d{4})-(\d{2})-(\d{2})"),
                s("$3/$2/$1"),
            ],
        );
        assert_eq!(result.as_str(), Some("15/01/2024"));
    }

    #[test]
    fn test_split() {
        let result = eval("split", &[s("one,two,,three"), s(",")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 4);
        assert_eq!(arr[0].as_str(), Some("one"));
        assert_eq!(arr[1].as_str(), Some("two"));
        assert_eq!(arr[2].as_str(), Some(""));
        assert_eq!(arr[3].as_str(), Some("three"));
    }

    #[test]
    fn test_split_by_whitespace() {
        let result = eval("split", &[s("hello   world  test"), s(r"\s+")]);
        let arr = result.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_str(), Some("hello"));
        assert_eq!(arr[1].as_str(), Some("world"));
        assert_eq!(arr[2].as_str(), Some("test"));
    }

    #[test]
    fn test_regex_schemas() {
        let module = create_regex_module();

        let match_schema = module.get_schema("match").unwrap();
        assert_eq!(match_schema.params.len(), 2);
        assert_eq!(match_schema.return_type.as_deref(), Some("Option<object>"));

        let replace_schema = module.get_schema("replace").unwrap();
        assert_eq!(replace_schema.params.len(), 3);

        let split_schema = module.get_schema("split").unwrap();
        assert_eq!(split_schema.return_type.as_deref(), Some("Array<string>"));
    }
}
544}