Skip to main content

shape_runtime/stdlib/
regex.rs

//! Native `regex` module for regular expression operations.
//!
//! Exports: regex.match, regex.match_all, regex.find, regex.replace,
//!          regex.replace_all, regex.is_match, regex.split
//!
//! Phase 2c migration: ported to the typed marshal layer.
//! Phase 2d Cluster #4 (2026-05-07): regex.match and regex.find activated
//! using the new `TypedReturn::SomeObjectPairs` variant.
9
10use crate::marshal::{register_typed_fn_2, register_typed_fn_3};
11use crate::module_exports::ModuleExports;
12use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
13use std::sync::Arc;
14
15/// Build a match-result row as a typed `(name, ConcreteReturn)` pair list.
16/// Fields: text (string), start (int), end (int), groups (Array<string>).
17fn match_to_pairs(m: &regex::Match, captures: &regex::Captures) -> Vec<(String, ConcreteReturn)> {
18    let groups: Vec<String> = captures
19        .iter()
20        .skip(1)
21        .map(|opt| match opt {
22            Some(g) => g.as_str().to_string(),
23            None => String::new(),
24        })
25        .collect();
26    vec![
27        ("text".to_string(), ConcreteReturn::String(m.as_str().to_string())),
28        ("start".to_string(), ConcreteReturn::I64(m.start() as i64)),
29        ("end".to_string(), ConcreteReturn::I64(m.end() as i64)),
30        ("groups".to_string(), ConcreteReturn::ArrayString(groups)),
31    ]
32}
33
34/// Create the `regex` module with regular expression functions.
35pub fn create_regex_module() -> ModuleExports {
36    let mut module = ModuleExports::new("std::core::regex");
37    module.description = "Regular expression matching and replacement".to_string();
38
39    // regex.match(text: string, pattern: string) -> Option<{text, start, end, groups}>
40    register_typed_fn_2::<_, Arc<String>, Arc<String>>(
41        &mut module,
42        "match",
43        "Find the first match of the pattern, returning Some({text, start, end, groups}) or None",
44        [("text", "string"), ("pattern", "string")],
45        ConcreteType::Option(Box::new(ConcreteType::Object)),
46        |text, pattern, _ctx| {
47            let re = regex::Regex::new(pattern.as_str())
48                .map_err(|e| format!("regex.match() invalid pattern: {}", e))?;
49            match re.captures(text.as_str()) {
50                Some(caps) => {
51                    let m = caps.get(0).unwrap();
52                    Ok(TypedReturn::SomeObjectPairs(match_to_pairs(&m, &caps)))
53                }
54                None => Ok(TypedReturn::None),
55            }
56        },
57    );
58
59    // regex.find(text: string, pattern: string) -> Option<{text, start, end, groups}>
60    //
61    // Same shape as regex.match — kept as a separate name for the
62    // historical "find first match" idiom in Shape user code.
63    register_typed_fn_2::<_, Arc<String>, Arc<String>>(
64        &mut module,
65        "find",
66        "Find the first match of the pattern (alias for regex.match)",
67        [("text", "string"), ("pattern", "string")],
68        ConcreteType::Option(Box::new(ConcreteType::Object)),
69        |text, pattern, _ctx| {
70            let re = regex::Regex::new(pattern.as_str())
71                .map_err(|e| format!("regex.find() invalid pattern: {}", e))?;
72            match re.captures(text.as_str()) {
73                Some(caps) => {
74                    let m = caps.get(0).unwrap();
75                    Ok(TypedReturn::SomeObjectPairs(match_to_pairs(&m, &caps)))
76                }
77                None => Ok(TypedReturn::None),
78            }
79        },
80    );
81
82    // regex.is_match(text: string, pattern: string) -> bool
83    register_typed_fn_2::<_, Arc<String>, Arc<String>>(
84        &mut module,
85        "is_match",
86        "Test whether the pattern matches anywhere in the text",
87        [("text", "string"), ("pattern", "string")],
88        ConcreteType::Bool,
89        |text, pattern, _ctx| {
90            let re = regex::Regex::new(pattern.as_str())
91                .map_err(|e| format!("regex.is_match() invalid pattern: {}", e))?;
92            Ok(TypedReturn::Concrete(ConcreteReturn::Bool(re.is_match(text.as_str()))))
93        },
94    );
95
96    // regex.match_all(text: string, pattern: string) -> Array<object>
97    register_typed_fn_2::<_, Arc<String>, Arc<String>>(
98        &mut module,
99        "match_all",
100        "Find all non-overlapping matches of the pattern",
101        [("text", "string"), ("pattern", "string")],
102        ConcreteType::ArrayObject("Array<object>".to_string()),
103        |text, pattern, _ctx| {
104            let re = regex::Regex::new(pattern.as_str())
105                .map_err(|e| format!("regex.match_all() invalid pattern: {}", e))?;
106            let matches: Vec<Vec<(String, ConcreteReturn)>> = re
107                .captures_iter(text.as_str())
108                .map(|caps| {
109                    let m = caps.get(0).unwrap();
110                    match_to_pairs(&m, &caps)
111                })
112                .collect();
113            Ok(TypedReturn::ArrayObjectPairs(matches))
114        },
115    );
116
117    // regex.replace(text: string, pattern: string, replacement: string) -> string
118    register_typed_fn_3::<_, Arc<String>, Arc<String>, Arc<String>>(
119        &mut module,
120        "replace",
121        "Replace the first match of the pattern with the replacement",
122        [("text", "string"), ("pattern", "string"), ("replacement", "string")],
123        ConcreteType::String,
124        |text, pattern, replacement, _ctx| {
125            let re = regex::Regex::new(pattern.as_str())
126                .map_err(|e| format!("regex.replace() invalid pattern: {}", e))?;
127            let result = re.replace(text.as_str(), replacement.as_str());
128            Ok(TypedReturn::Concrete(ConcreteReturn::String(result.into_owned())))
129        },
130    );
131
132    // regex.replace_all(text: string, pattern: string, replacement: string) -> string
133    register_typed_fn_3::<_, Arc<String>, Arc<String>, Arc<String>>(
134        &mut module,
135        "replace_all",
136        "Replace all matches of the pattern with the replacement",
137        [("text", "string"), ("pattern", "string"), ("replacement", "string")],
138        ConcreteType::String,
139        |text, pattern, replacement, _ctx| {
140            let re = regex::Regex::new(pattern.as_str())
141                .map_err(|e| format!("regex.replace_all() invalid pattern: {}", e))?;
142            let result = re.replace_all(text.as_str(), replacement.as_str());
143            Ok(TypedReturn::Concrete(ConcreteReturn::String(result.into_owned())))
144        },
145    );
146
147    // regex.split(text: string, pattern: string) -> Array<string>
148    register_typed_fn_2::<_, Arc<String>, Arc<String>>(
149        &mut module,
150        "split",
151        "Split the text at each match of the pattern",
152        [("text", "string"), ("pattern", "string")],
153        ConcreteType::ArrayString,
154        |text, pattern, _ctx| {
155            let re = regex::Regex::new(pattern.as_str())
156                .map_err(|e| format!("regex.split() invalid pattern: {}", e))?;
157            let parts: Vec<String> = re.split(text.as_str()).map(|s| s.to_string()).collect();
158            Ok(TypedReturn::Concrete(ConcreteReturn::ArrayString(parts)))
159        },
160    );
161
162    module
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Names the module is expected to export.
    const EXPECTED_EXPORTS: [&str; 7] = [
        "match",
        "find",
        "is_match",
        "match_all",
        "replace",
        "replace_all",
        "split",
    ];

    /// The module has the expected name and exposes every expected export.
    #[test]
    fn test_regex_module_creation() {
        let regex_module = create_regex_module();
        assert_eq!(regex_module.name, "std::core::regex");
        for export in EXPECTED_EXPORTS {
            assert!(regex_module.has_export(export), "missing export: {}", export);
        }
    }

    /// `split` advertises `Array<string>` as its return type in the schema.
    #[test]
    fn test_regex_schemas() {
        let regex_module = create_regex_module();
        let schema = regex_module.get_schema("split").unwrap();
        let declared_return = schema.return_type.as_deref();
        assert_eq!(declared_return, Some("Array<string>"));
    }

    // Behavioural tests removed — they used `module.invoke_export(&[ValueWord::...])`
    // which is the deleted dynamic-dispatch entry point. End-to-end coverage
    // belongs in `shape-test`'s integration suite.
}