Skip to main content

jpx_core/extensions/
regex_fns.rs

1//! Regular expression functions.
2
3use std::collections::HashSet;
4
5use serde_json::Value;
6
7use crate::functions::Function;
8use crate::functions::custom_error;
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
13use regex::Regex;
14
15/// Register regex functions with the runtime, filtered by the enabled set.
16pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
17    register_if_enabled(
18        runtime,
19        "regex_match",
20        enabled,
21        Box::new(RegexMatchFn::new()),
22    );
23    register_if_enabled(
24        runtime,
25        "regex_extract",
26        enabled,
27        Box::new(RegexExtractFn::new()),
28    );
29    register_if_enabled(
30        runtime,
31        "regex_replace",
32        enabled,
33        Box::new(RegexReplaceFn::new()),
34    );
35}
36
37// =============================================================================
38// regex_match(string, pattern) -> boolean
39// =============================================================================
40
41defn!(RegexMatchFn, vec![arg!(string), arg!(string)], None);
42
43impl Function for RegexMatchFn {
44    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
45        self.signature.validate(args, ctx)?;
46
47        // Safe to unwrap after signature validation
48        let input = args[0].as_str().unwrap();
49        let pattern = args[1].as_str().unwrap();
50
51        let re = Regex::new(pattern)
52            .map_err(|e| custom_error(ctx, &format!("Invalid regex pattern: {e}")))?;
53
54        Ok(Value::Bool(re.is_match(input)))
55    }
56}
57
58// =============================================================================
// regex_extract(string, pattern) -> array of matches (null when nothing matches)
60// =============================================================================
61
62defn!(RegexExtractFn, vec![arg!(string), arg!(string)], None);
63
64impl Function for RegexExtractFn {
65    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
66        self.signature.validate(args, ctx)?;
67
68        // Safe to unwrap after signature validation
69        let input = args[0].as_str().unwrap();
70        let pattern = args[1].as_str().unwrap();
71
72        let re = Regex::new(pattern)
73            .map_err(|e| custom_error(ctx, &format!("Invalid regex pattern: {e}")))?;
74
75        let matches: Vec<Value> = re
76            .find_iter(input)
77            .map(|m| Value::String(m.as_str().to_string()))
78            .collect();
79
80        // Return null if no matches found
81        if matches.is_empty() {
82            Ok(Value::Null)
83        } else {
84            Ok(Value::Array(matches))
85        }
86    }
87}
88
89// =============================================================================
90// regex_replace(string, pattern, replacement) -> string
91// =============================================================================
92
93defn!(
94    RegexReplaceFn,
95    vec![arg!(string), arg!(string), arg!(string)],
96    None
97);
98
99impl Function for RegexReplaceFn {
100    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
101        self.signature.validate(args, ctx)?;
102
103        // Safe to unwrap after signature validation
104        let input = args[0].as_str().unwrap();
105        let pattern = args[1].as_str().unwrap();
106        let replacement = args[2].as_str().unwrap();
107
108        let re = Regex::new(pattern)
109            .map_err(|e| custom_error(ctx, &format!("Invalid regex pattern: {e}")))?;
110
111        let result = re.replace_all(input, replacement);
112        Ok(Value::String(result.into_owned()))
113    }
114}
115
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds a runtime with the standard functions and all extensions enabled.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// An anchored pattern matches only when the input starts with it.
    #[test]
    fn test_regex_match() {
        let runtime = setup_runtime();
        let expr = runtime.compile("regex_match(@, '^hello')").unwrap();

        for (input, expected) in [("hello world", true), ("world hello", false)] {
            let data = json!(input);
            let result = expr.search(&data).unwrap();
            assert_eq!(result, json!(expected));
        }
    }

    /// Every run of digits is returned, in order of appearance.
    #[test]
    fn test_regex_extract() {
        let runtime = setup_runtime();
        let expr = runtime.compile("regex_extract(@, '[0-9]+')").unwrap();

        let data = json!("abc123def456");
        let result = expr.search(&data).unwrap();
        assert_eq!(result, json!(["123", "456"]));
    }

    /// Every run of digits is replaced by the literal replacement.
    #[test]
    fn test_regex_replace() {
        let runtime = setup_runtime();
        let expr = runtime.compile("regex_replace(@, '[0-9]+', 'X')").unwrap();

        let data = json!("abc123def456");
        let result = expr.search(&data).unwrap();
        assert_eq!(result, json!("abcXdefX"));
    }
}