// jmespath_extensions 0.9.0
//
// Extended functions for JMESPath queries - 400+ functions for strings, arrays, dates, hashing, encoding, geo, and more
// Documentation
//! Regular expression functions.
//!
//! This module provides the regular-expression functions `regex_match`,
//! `regex_extract`, and `regex_replace` for JMESPath queries.
//!
//! For complete function reference with signatures and examples, see the
//! [`functions`](crate::functions) module documentation or use `jpx --list-category regex_fns`.
//!
//! # Example
//!
//! ```rust
//! use jmespath::{Runtime, Variable};
//! use jmespath_extensions::regex_fns;
//!
//! let mut runtime = Runtime::new();
//! runtime.register_builtin_functions();
//! regex_fns::register(&mut runtime);
//! ```

use std::collections::HashSet;
use std::rc::Rc;

use crate::common::{
    ArgumentType, Context, Function, JmespathError, Rcvar, Runtime, Variable, custom_error,
};
use crate::define_function;
use crate::register_if_enabled;

use regex::Regex;

/// Register all regex functions with the runtime.
pub fn register(runtime: &mut Runtime) {
    // Each function is boxed first and then handed to the runtime under the
    // name that JMESPath expressions use to call it.
    let match_fn = Box::new(RegexMatchFn::new());
    runtime.register_function("regex_match", match_fn);

    let extract_fn = Box::new(RegexExtractFn::new());
    runtime.register_function("regex_extract", extract_fn);

    let replace_fn = Box::new(RegexReplaceFn::new());
    runtime.register_function("regex_replace", replace_fn);
}

/// Register only the regex functions that are in the enabled set.
pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
    // One `register_if_enabled!` invocation per function, covering the same
    // three names that `register` installs unconditionally.
    // NOTE(review): the macro is defined elsewhere in the crate; presumably it
    // registers the boxed function only when `enabled` contains the given
    // name — confirm against the macro definition.
    register_if_enabled!(
        runtime,
        enabled,
        "regex_match",
        Box::new(RegexMatchFn::new())
    );
    register_if_enabled!(
        runtime,
        enabled,
        "regex_extract",
        Box::new(RegexExtractFn::new())
    );
    register_if_enabled!(
        runtime,
        enabled,
        "regex_replace",
        Box::new(RegexReplaceFn::new())
    );
}

// =============================================================================
// regex_match(string, pattern) -> boolean
// =============================================================================

// Signature: two string arguments (input, pattern), no variadic tail.
define_function!(
    RegexMatchFn,
    vec![ArgumentType::String, ArgumentType::String],
    None
);

impl Function for RegexMatchFn {
    /// Evaluates `regex_match(string, pattern)`.
    ///
    /// Compiles `pattern` as a regular expression and returns a boolean
    /// variable telling whether it matches `string`.
    ///
    /// # Errors
    ///
    /// Fails if the arguments do not satisfy the signature, or with an
    /// "Invalid regex pattern" error if `pattern` does not compile.
    fn evaluate(&self, args: &[Rcvar], ctx: &mut Context<'_>) -> Result<Rcvar, JmespathError> {
        self.signature.validate(args, ctx)?;

        // Validation above guarantees both arguments are strings, so the
        // unwraps cannot fail.
        let haystack = args[0].as_string().unwrap();
        let pattern_src = args[1].as_string().unwrap();

        let compiled = match Regex::new(pattern_src) {
            Ok(regex) => regex,
            Err(e) => {
                return Err(custom_error(ctx, &format!("Invalid regex pattern: {e}")));
            }
        };

        let is_hit = compiled.is_match(haystack);
        Ok(Rc::new(Variable::Bool(is_hit)))
    }
}

// =============================================================================
// regex_extract(string, pattern) -> array of matches, or null if there are none
// =============================================================================

// Signature: two string arguments (input, pattern), no variadic tail.
define_function!(
    RegexExtractFn,
    vec![ArgumentType::String, ArgumentType::String],
    None
);

impl Function for RegexExtractFn {
    /// Evaluates `regex_extract(string, pattern)`.
    ///
    /// Collects the text of every match that `Regex::find_iter` yields for
    /// `pattern` over `string` into an array of string variables. When there
    /// are no matches the result is `null` rather than an empty array.
    ///
    /// # Errors
    ///
    /// Fails if the arguments do not satisfy the signature, or with an
    /// "Invalid regex pattern" error if `pattern` does not compile.
    fn evaluate(&self, args: &[Rcvar], ctx: &mut Context<'_>) -> Result<Rcvar, JmespathError> {
        self.signature.validate(args, ctx)?;

        // Validation above guarantees both arguments are strings, so the
        // unwraps cannot fail.
        let haystack = args[0].as_string().unwrap();
        let pattern_src = args[1].as_string().unwrap();

        let compiled = Regex::new(pattern_src)
            .map_err(|e| custom_error(ctx, &format!("Invalid regex pattern: {e}")))?;

        let mut found: Vec<Rcvar> = Vec::new();
        for hit in compiled.find_iter(haystack) {
            found.push(Rc::new(Variable::String(hit.as_str().to_string())));
        }

        // An empty result is reported as null, not as an empty array.
        let outcome = if found.is_empty() {
            Variable::Null
        } else {
            Variable::Array(found)
        };
        Ok(Rc::new(outcome))
    }
}

// =============================================================================
// regex_replace(string, pattern, replacement) -> string
// =============================================================================

// Signature: three string arguments (input, pattern, replacement), no
// variadic tail.
define_function!(
    RegexReplaceFn,
    vec![
        ArgumentType::String,
        ArgumentType::String,
        ArgumentType::String
    ],
    None
);

impl Function for RegexReplaceFn {
    /// Evaluates `regex_replace(string, pattern, replacement)`.
    ///
    /// Replaces every match of `pattern` in `string` via `Regex::replace_all`
    /// and returns the resulting string. The replacement text is handed to
    /// `replace_all` unchanged, so that method's replacement syntax applies.
    ///
    /// # Errors
    ///
    /// Fails if the arguments do not satisfy the signature, or with an
    /// "Invalid regex pattern" error if `pattern` does not compile.
    fn evaluate(&self, args: &[Rcvar], ctx: &mut Context<'_>) -> Result<Rcvar, JmespathError> {
        self.signature.validate(args, ctx)?;

        // Validation above guarantees all three arguments are strings, so the
        // unwraps cannot fail.
        let subject = args[0].as_string().unwrap();
        let pattern_src = args[1].as_string().unwrap();
        let replacement = args[2].as_string().unwrap();

        let compiled = Regex::new(pattern_src).map_err(|e| {
            let message = format!("Invalid regex pattern: {e}");
            custom_error(ctx, &message)
        })?;

        let replaced = compiled.replace_all(subject, replacement).into_owned();
        Ok(Rc::new(Variable::String(replaced)))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use jmespath::Runtime;

    /// Builds a runtime with the built-in functions plus this module's regex functions.
    fn setup_runtime() -> Runtime {
        let mut rt = Runtime::new();
        rt.register_builtin_functions();
        register(&mut rt);
        rt
    }

    /// Wraps a string slice as a JMESPath string variable.
    fn text(value: &str) -> Variable {
        Variable::String(value.to_string())
    }

    #[test]
    fn test_regex_match() {
        let rt = setup_runtime();
        let anchored = rt.compile("regex_match(@, '^hello')").unwrap();

        // The pattern is anchored at the start, so only the first input matches.
        let at_start = anchored.search(&text("hello world")).unwrap();
        assert!(at_start.as_boolean().unwrap());

        let at_end = anchored.search(&text("world hello")).unwrap();
        assert!(!at_end.as_boolean().unwrap());
    }

    #[test]
    fn test_regex_extract() {
        let rt = setup_runtime();
        let digits = rt.compile("regex_extract(@, '[0-9]+')").unwrap();

        let result = digits.search(&text("abc123def456")).unwrap();
        let found: Vec<&str> = result
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_string().unwrap().as_str())
            .collect();

        assert_eq!(found, ["123", "456"]);
    }

    #[test]
    fn test_regex_replace() {
        let rt = setup_runtime();
        let masker = rt.compile("regex_replace(@, '[0-9]+', 'X')").unwrap();

        let result = masker.search(&text("abc123def456")).unwrap();
        assert_eq!(result.as_string().unwrap(), "abcXdefX");
    }
}