use ferray_core::Array;
use ferray_core::dimension::{Dimension, Ix1};
use ferray_core::error::{FerrayError, FerrayResult};
use regex::Regex;
use crate::string_array::{StringArray, StringArray1};
pub fn match_<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<Array<bool, D>> {
let re = Regex::new(pattern)
.map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
match_compiled(a, &re)
}
pub fn match_compiled<D: Dimension>(
a: &StringArray<D>,
re: &Regex,
) -> FerrayResult<Array<bool, D>> {
let data: Vec<bool> = a.map_to_vec(|s| re.is_match(s));
Array::from_vec(a.dim().clone(), data)
}
pub fn extract<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<StringArray1> {
let re = Regex::new(pattern)
.map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
extract_compiled(a, &re)
}
pub fn extract_compiled<D: Dimension>(
a: &StringArray<D>,
re: &Regex,
) -> FerrayResult<StringArray1> {
let data: Vec<String> = a
.iter()
.map(|s| {
re.captures(s)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str().to_string())
.unwrap_or_default()
})
.collect();
let dim = Ix1::new([data.len()]);
StringArray1::from_vec(dim, data)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::string_array::array;
#[test]
fn test_match_basic() {
let a = array(&["hello123", "world", "foo42"]).unwrap();
let result = match_(&a, r"\d+").unwrap();
let data = result.as_slice().unwrap();
assert_eq!(data, &[true, false, true]);
}
#[test]
fn test_match_full_pattern() {
let a = array(&["abc", "def", "abcdef"]).unwrap();
let result = match_(&a, r"^abc$").unwrap();
let data = result.as_slice().unwrap();
assert_eq!(data, &[true, false, false]);
}
#[test]
fn test_match_invalid_regex() {
let a = array(&["hello"]).unwrap();
let result = match_(&a, r"[invalid");
assert!(result.is_err());
}
#[test]
fn test_extract_capture_group() {
let a = array(&["hello123world", "foo42bar", "nodigits"]).unwrap();
let result = extract(&a, r"(\d+)").unwrap();
assert_eq!(result.as_slice(), &["123", "42", ""]);
}
#[test]
fn test_extract_named_group() {
let a = array(&["user:alice", "user:bob", "invalid"]).unwrap();
let result = extract(&a, r"user:(\w+)").unwrap();
assert_eq!(result.as_slice(), &["alice", "bob", ""]);
}
#[test]
fn test_extract_no_match() {
let a = array(&["no match here"]).unwrap();
let result = extract(&a, r"(\d+)").unwrap();
assert_eq!(result.as_slice(), &[""]);
}
#[test]
fn test_extract_invalid_regex() {
let a = array(&["hello"]).unwrap();
let result = extract(&a, r"[invalid");
assert!(result.is_err());
}
#[test]
fn test_match_and_extract_ac5() {
let a = array(&["abc123", "def", "ghi456"]).unwrap();
let matched = match_(&a, r"\d+").unwrap();
let matched_data = matched.as_slice().unwrap();
assert_eq!(matched_data, &[true, false, true]);
let extracted = extract(&a, r"([a-z]+)(\d+)").unwrap();
assert_eq!(extracted.as_slice(), &["abc", "", "ghi"]);
}
#[test]
fn test_extract_empty_string() {
let a = array(&["", "abc"]).unwrap();
let result = extract(&a, r"(abc)").unwrap();
assert_eq!(result.as_slice(), &["", "abc"]);
}
}