ferray_strings/
regex_ops.rs1use ferray_core::Array;
6use ferray_core::dimension::{Dimension, Ix1};
7use ferray_core::error::{FerrayError, FerrayResult};
8use regex::Regex;
9
10use crate::string_array::{StringArray, StringArray1};
11
12pub fn match_<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<Array<bool, D>> {
21 let re = Regex::new(pattern)
22 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
23
24 let data: Vec<bool> = a.map_to_vec(|s| re.is_match(s));
25 Array::from_vec(a.dim().clone(), data)
26}
27
28pub fn extract<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<StringArray1> {
40 let re = Regex::new(pattern)
41 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
42
43 let data: Vec<String> = a
44 .iter()
45 .map(|s| {
46 re.captures(s)
47 .and_then(|caps| caps.get(1))
48 .map(|m| m.as_str().to_string())
49 .unwrap_or_default()
50 })
51 .collect();
52
53 let dim = Ix1::new([data.len()]);
54 StringArray1::from_vec(dim, data)
55}
56
/// Unit tests for the regex-based string operations `match_` and `extract`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// An unanchored pattern matches anywhere inside an element.
    #[test]
    fn test_match_basic() {
        let a = array(&["hello123", "world", "foo42"]).unwrap();
        let result = match_(&a, r"\d+").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, true]);
    }

    /// `^…$` anchors restrict the match to the whole element.
    #[test]
    fn test_match_full_pattern() {
        let a = array(&["abc", "def", "abcdef"]).unwrap();
        let result = match_(&a, r"^abc$").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, false]);
    }

    /// A pattern that fails to compile is reported as an error, not a panic.
    #[test]
    fn test_match_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = match_(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// The first capture group is returned; non-matching elements yield "".
    #[test]
    fn test_extract_capture_group() {
        let a = array(&["hello123world", "foo42bar", "nodigits"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &["123", "42", ""]);
    }

    /// A named group is still capture group 1, so `extract` returns its text.
    #[test]
    fn test_extract_named_group() {
        let a = array(&["user:alice", "user:bob", "invalid"]).unwrap();
        let result = extract(&a, r"user:(?P<user>\w+)").unwrap();
        assert_eq!(result.as_slice(), &["alice", "bob", ""]);
    }

    /// With no capture group in the pattern there is no group 1 to return,
    /// so even matching elements yield an empty string.
    #[test]
    fn test_extract_without_capture_group() {
        let a = array(&["abc123", "nodigits"]).unwrap();
        let result = extract(&a, r"\d+").unwrap();
        assert_eq!(result.as_slice(), &["", ""]);
    }

    /// An element that does not match at all yields an empty string.
    #[test]
    fn test_extract_no_match() {
        let a = array(&["no match here"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &[""]);
    }

    /// A pattern that fails to compile is reported as an error, not a panic.
    #[test]
    fn test_extract_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = extract(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// `match_` and `extract` agree on which elements match, and `extract`
    /// returns only the first of several capture groups.
    #[test]
    fn test_match_and_extract_ac5() {
        let a = array(&["abc123", "def", "ghi456"]).unwrap();

        let matched = match_(&a, r"\d+").unwrap();
        let matched_data = matched.as_slice().unwrap();
        assert_eq!(matched_data, &[true, false, true]);

        let extracted = extract(&a, r"([a-z]+)(\d+)").unwrap();
        assert_eq!(extracted.as_slice(), &["abc", "", "ghi"]);
    }

    /// An empty input element simply fails to match and yields "".
    #[test]
    fn test_extract_empty_string() {
        let a = array(&["", "abc"]).unwrap();
        let result = extract(&a, r"(abc)").unwrap();
        assert_eq!(result.as_slice(), &["", "abc"]);
    }
}