ferray_strings/
regex_ops.rs1use ferray_core::Array;
6use ferray_core::dimension::{Dimension, Ix1};
7use ferray_core::error::{FerrayError, FerrayResult};
8use regex::Regex;
9
10use crate::string_array::{StringArray, StringArray1};
11
12pub fn match_<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<Array<bool, D>> {
26 let re = Regex::new(pattern)
27 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
28 match_compiled(a, &re)
29}
30
31pub fn match_compiled<D: Dimension>(
36 a: &StringArray<D>,
37 re: &Regex,
38) -> FerrayResult<Array<bool, D>> {
39 let data: Vec<bool> = a.map_to_vec(|s| re.is_match(s));
40 Array::from_vec(a.dim().clone(), data)
41}
42
43pub fn extract<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<StringArray1> {
58 let re = Regex::new(pattern)
59 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
60 extract_compiled(a, &re)
61}
62
63pub fn extract_compiled<D: Dimension>(
65 a: &StringArray<D>,
66 re: &Regex,
67) -> FerrayResult<StringArray1> {
68 let data: Vec<String> = a
69 .iter()
70 .map(|s| {
71 re.captures(s)
72 .and_then(|caps| caps.get(1))
73 .map(|m| m.as_str().to_string())
74 .unwrap_or_default()
75 })
76 .collect();
77
78 let dim = Ix1::new([data.len()]);
79 StringArray1::from_vec(dim, data)
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// An unanchored pattern matches anywhere inside an element.
    #[test]
    fn test_match_basic() {
        let a = array(&["hello123", "world", "foo42"]).unwrap();
        let result = match_(&a, r"\d+").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, true]);
    }

    /// Anchoring with `^`/`$` restricts the match to the whole element.
    #[test]
    fn test_match_full_pattern() {
        let a = array(&["abc", "def", "abcdef"]).unwrap();
        let result = match_(&a, r"^abc$").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, false]);
    }

    /// A pattern that fails to compile is reported as an error, not a panic.
    #[test]
    fn test_match_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = match_(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// Group 1 is returned per element; non-matching elements become "".
    #[test]
    fn test_extract_capture_group() {
        let a = array(&["hello123world", "foo42bar", "nodigits"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &["123", "42", ""]);
    }

    /// A named group is still addressable as group 1, so `extract` returns it.
    #[test]
    fn test_extract_named_group() {
        let a = array(&["user:alice", "user:bob", "invalid"]).unwrap();
        // Use an actual named group so this test covers what its name says.
        let result = extract(&a, r"user:(?P<name>\w+)").unwrap();
        assert_eq!(result.as_slice(), &["alice", "bob", ""]);
    }

    /// A single non-matching element yields a single empty string.
    #[test]
    fn test_extract_no_match() {
        let a = array(&["no match here"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &[""]);
    }

    /// A pattern without any capture group has no group 1, so every element
    /// (matching or not) yields an empty string.
    #[test]
    fn test_extract_pattern_without_group() {
        let a = array(&["abc123", "def"]).unwrap();
        let result = extract(&a, r"\d+").unwrap();
        assert_eq!(result.as_slice(), &["", ""]);
    }

    /// A pattern that fails to compile is reported as an error, not a panic.
    #[test]
    fn test_extract_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = extract(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// `match_` and `extract` agree on the same input; with two groups in the
    /// pattern only the first one is extracted.
    #[test]
    fn test_match_and_extract_ac5() {
        let a = array(&["abc123", "def", "ghi456"]).unwrap();

        let matched = match_(&a, r"\d+").unwrap();
        let matched_data = matched.as_slice().unwrap();
        assert_eq!(matched_data, &[true, false, true]);

        let extracted = extract(&a, r"([a-z]+)(\d+)").unwrap();
        assert_eq!(extracted.as_slice(), &["abc", "", "ghi"]);
    }

    /// An empty element cannot satisfy a non-empty group and maps to "".
    #[test]
    fn test_extract_empty_string() {
        let a = array(&["", "abc"]).unwrap();
        let result = extract(&a, r"(abc)").unwrap();
        assert_eq!(result.as_slice(), &["", "abc"]);
    }
}