ferray_strings/
regex_ops.rs1use ferray_core::Array;
6use ferray_core::dimension::{Dimension, Ix1};
7use ferray_core::error::{FerrayError, FerrayResult};
8use regex::Regex;
9
10use crate::string_array::{StringArray, StringArray1};
11
12pub fn match_<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<Array<bool, Ix1>> {
21 let re = Regex::new(pattern)
22 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
23
24 let data: Vec<bool> = a.map_to_vec(|s| re.is_match(s));
25 let dim = Ix1::new([data.len()]);
26 Array::from_vec(dim, data)
27}
28
29pub fn extract<D: Dimension>(a: &StringArray<D>, pattern: &str) -> FerrayResult<StringArray1> {
41 let re = Regex::new(pattern)
42 .map_err(|e| FerrayError::invalid_value(format!("invalid regex pattern: {e}")))?;
43
44 let data: Vec<String> = a
45 .iter()
46 .map(|s| {
47 re.captures(s)
48 .and_then(|caps| caps.get(1))
49 .map(|m| m.as_str().to_string())
50 .unwrap_or_default()
51 })
52 .collect();
53
54 let dim = Ix1::new([data.len()]);
55 StringArray1::from_vec(dim, data)
56}
57
/// Unit tests for the regex-based string operations `match_` and `extract`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// A pattern found anywhere inside an element counts as a match.
    #[test]
    fn test_match_basic() {
        let a = array(&["hello123", "world", "foo42"]).unwrap();
        let result = match_(&a, r"\d+").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, true]);
    }

    /// Anchors restrict the match to the entire element.
    #[test]
    fn test_match_full_pattern() {
        let a = array(&["abc", "def", "abcdef"]).unwrap();
        let result = match_(&a, r"^abc$").unwrap();
        let data = result.as_slice().unwrap();
        assert_eq!(data, &[true, false, false]);
    }

    /// A malformed pattern surfaces as an error rather than a panic.
    #[test]
    fn test_match_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = match_(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// The first (unnamed) capture group is returned; non-matches give "".
    #[test]
    fn test_extract_capture_group() {
        let a = array(&["hello123world", "foo42bar", "nodigits"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &["123", "42", ""]);
    }

    /// A named group is also addressable by position, so when it is the
    /// first group in the pattern it is what `extract` returns.
    #[test]
    fn test_extract_named_group() {
        let a = array(&["user:alice", "user:bob", "invalid"]).unwrap();
        let result = extract(&a, r"user:(?P<name>\w+)").unwrap();
        assert_eq!(result.as_slice(), &["alice", "bob", ""]);
    }

    /// An element that does not match at all yields an empty string.
    #[test]
    fn test_extract_no_match() {
        let a = array(&["no match here"]).unwrap();
        let result = extract(&a, r"(\d+)").unwrap();
        assert_eq!(result.as_slice(), &[""]);
    }

    /// A pattern that matches but defines no capture group has no group 1,
    /// so every element yields an empty string.
    #[test]
    fn test_extract_without_capture_group() {
        let a = array(&["abc123"]).unwrap();
        let result = extract(&a, r"\d+").unwrap();
        assert_eq!(result.as_slice(), &[""]);
    }

    /// A malformed pattern surfaces as an error rather than a panic.
    #[test]
    fn test_extract_invalid_regex() {
        let a = array(&["hello"]).unwrap();
        let result = extract(&a, r"[invalid");
        assert!(result.is_err());
    }

    /// Exercises `match_` and `extract` together on the same input.
    /// NOTE(review): "ac5" presumably refers to an acceptance criterion; confirm.
    #[test]
    fn test_match_and_extract_ac5() {
        let a = array(&["abc123", "def", "ghi456"]).unwrap();

        let matched = match_(&a, r"\d+").unwrap();
        let matched_data = matched.as_slice().unwrap();
        assert_eq!(matched_data, &[true, false, true]);

        // With two groups in the pattern only the first one is extracted.
        let extracted = extract(&a, r"([a-z]+)(\d+)").unwrap();
        assert_eq!(extracted.as_slice(), &["abc", "", "ghi"]);
    }

    /// Empty elements pass through as empty strings.
    #[test]
    fn test_extract_empty_string() {
        let a = array(&["", "abc"]).unwrap();
        let result = extract(&a, r"(abc)").unwrap();
        assert_eq!(result.as_slice(), &["", "abc"]);
    }
}