1use regex::Regex;
6use std::collections::HashMap;
7
/// Outcome of a single regular-expression match attempt.
///
/// Positions are the byte offsets reported by the regex engine for the text
/// that was searched. The three capture vectors run in parallel and describe
/// capture groups starting at group 1 (group 0 is the overall match and lives
/// in `full_match` / `match_start` / `match_end`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RegexMatch {
    /// `true` when the pattern matched the text.
    pub matched: bool,
    /// Text of the overall match, or `None` when nothing matched.
    pub full_match: Option<String>,
    /// Text of each capture group; `None` for a group that did not participate.
    pub captures: Vec<Option<String>>,
    /// Byte offset at which the overall match starts.
    pub match_start: Option<usize>,
    /// Byte offset just past the end of the overall match.
    pub match_end: Option<usize>,
    /// Byte offset at which each capture group starts (`None` if unmatched).
    pub capture_starts: Vec<Option<usize>>,
    /// Byte offset just past the end of each capture group (`None` if unmatched).
    pub capture_ends: Vec<Option<usize>>,
}

impl RegexMatch {
    /// Returns the value describing a failed match: `matched` is `false`,
    /// every optional field is `None`, and every vector is empty.
    ///
    /// This is the same value as [`RegexMatch::default`].
    pub fn no_match() -> Self {
        Self::default()
    }
}
33
/// Flags controlling how a pattern is matched and how the result is exported
/// as shell-style variables.
///
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RegexOptions {
    /// Match without regard to letter case.
    pub case_insensitive: bool,
    /// Export results under `BASH_REMATCH[n]` names instead of the
    /// `MATCH` / `MBEGIN` / `MEND` / `match[n]` / `mbegin[n]` / `mend[n]` set.
    pub bash_rematch: bool,
    /// Use 0-based instead of 1-based indices for the non-bash variable set.
    pub ksh_arrays: bool,
}
41
42pub fn regex_match(
44 text: &str,
45 pattern: &str,
46 options: &RegexOptions,
47) -> Result<RegexMatch, String> {
48 let re = if options.case_insensitive {
49 Regex::new(&format!("(?i){}", pattern))
50 } else {
51 Regex::new(pattern)
52 }
53 .map_err(|e| format!("failed to compile regex: {}", e))?;
54
55 let caps = match re.captures(text) {
56 Some(c) => c,
57 None => return Ok(RegexMatch::no_match()),
58 };
59
60 let full_match = caps.get(0).map(|m| m.as_str().to_string());
61 let match_start = caps.get(0).map(|m| m.start());
62 let match_end = caps.get(0).map(|m| m.end());
63
64 let mut captures = Vec::new();
65 let mut capture_starts = Vec::new();
66 let mut capture_ends = Vec::new();
67
68 for i in 1..caps.len() {
69 if let Some(m) = caps.get(i) {
70 captures.push(Some(m.as_str().to_string()));
71 capture_starts.push(Some(m.start()));
72 capture_ends.push(Some(m.end()));
73 } else {
74 captures.push(None);
75 capture_starts.push(None);
76 capture_ends.push(None);
77 }
78 }
79
80 Ok(RegexMatch {
81 matched: true,
82 full_match,
83 captures,
84 match_start,
85 match_end,
86 capture_starts,
87 capture_ends,
88 })
89}
90
/// Converts a byte offset within `s` into the number of characters that
/// precede it.
///
/// The offset is made safe before slicing so that a mismatched offset cannot
/// panic: an offset past the end of `s` is clamped to `s.len()`, and an offset
/// that falls inside a multi-byte character is rounded down to the start of
/// that character.
fn byte_to_char_offset(s: &str, byte_offset: usize) -> usize {
    let mut end = byte_offset.min(s.len());
    // Index 0 is always a char boundary, so this loop terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].chars().count()
}
95
96pub fn get_match_variables(
98 result: &RegexMatch,
99 text: &str,
100 options: &RegexOptions,
101) -> HashMap<String, String> {
102 let mut vars = HashMap::new();
103
104 if !result.matched {
105 return vars;
106 }
107
108 if options.bash_rematch {
109 if let Some(ref full) = result.full_match {
110 vars.insert("BASH_REMATCH[0]".to_string(), full.clone());
111 }
112 for (i, cap) in result.captures.iter().enumerate() {
113 if let Some(c) = cap {
114 vars.insert(format!("BASH_REMATCH[{}]", i + 1), c.clone());
115 }
116 }
117 } else {
118 if let Some(ref full) = result.full_match {
119 vars.insert("MATCH".to_string(), full.clone());
120 }
121
122 let base = if options.ksh_arrays { 0 } else { 1 };
123
124 if let Some(start) = result.match_start {
125 let char_start = byte_to_char_offset(text, start);
126 vars.insert("MBEGIN".to_string(), (char_start + base).to_string());
127 }
128
129 if let Some(end) = result.match_end {
130 let char_end = byte_to_char_offset(text, end);
131 vars.insert("MEND".to_string(), (char_end + base - 1).to_string());
132 }
133
134 for (i, cap) in result.captures.iter().enumerate() {
135 if let Some(c) = cap {
136 vars.insert(format!("match[{}]", i + base), c.clone());
137 }
138 }
139
140 for (i, start) in result.capture_starts.iter().enumerate() {
141 if let Some(s) = start {
142 let char_start = byte_to_char_offset(text, *s);
143 vars.insert(
144 format!("mbegin[{}]", i + base),
145 (char_start + base).to_string(),
146 );
147 } else {
148 vars.insert(format!("mbegin[{}]", i + base), "-1".to_string());
149 }
150 }
151
152 for (i, end) in result.capture_ends.iter().enumerate() {
153 if let Some(e) = end {
154 let char_end = byte_to_char_offset(text, *e);
155 vars.insert(
156 format!("mend[{}]", i + base),
157 (char_end + base - 1).to_string(),
158 );
159 } else {
160 vars.insert(format!("mend[{}]", i + base), "-1".to_string());
161 }
162 }
163 }
164
165 vars
166}
167
168pub fn cond_regex_match(lhs: &str, rhs: &str, options: &RegexOptions) -> (bool, RegexMatch) {
170 match regex_match(lhs, rhs, options) {
171 Ok(result) => (result.matched, result),
172 Err(_) => (false, RegexMatch::no_match()),
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `regex_match`, panicking if the pattern is rejected.
    fn run(text: &str, pattern: &str, opts: &RegexOptions) -> RegexMatch {
        regex_match(text, pattern, opts).unwrap()
    }

    /// Looks up a variable as a borrowed string for terse comparisons.
    fn var<'a>(vars: &'a HashMap<String, String>, name: &str) -> Option<&'a str> {
        vars.get(name).map(String::as_str)
    }

    #[test]
    fn test_regex_match_simple() {
        let found = run("hello world", "hello", &RegexOptions::default());
        assert!(found.matched);
        assert_eq!(found.full_match.as_deref(), Some("hello"));
    }

    #[test]
    fn test_regex_match_no_match() {
        let found = run("hello world", "goodbye", &RegexOptions::default());
        assert!(!found.matched);
    }

    #[test]
    fn test_regex_match_captures() {
        let found = run("hello world", "(hello) (world)", &RegexOptions::default());
        assert!(found.matched);
        assert_eq!(found.full_match.as_deref(), Some("hello world"));
        assert_eq!(found.captures.len(), 2);
        assert_eq!(found.captures[0].as_deref(), Some("hello"));
        assert_eq!(found.captures[1].as_deref(), Some("world"));
    }

    #[test]
    fn test_regex_match_case_insensitive() {
        let opts = RegexOptions {
            case_insensitive: true,
            ..Default::default()
        };
        assert!(run("HELLO WORLD", "hello", &opts).matched);
    }

    #[test]
    fn test_regex_match_case_sensitive() {
        let found = run("HELLO WORLD", "hello", &RegexOptions::default());
        assert!(!found.matched);
    }

    #[test]
    fn test_regex_match_positions() {
        let found = run("foo bar baz", "bar", &RegexOptions::default());
        assert!(found.matched);
        assert_eq!(found.match_start, Some(4));
        assert_eq!(found.match_end, Some(7));
    }

    #[test]
    fn test_regex_match_invalid_pattern() {
        let attempt = regex_match("test", "[invalid", &RegexOptions::default());
        assert!(attempt.is_err());
    }

    #[test]
    fn test_get_match_variables_zsh() {
        let opts = RegexOptions::default();
        let text = "hello world";
        let found = run(text, "(hello) (world)", &opts);
        let vars = get_match_variables(&found, text, &opts);

        assert_eq!(var(&vars, "MATCH"), Some("hello world"));
        assert_eq!(var(&vars, "MBEGIN"), Some("1"));
        assert_eq!(var(&vars, "MEND"), Some("11"));
    }

    #[test]
    fn test_get_match_variables_bash() {
        let opts = RegexOptions {
            bash_rematch: true,
            ..Default::default()
        };
        let text = "hello world";
        let found = run(text, "(hello) (world)", &opts);
        let vars = get_match_variables(&found, text, &opts);

        assert_eq!(var(&vars, "BASH_REMATCH[0]"), Some("hello world"));
        assert_eq!(var(&vars, "BASH_REMATCH[1]"), Some("hello"));
        assert_eq!(var(&vars, "BASH_REMATCH[2]"), Some("world"));
    }

    #[test]
    fn test_cond_regex_match() {
        let opts = RegexOptions::default();

        let (hit, _) = cond_regex_match("hello world", "hello", &opts);
        assert!(hit);

        let (miss, _) = cond_regex_match("hello world", "goodbye", &opts);
        assert!(!miss);
    }

    #[test]
    fn test_byte_to_char_offset_ascii() {
        assert_eq!(byte_to_char_offset("hello", 0), 0);
        assert_eq!(byte_to_char_offset("hello", 5), 5);
    }

    #[test]
    fn test_byte_to_char_offset_unicode() {
        // 'é' occupies two bytes, so byte offset 3 is character offset 2.
        let text = "héllo";
        assert_eq!(byte_to_char_offset(text, 0), 0);
        assert_eq!(byte_to_char_offset(text, 1), 1);
        assert_eq!(byte_to_char_offset(text, 3), 2);
    }
}