// zsh/regex_mod.rs

//! Regex module - port of Modules/regex.c
//!
//! Provides the regex-match conditional behind the `=~` operator.
4
use regex::Regex;
use regex::RegexBuilder;
use std::collections::HashMap;
7
/// Result of a regex match operation.
///
/// All positions stored here are byte offsets into the text that was
/// searched. `captures`, `capture_starts` and `capture_ends` are parallel
/// vectors: element `i` describes capture group `i + 1` and is `None` when
/// that group did not take part in the match.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RegexMatch {
    /// Whether the pattern matched at all.
    pub matched: bool,
    /// Text of the whole match (group 0); `None` when nothing matched.
    pub full_match: Option<String>,
    /// Text of each capture group, starting with group 1.
    pub captures: Vec<Option<String>>,
    /// Byte offset at which the whole match starts.
    pub match_start: Option<usize>,
    /// Byte offset just past the end of the whole match.
    pub match_end: Option<usize>,
    /// Byte offset at which each capture group starts.
    pub capture_starts: Vec<Option<usize>>,
    /// Byte offset just past the end of each capture group.
    pub capture_ends: Vec<Option<usize>>,
}

impl RegexMatch {
    /// Returns the value that represents "the pattern did not match":
    /// `matched` is `false`, every optional field is `None` and every
    /// vector is empty.
    #[must_use]
    pub fn no_match() -> Self {
        // The derived `Default` is exactly the empty, unmatched state.
        Self::default()
    }
}
33
/// Options for regex matching.
///
/// The default value has every flag switched off.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RegexOptions {
    /// Compile the pattern so that letter case is ignored.
    pub case_insensitive: bool,
    /// Report match variables as `BASH_REMATCH[n]` entries instead of the
    /// zsh-style `MATCH`/`MBEGIN`/`MEND` and `match`/`mbegin`/`mend` names.
    pub bash_rematch: bool,
    /// Use zero-based positions and array indices instead of one-based ones
    /// for the zsh-style variables.
    pub ksh_arrays: bool,
}
41
42/// Perform a regex match
43pub fn regex_match(
44    text: &str,
45    pattern: &str,
46    options: &RegexOptions,
47) -> Result<RegexMatch, String> {
48    let re = if options.case_insensitive {
49        Regex::new(&format!("(?i){}", pattern))
50    } else {
51        Regex::new(pattern)
52    }
53    .map_err(|e| format!("failed to compile regex: {}", e))?;
54
55    let caps = match re.captures(text) {
56        Some(c) => c,
57        None => return Ok(RegexMatch::no_match()),
58    };
59
60    let full_match = caps.get(0).map(|m| m.as_str().to_string());
61    let match_start = caps.get(0).map(|m| m.start());
62    let match_end = caps.get(0).map(|m| m.end());
63
64    let mut captures = Vec::new();
65    let mut capture_starts = Vec::new();
66    let mut capture_ends = Vec::new();
67
68    for i in 1..caps.len() {
69        if let Some(m) = caps.get(i) {
70            captures.push(Some(m.as_str().to_string()));
71            capture_starts.push(Some(m.start()));
72            capture_ends.push(Some(m.end()));
73        } else {
74            captures.push(None);
75            capture_starts.push(None);
76            capture_ends.push(None);
77        }
78    }
79
80    Ok(RegexMatch {
81        matched: true,
82        full_match,
83        captures,
84        match_start,
85        match_end,
86        capture_starts,
87        capture_ends,
88    })
89}
90
/// Convert a byte offset into `s` to a character offset.
///
/// Returns the number of whole characters of `s` that end at or before
/// `byte_offset`. The function never panics: an offset that falls inside a
/// multi-byte character is rounded down to the start of that character, and
/// an offset past the end of `s` yields the total character count.
fn byte_to_char_offset(s: &str, byte_offset: usize) -> usize {
    // Walking `char_indices` avoids slicing `s`, which would panic on an
    // out-of-range or non-boundary offset.
    s.char_indices()
        .take_while(|&(start, ch)| start + ch.len_utf8() <= byte_offset)
        .count()
}
95
96/// Get match variables in zsh format
97pub fn get_match_variables(
98    result: &RegexMatch,
99    text: &str,
100    options: &RegexOptions,
101) -> HashMap<String, String> {
102    let mut vars = HashMap::new();
103
104    if !result.matched {
105        return vars;
106    }
107
108    if options.bash_rematch {
109        if let Some(ref full) = result.full_match {
110            vars.insert("BASH_REMATCH[0]".to_string(), full.clone());
111        }
112        for (i, cap) in result.captures.iter().enumerate() {
113            if let Some(c) = cap {
114                vars.insert(format!("BASH_REMATCH[{}]", i + 1), c.clone());
115            }
116        }
117    } else {
118        if let Some(ref full) = result.full_match {
119            vars.insert("MATCH".to_string(), full.clone());
120        }
121
122        let base = if options.ksh_arrays { 0 } else { 1 };
123
124        if let Some(start) = result.match_start {
125            let char_start = byte_to_char_offset(text, start);
126            vars.insert("MBEGIN".to_string(), (char_start + base).to_string());
127        }
128
129        if let Some(end) = result.match_end {
130            let char_end = byte_to_char_offset(text, end);
131            vars.insert("MEND".to_string(), (char_end + base - 1).to_string());
132        }
133
134        for (i, cap) in result.captures.iter().enumerate() {
135            if let Some(c) = cap {
136                vars.insert(format!("match[{}]", i + base), c.clone());
137            }
138        }
139
140        for (i, start) in result.capture_starts.iter().enumerate() {
141            if let Some(s) = start {
142                let char_start = byte_to_char_offset(text, *s);
143                vars.insert(
144                    format!("mbegin[{}]", i + base),
145                    (char_start + base).to_string(),
146                );
147            } else {
148                vars.insert(format!("mbegin[{}]", i + base), "-1".to_string());
149            }
150        }
151
152        for (i, end) in result.capture_ends.iter().enumerate() {
153            if let Some(e) = end {
154                let char_end = byte_to_char_offset(text, *e);
155                vars.insert(
156                    format!("mend[{}]", i + base),
157                    (char_end + base - 1).to_string(),
158                );
159            } else {
160                vars.insert(format!("mend[{}]", i + base), "-1".to_string());
161            }
162        }
163    }
164
165    vars
166}
167
168/// Conditional test for regex-match
169pub fn cond_regex_match(lhs: &str, rhs: &str, options: &RegexOptions) -> (bool, RegexMatch) {
170    match regex_match(lhs, rhs, options) {
171        Ok(result) => (result.matched, result),
172        Err(_) => (false, RegexMatch::no_match()),
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::*;

    // ---- regex_match -----------------------------------------------------

    #[test]
    fn test_regex_match_simple() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "hello", &opts).unwrap();
        assert!(result.matched);
        assert_eq!(result.full_match, Some("hello".to_string()));
    }

    #[test]
    fn test_regex_match_no_match() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "goodbye", &opts).unwrap();
        assert!(!result.matched);
    }

    #[test]
    fn test_regex_match_captures() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "(hello) (world)", &opts).unwrap();
        assert!(result.matched);
        assert_eq!(result.full_match, Some("hello world".to_string()));
        assert_eq!(result.captures.len(), 2);
        assert_eq!(result.captures[0], Some("hello".to_string()));
        assert_eq!(result.captures[1], Some("world".to_string()));
    }

    #[test]
    fn test_regex_match_case_insensitive() {
        let opts = RegexOptions {
            case_insensitive: true,
            ..Default::default()
        };
        let result = regex_match("HELLO WORLD", "hello", &opts).unwrap();
        assert!(result.matched);
    }

    #[test]
    fn test_regex_match_case_sensitive() {
        let opts = RegexOptions::default();
        let result = regex_match("HELLO WORLD", "hello", &opts).unwrap();
        assert!(!result.matched);
    }

    #[test]
    fn test_regex_match_positions() {
        let opts = RegexOptions::default();
        let result = regex_match("foo bar baz", "bar", &opts).unwrap();
        assert!(result.matched);
        assert_eq!(result.match_start, Some(4));
        assert_eq!(result.match_end, Some(7));
    }

    #[test]
    fn test_regex_match_invalid_pattern() {
        let opts = RegexOptions::default();
        let result = regex_match("test", "[invalid", &opts);
        assert!(result.is_err());
    }

    #[test]
    fn test_regex_match_unmatched_group_is_none() {
        // Only the second alternative participates in the match.
        let opts = RegexOptions::default();
        let result = regex_match("b", "(a)|(b)", &opts).unwrap();
        assert!(result.matched);
        assert_eq!(result.captures, vec![None, Some("b".to_string())]);
        assert_eq!(result.capture_starts, vec![None, Some(0)]);
        assert_eq!(result.capture_ends, vec![None, Some(1)]);
    }

    // ---- get_match_variables ---------------------------------------------

    #[test]
    fn test_get_match_variables_zsh() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "(hello) (world)", &opts).unwrap();
        let vars = get_match_variables(&result, "hello world", &opts);

        assert_eq!(vars.get("MATCH"), Some(&"hello world".to_string()));
        assert_eq!(vars.get("MBEGIN"), Some(&"1".to_string()));
        assert_eq!(vars.get("MEND"), Some(&"11".to_string()));
    }

    #[test]
    fn test_get_match_variables_zsh_captures() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "(hello) (world)", &opts).unwrap();
        let vars = get_match_variables(&result, "hello world", &opts);

        assert_eq!(vars.get("match[1]"), Some(&"hello".to_string()));
        assert_eq!(vars.get("match[2]"), Some(&"world".to_string()));
        assert_eq!(vars.get("mbegin[1]"), Some(&"1".to_string()));
        assert_eq!(vars.get("mend[1]"), Some(&"5".to_string()));
        assert_eq!(vars.get("mbegin[2]"), Some(&"7".to_string()));
        assert_eq!(vars.get("mend[2]"), Some(&"11".to_string()));
    }

    #[test]
    fn test_get_match_variables_ksh_arrays() {
        let opts = RegexOptions {
            ksh_arrays: true,
            ..Default::default()
        };
        let result = regex_match("foo bar", "(bar)", &opts).unwrap();
        let vars = get_match_variables(&result, "foo bar", &opts);

        assert_eq!(vars.get("MATCH"), Some(&"bar".to_string()));
        assert_eq!(vars.get("MBEGIN"), Some(&"4".to_string()));
        assert_eq!(vars.get("MEND"), Some(&"6".to_string()));
        assert_eq!(vars.get("match[0]"), Some(&"bar".to_string()));
        assert_eq!(vars.get("mbegin[0]"), Some(&"4".to_string()));
        assert_eq!(vars.get("mend[0]"), Some(&"6".to_string()));
    }

    #[test]
    fn test_get_match_variables_unmatched_group() {
        let opts = RegexOptions::default();
        let result = regex_match("b", "(a)|(b)", &opts).unwrap();
        let vars = get_match_variables(&result, "b", &opts);

        assert_eq!(vars.get("match[1]"), None);
        assert_eq!(vars.get("mbegin[1]"), Some(&"-1".to_string()));
        assert_eq!(vars.get("mend[1]"), Some(&"-1".to_string()));
        assert_eq!(vars.get("match[2]"), Some(&"b".to_string()));
        assert_eq!(vars.get("mbegin[2]"), Some(&"1".to_string()));
        assert_eq!(vars.get("mend[2]"), Some(&"1".to_string()));
    }

    #[test]
    fn test_get_match_variables_multibyte_text() {
        // Positions are reported in characters even though the match
        // offsets are stored in bytes.
        let opts = RegexOptions::default();
        let text = "h\u{e9}llo w\u{f6}rld";
        let result = regex_match(text, "w\u{f6}rld", &opts).unwrap();
        assert_eq!(result.match_start, Some(7));

        let vars = get_match_variables(&result, text, &opts);
        assert_eq!(vars.get("MBEGIN"), Some(&"7".to_string()));
        assert_eq!(vars.get("MEND"), Some(&"11".to_string()));
    }

    #[test]
    fn test_get_match_variables_no_match_is_empty() {
        let opts = RegexOptions::default();
        let result = regex_match("hello world", "goodbye", &opts).unwrap();
        let vars = get_match_variables(&result, "hello world", &opts);
        assert!(vars.is_empty());
    }

    #[test]
    fn test_get_match_variables_bash() {
        let opts = RegexOptions {
            bash_rematch: true,
            ..Default::default()
        };
        let result = regex_match("hello world", "(hello) (world)", &opts).unwrap();
        let vars = get_match_variables(&result, "hello world", &opts);

        assert_eq!(
            vars.get("BASH_REMATCH[0]"),
            Some(&"hello world".to_string())
        );
        assert_eq!(vars.get("BASH_REMATCH[1]"), Some(&"hello".to_string()));
        assert_eq!(vars.get("BASH_REMATCH[2]"), Some(&"world".to_string()));
    }

    // ---- cond_regex_match ------------------------------------------------

    #[test]
    fn test_cond_regex_match() {
        let opts = RegexOptions::default();
        let (matched, _) = cond_regex_match("hello world", "hello", &opts);
        assert!(matched);

        let (matched, _) = cond_regex_match("hello world", "goodbye", &opts);
        assert!(!matched);
    }

    #[test]
    fn test_cond_regex_match_invalid_pattern_is_no_match() {
        let opts = RegexOptions::default();
        let (matched, result) = cond_regex_match("test", "[invalid", &opts);
        assert!(!matched);
        assert!(!result.matched);
        assert_eq!(result.full_match, None);
    }

    // ---- byte_to_char_offset ---------------------------------------------

    #[test]
    fn test_byte_to_char_offset_ascii() {
        assert_eq!(byte_to_char_offset("hello", 0), 0);
        assert_eq!(byte_to_char_offset("hello", 5), 5);
    }

    #[test]
    fn test_byte_to_char_offset_unicode() {
        let s = "héllo";
        assert_eq!(byte_to_char_offset(s, 0), 0);
        assert_eq!(byte_to_char_offset(s, 1), 1);
        assert_eq!(byte_to_char_offset(s, 3), 2);
    }
}