Skip to main content

zsh/
pcre.rs

//! PCRE module - port of Modules/pcre.c
//!
//! Provides PCRE-style regex matching through the pcre_compile, pcre_match and pcre_study builtins.
//! Uses the Rust `regex` crate, whose syntax is Perl-like but does not support look-around or
//! backreferences, so not every PCRE pattern will compile.
5
6use regex::Regex;
7use std::collections::HashMap;
8
9/// Compiled PCRE pattern state
10#[derive(Debug)]
11pub struct PcreState {
12    pattern: Option<Regex>,
13    pattern_str: Option<String>,
14}
15
16impl Default for PcreState {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl PcreState {
23    pub fn new() -> Self {
24        Self {
25            pattern: None,
26            pattern_str: None,
27        }
28    }
29
30    pub fn has_pattern(&self) -> bool {
31        self.pattern.is_some()
32    }
33
34    pub fn clear(&mut self) {
35        self.pattern = None;
36        self.pattern_str = None;
37    }
38}
39
/// Flags controlling how `pcre_compile` builds the regex.
///
/// Every flag defaults to `false`.
#[derive(Clone, Debug, Default)]
pub struct PcreCompileOptions {
    /// Require the match to begin at the start of the searched text.
    pub anchored: bool,
    /// Case-insensitive matching (the `(?i)` flag).
    pub caseless: bool,
    /// Multi-line mode (the `(?m)` flag).
    pub multiline: bool,
    /// Extended / verbose syntax (the `(?x)` flag).
    pub extended: bool,
    /// Let `.` match newlines as well (the `(?s)` flag).
    pub dotall: bool,
}
49
/// Per-call settings for `pcre_match`.
///
/// All fields default to `None` / `0` / `false`.
#[derive(Clone, Debug, Default)]
pub struct PcreMatchOptions {
    /// Variable name associated with the whole match.
    /// NOTE(review): not read by `pcre_match` in this file; presumably consumed by the caller.
    pub match_var: Option<String>,
    /// Variable name associated with the positional captures.
    /// NOTE(review): not read by `pcre_match` in this file; presumably consumed by the caller.
    pub array_var: Option<String>,
    /// Variable name associated with the named captures.
    /// NOTE(review): not read by `pcre_match` in this file; presumably consumed by the caller.
    pub assoc_var: Option<String>,
    /// Byte offset into the subject at which the search starts.
    pub offset: usize,
    /// NOTE(review): not read by `pcre_match` in this file; looks like a request for offsets
    /// instead of text - confirm against the caller.
    pub return_offsets: bool,
    /// NOTE(review): not read by `pcre_match` in this file; confirm whether it has any effect.
    pub use_dfa: bool,
}
60
/// Outcome of a single `pcre_match` call.
#[derive(Clone, Debug)]
pub struct PcreMatchResult {
    /// `true` when the pattern matched.
    pub matched: bool,
    /// Text of the whole match (capture group 0), if any.
    pub full_match: Option<String>,
    /// Capture groups 1.. in order; `None` marks a group that did not participate.
    pub captures: Vec<Option<String>>,
    /// Named groups that participated in the match, keyed by group name.
    pub named_captures: HashMap<String, String>,
    /// Start position of the whole match, if any.
    pub match_start: Option<usize>,
    /// End position of the whole match, if any.
    pub match_end: Option<usize>,
}

impl PcreMatchResult {
    /// Build the "nothing matched" result: `matched` is false and every other field is empty.
    pub fn no_match() -> Self {
        let captures = Vec::new();
        let named_captures = HashMap::new();
        Self {
            matched: false,
            full_match: None,
            match_start: None,
            match_end: None,
            captures,
            named_captures,
        }
    }
}
84
85/// Compile a PCRE pattern
86pub fn pcre_compile(
87    pattern: &str,
88    options: &PcreCompileOptions,
89    state: &mut PcreState,
90) -> Result<(), String> {
91    state.clear();
92
93    let mut pattern_str = String::new();
94
95    if options.caseless {
96        pattern_str.push_str("(?i)");
97    }
98    if options.multiline {
99        pattern_str.push_str("(?m)");
100    }
101    if options.dotall {
102        pattern_str.push_str("(?s)");
103    }
104    if options.extended {
105        pattern_str.push_str("(?x)");
106    }
107    if options.anchored {
108        pattern_str.push('^');
109    }
110
111    pattern_str.push_str(pattern);
112
113    match Regex::new(&pattern_str) {
114        Ok(re) => {
115            state.pattern = Some(re);
116            state.pattern_str = Some(pattern_str);
117            Ok(())
118        }
119        Err(e) => Err(format!("error in regex: {}", e)),
120    }
121}
122
123/// Study a compiled pattern (no-op with Rust regex, but kept for API compat)
124pub fn pcre_study(state: &PcreState) -> Result<(), String> {
125    if state.pattern.is_none() {
126        return Err("no pattern has been compiled for study".to_string());
127    }
128    Ok(())
129}
130
131/// Match a string against the compiled pattern
132pub fn pcre_match(
133    text: &str,
134    options: &PcreMatchOptions,
135    state: &PcreState,
136) -> Result<PcreMatchResult, String> {
137    let re = state
138        .pattern
139        .as_ref()
140        .ok_or_else(|| "no pattern has been compiled".to_string())?;
141
142    let search_text = if options.offset > 0 && options.offset < text.len() {
143        &text[options.offset..]
144    } else if options.offset >= text.len() {
145        return Ok(PcreMatchResult::no_match());
146    } else {
147        text
148    };
149
150    let caps = match re.captures(search_text) {
151        Some(c) => c,
152        None => return Ok(PcreMatchResult::no_match()),
153    };
154
155    let full_match = caps.get(0).map(|m| m.as_str().to_string());
156    let match_start = caps.get(0).map(|m| m.start() + options.offset);
157    let match_end = caps.get(0).map(|m| m.end() + options.offset);
158
159    let mut captures = Vec::new();
160    for i in 1..caps.len() {
161        captures.push(caps.get(i).map(|m| m.as_str().to_string()));
162    }
163
164    let mut named_captures = HashMap::new();
165    for name in re.capture_names().flatten() {
166        if let Some(m) = caps.name(name) {
167            named_captures.insert(name.to_string(), m.as_str().to_string());
168        }
169    }
170
171    Ok(PcreMatchResult {
172        matched: true,
173        full_match,
174        captures,
175        named_captures,
176        match_start,
177        match_end,
178    })
179}
180
181/// Conditional test for pcre-match
182pub fn cond_pcre_match(lhs: &str, rhs: &str, caseless: bool) -> (bool, PcreMatchResult) {
183    let options = PcreCompileOptions {
184        caseless,
185        ..Default::default()
186    };
187
188    let mut state = PcreState::new();
189
190    if pcre_compile(rhs, &options, &mut state).is_err() {
191        return (false, PcreMatchResult::no_match());
192    }
193
194    let match_options = PcreMatchOptions::default();
195
196    match pcre_match(lhs, &match_options, &state) {
197        Ok(result) => (result.matched, result),
198        Err(_) => (false, PcreMatchResult::no_match()),
199    }
200}
201
202/// Execute pcre_compile builtin
203pub fn builtin_pcre_compile(
204    args: &[&str],
205    options: &PcreCompileOptions,
206    state: &mut PcreState,
207) -> (i32, String) {
208    if args.is_empty() {
209        return (1, "pcre_compile: pattern required\n".to_string());
210    }
211
212    match pcre_compile(args[0], options, state) {
213        Ok(()) => (0, String::new()),
214        Err(e) => (1, format!("pcre_compile: {}\n", e)),
215    }
216}
217
218/// Execute pcre_study builtin
219pub fn builtin_pcre_study(state: &PcreState) -> (i32, String) {
220    match pcre_study(state) {
221        Ok(()) => (0, String::new()),
222        Err(e) => (1, format!("pcre_study: {}\n", e)),
223    }
224}
225
226/// Execute pcre_match builtin
227pub fn builtin_pcre_match(
228    args: &[&str],
229    options: &PcreMatchOptions,
230    state: &PcreState,
231) -> (i32, PcreMatchResult) {
232    if args.is_empty() {
233        return (1, PcreMatchResult::no_match());
234    }
235
236    match pcre_match(args[0], options, state) {
237        Ok(result) => {
238            if result.matched {
239                (0, result)
240            } else {
241                (1, result)
242            }
243        }
244        Err(_) => (1, PcreMatchResult::no_match()),
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile `pattern` with `options` into a fresh state, failing the test on error.
    fn compiled_with(pattern: &str, options: &PcreCompileOptions) -> PcreState {
        let mut state = PcreState::new();
        pcre_compile(pattern, options, &mut state).expect("pattern should compile");
        state
    }

    /// Compile `pattern` with default options.
    fn compiled(pattern: &str) -> PcreState {
        compiled_with(pattern, &PcreCompileOptions::default())
    }

    /// Match `text` against `state` using default match options.
    fn run_match(text: &str, state: &PcreState) -> PcreMatchResult {
        pcre_match(text, &PcreMatchOptions::default(), state).expect("match should not error")
    }

    #[test]
    fn test_pcre_state_new() {
        assert!(!PcreState::new().has_pattern());
    }

    #[test]
    fn test_pcre_compile_simple() {
        let mut state = PcreState::new();
        let outcome = pcre_compile("hello", &PcreCompileOptions::default(), &mut state);
        assert!(outcome.is_ok());
        assert!(state.has_pattern());
    }

    #[test]
    fn test_pcre_compile_invalid() {
        let mut state = PcreState::new();
        let outcome = pcre_compile("[invalid", &PcreCompileOptions::default(), &mut state);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pcre_compile_caseless() {
        let options = PcreCompileOptions {
            caseless: true,
            ..Default::default()
        };
        let state = compiled_with("hello", &options);

        assert!(run_match("HELLO WORLD", &state).matched);
    }

    #[test]
    fn test_pcre_study_no_pattern() {
        assert!(pcre_study(&PcreState::new()).is_err());
    }

    #[test]
    fn test_pcre_study_with_pattern() {
        let state = compiled("hello");
        assert!(pcre_study(&state).is_ok());
    }

    #[test]
    fn test_pcre_match_simple() {
        let state = compiled("hello");
        let found = run_match("hello world", &state);

        assert!(found.matched);
        assert_eq!(found.full_match.as_deref(), Some("hello"));
    }

    #[test]
    fn test_pcre_match_no_match() {
        let state = compiled("hello");
        assert!(!run_match("goodbye world", &state).matched);
    }

    #[test]
    fn test_pcre_match_captures() {
        let state = compiled(r"(\w+) (\w+)");
        let found = run_match("hello world", &state);

        assert!(found.matched);
        let groups: Vec<Option<&str>> = found.captures.iter().map(|g| g.as_deref()).collect();
        assert_eq!(groups, vec![Some("hello"), Some("world")]);
    }

    #[test]
    fn test_pcre_match_named_captures() {
        let state = compiled(r"(?P<first>\w+) (?P<second>\w+)");
        let found = run_match("hello world", &state);

        assert!(found.matched);
        let named = |key: &str| found.named_captures.get(key).map(String::as_str);
        assert_eq!(named("first"), Some("hello"));
        assert_eq!(named("second"), Some("world"));
    }

    #[test]
    fn test_pcre_match_with_offset() {
        let state = compiled("world");
        let match_opts = PcreMatchOptions {
            offset: 6,
            ..Default::default()
        };

        let found = pcre_match("hello world", &match_opts, &state).unwrap();
        assert!(found.matched);
        assert_eq!(found.match_start, Some(6));
    }

    #[test]
    fn test_cond_pcre_match() {
        // (pattern, caseless, expected outcome) against the same subject.
        let cases = [
            ("hello", false, true),
            ("HELLO", true, true),
            ("HELLO", false, false),
        ];
        for &(pattern, caseless, expected) in &cases {
            let (matched, _) = cond_pcre_match("hello world", pattern, caseless);
            assert_eq!(matched, expected);
        }
    }

    #[test]
    fn test_builtin_pcre_compile_no_args() {
        let mut state = PcreState::new();
        let (status, _) = builtin_pcre_compile(&[], &PcreCompileOptions::default(), &mut state);
        assert_eq!(status, 1);
    }

    #[test]
    fn test_builtin_pcre_match_no_pattern() {
        let state = PcreState::new();
        let (status, _) = builtin_pcre_match(&["test"], &PcreMatchOptions::default(), &state);
        assert_eq!(status, 1);
    }
}