Skip to main content

perl_regex/analyzer/
capture.rs

1use super::parser::{parse_named_capture_name, parse_named_capture_name_from};
2
/// A named capture group found in a regex pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// The name declared for the group.
    pub name: String,
    /// 1-based position of the group among the capturing groups counted by the scanner.
    pub index: usize,
    /// Raw text of the group body, without the name prefix or the closing parenthesis.
    pub pattern: String,
}
9
10pub(crate) fn extract_named_captures(pattern: &str) -> Vec<CaptureGroup> {
11    let bytes = pattern.as_bytes();
12    let mut result = Vec::new();
13    let mut i = 0;
14    let mut capture_index = 0;
15
16    while i < bytes.len() {
17        if bytes[i] == b'\\' {
18            i += 2;
19            continue;
20        }
21
22        if bytes[i] == b'[' {
23            i += 1;
24            while i < bytes.len() {
25                if bytes[i] == b'\\' {
26                    i += 2;
27                } else if bytes[i] == b']' {
28                    i += 1;
29                    break;
30                } else {
31                    i += 1;
32                }
33            }
34            continue;
35        }
36
37        if bytes[i] == b'(' {
38            i += 1;
39            if i < bytes.len() && bytes[i] == b'?' {
40                i += 1;
41                if i < bytes.len() && bytes[i] == b'<' {
42                    i += 1;
43                    if i < bytes.len() && (bytes[i] == b'=' || bytes[i] == b'!') {
44                        i += 1;
45                        continue;
46                    }
47
48                    if let Some((name, next)) = parse_named_capture_name_from(bytes, i, b'>') {
49                        capture_index += 1;
50                        i = next;
51                        let (subpattern, next_i) = collect_subpattern(bytes, i);
52                        i = next_i;
53                        result.push(CaptureGroup {
54                            name,
55                            index: capture_index,
56                            pattern: subpattern,
57                        });
58                        continue;
59                    }
60                } else if i < bytes.len() && bytes[i] == b'\'' {
61                    if let Some((name, next)) = parse_named_capture_name(bytes, i, b'\'', b'\'') {
62                        capture_index += 1;
63                        i = next;
64                        let (subpattern, next_i) = collect_subpattern(bytes, i);
65                        i = next_i;
66                        result.push(CaptureGroup {
67                            name,
68                            index: capture_index,
69                            pattern: subpattern,
70                        });
71                        continue;
72                    }
73                }
74                continue;
75            }
76
77            capture_index += 1;
78            continue;
79        }
80
81        i += 1;
82    }
83
84    result
85}
86
/// Collects the body of a group whose opener has already been consumed.
///
/// `i` is the index of the first byte of the group body. The scan tracks parenthesis
/// nesting, skipping escaped bytes and bracketed character classes, until the `)` that
/// balances the already-consumed opener is found.
///
/// Returns the body text (closing `)` excluded) together with the index just past that `)`.
/// If the group is never closed, the whole remainder of the input is returned as the body
/// and the index is `bytes.len()`. If `i` is already at or past the end of `bytes`, an
/// empty body is returned along with `i` unchanged.
fn collect_subpattern(bytes: &[u8], mut i: usize) -> (String, usize) {
    let start = i;
    if start >= bytes.len() {
        return (String::new(), start);
    }

    let mut depth = 1usize;
    let mut close_at = None;
    while i < bytes.len() {
        match bytes[i] {
            // A backslash escapes the next byte, so it cannot affect nesting.
            b'\\' => i += 2,
            b'[' => {
                i += 1;
                // In Perl a `]` placed first in a class (optionally after `^`) is a literal
                // member of the class, not its terminator.
                if bytes.get(i) == Some(&b'^') {
                    i += 1;
                }
                if bytes.get(i) == Some(&b']') {
                    i += 1;
                }
                while i < bytes.len() {
                    match bytes[i] {
                        b'\\' => i += 2,
                        b']' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            b'(' => {
                depth += 1;
                i += 1;
            }
            b')' => {
                depth -= 1;
                if depth == 0 {
                    close_at = Some(i);
                    break;
                }
                i += 1;
            }
            _ => i += 1,
        }
    }

    // Only strip a trailing byte when it really is the balancing `)`; an unterminated
    // group keeps everything up to the end of the input.
    let (body_end, next) = match close_at {
        Some(close) => (close, close + 1),
        None => (bytes.len(), bytes.len()),
    };

    (
        String::from_utf8_lossy(&bytes[start..body_end]).into_owned(),
        next,
    )
}