Skip to main content

perl_regex/analyzer/
capture.rs

1use crate::syntax::cursor::quoted_literal_end;
2
3use super::parser::{parse_named_capture_name, parse_named_capture_name_from};
4
/// A named capture group found in a regex pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// The group's name, without its surrounding delimiters.
    pub name: String,
    /// Capture group number; the first capturing group counted is 1.
    pub index: usize,
    /// Source text of the group's body, i.e. what follows the name and
    /// precedes the group's closing parenthesis.
    pub pattern: String,
}
11
12pub(crate) fn extract_named_captures(pattern: &str) -> Vec<CaptureGroup> {
13    let bytes = pattern.as_bytes();
14    let mut result = Vec::new();
15    let mut i = 0;
16    let mut capture_index = 0;
17
18    while i < bytes.len() {
19        if bytes[i] == b'\\' {
20            if let Some(end) = quoted_literal_end(bytes, i) {
21                i = end;
22                continue;
23            }
24            i += 2;
25            continue;
26        }
27
28        if bytes[i] == b'[' {
29            i += 1;
30            while i < bytes.len() {
31                if bytes[i] == b'\\' {
32                    i += 2;
33                } else if bytes[i] == b']' {
34                    i += 1;
35                    break;
36                } else {
37                    i += 1;
38                }
39            }
40            continue;
41        }
42
43        if bytes[i] == b'(' {
44            i += 1;
45            if i < bytes.len() && bytes[i] == b'?' {
46                i += 1;
47                if i < bytes.len() && bytes[i] == b'<' {
48                    i += 1;
49                    if i < bytes.len() && (bytes[i] == b'=' || bytes[i] == b'!') {
50                        i += 1;
51                        continue;
52                    }
53
54                    if let Some((name, next)) = parse_named_capture_name_from(bytes, i, b'>') {
55                        capture_index += 1;
56                        i = next;
57                        let (subpattern, next_i) = collect_subpattern(bytes, i);
58                        i = next_i;
59                        result.push(CaptureGroup {
60                            name,
61                            index: capture_index,
62                            pattern: subpattern,
63                        });
64                        continue;
65                    }
66                } else if i < bytes.len() && bytes[i] == b'\'' {
67                    if let Some((name, next)) = parse_named_capture_name(bytes, i, b'\'', b'\'') {
68                        capture_index += 1;
69                        i = next;
70                        let (subpattern, next_i) = collect_subpattern(bytes, i);
71                        i = next_i;
72                        result.push(CaptureGroup {
73                            name,
74                            index: capture_index,
75                            pattern: subpattern,
76                        });
77                        continue;
78                    }
79                } else if i + 1 < bytes.len() && bytes[i] == b'P' && bytes[i + 1] == b'<' {
80                    i += 1;
81                    if let Some((name, next)) = parse_named_capture_name(bytes, i, b'<', b'>') {
82                        capture_index += 1;
83                        i = next;
84                        let (subpattern, next_i) = collect_subpattern(bytes, i);
85                        i = next_i;
86                        result.push(CaptureGroup {
87                            name,
88                            index: capture_index,
89                            pattern: subpattern,
90                        });
91                        continue;
92                    }
93                }
94                continue;
95            }
96
97            capture_index += 1;
98            continue;
99        }
100
101        i += 1;
102    }
103
104    result
105}
106
107fn collect_subpattern(bytes: &[u8], mut i: usize) -> (String, usize) {
108    let start = i;
109    let mut depth = 1usize;
110    while i < bytes.len() && depth > 0 {
111        if bytes[i] == b'\\' {
112            if let Some(end) = quoted_literal_end(bytes, i) {
113                i = end;
114                continue;
115            }
116            i += 2;
117            continue;
118        }
119
120        if bytes[i] == b'[' {
121            i += 1;
122            while i < bytes.len() {
123                if bytes[i] == b'\\' {
124                    i += 2;
125                } else if bytes[i] == b']' {
126                    i += 1;
127                    break;
128                } else {
129                    i += 1;
130                }
131            }
132            continue;
133        }
134
135        if bytes[i] == b'(' {
136            depth += 1;
137        } else if bytes[i] == b')' {
138            depth -= 1;
139        }
140        i += 1;
141    }
142
143    let subpattern = if i > 0 && start < i - 1 {
144        String::from_utf8_lossy(&bytes[start..i - 1]).into_owned()
145    } else {
146        String::new()
147    };
148
149    (subpattern, i)
150}