perl_regex/analyzer/
capture.rs1use crate::syntax::cursor::quoted_literal_end;
2
3use super::parser::{parse_named_capture_name, parse_named_capture_name_from};
4
/// A named capture group found in a regular-expression pattern.
///
/// Values of this type are produced by `extract_named_captures`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// The group's name, as returned by the capture-name parser.
    pub name: String,
    /// The capture number assigned to the group; numbering starts at 1.
    pub index: usize,
    /// Source text of the group's body (the part after the name).
    pub pattern: String,
}
11
12pub(crate) fn extract_named_captures(pattern: &str) -> Vec<CaptureGroup> {
13 let bytes = pattern.as_bytes();
14 let mut result = Vec::new();
15 let mut i = 0;
16 let mut capture_index = 0;
17
18 while i < bytes.len() {
19 if bytes[i] == b'\\' {
20 if let Some(end) = quoted_literal_end(bytes, i) {
21 i = end;
22 continue;
23 }
24 i += 2;
25 continue;
26 }
27
28 if bytes[i] == b'[' {
29 i += 1;
30 while i < bytes.len() {
31 if bytes[i] == b'\\' {
32 i += 2;
33 } else if bytes[i] == b']' {
34 i += 1;
35 break;
36 } else {
37 i += 1;
38 }
39 }
40 continue;
41 }
42
43 if bytes[i] == b'(' {
44 i += 1;
45 if i < bytes.len() && bytes[i] == b'?' {
46 i += 1;
47 if i < bytes.len() && bytes[i] == b'<' {
48 i += 1;
49 if i < bytes.len() && (bytes[i] == b'=' || bytes[i] == b'!') {
50 i += 1;
51 continue;
52 }
53
54 if let Some((name, next)) = parse_named_capture_name_from(bytes, i, b'>') {
55 capture_index += 1;
56 i = next;
57 let (subpattern, next_i) = collect_subpattern(bytes, i);
58 i = next_i;
59 result.push(CaptureGroup {
60 name,
61 index: capture_index,
62 pattern: subpattern,
63 });
64 continue;
65 }
66 } else if i < bytes.len() && bytes[i] == b'\'' {
67 if let Some((name, next)) = parse_named_capture_name(bytes, i, b'\'', b'\'') {
68 capture_index += 1;
69 i = next;
70 let (subpattern, next_i) = collect_subpattern(bytes, i);
71 i = next_i;
72 result.push(CaptureGroup {
73 name,
74 index: capture_index,
75 pattern: subpattern,
76 });
77 continue;
78 }
79 } else if i + 1 < bytes.len() && bytes[i] == b'P' && bytes[i + 1] == b'<' {
80 i += 1;
81 if let Some((name, next)) = parse_named_capture_name(bytes, i, b'<', b'>') {
82 capture_index += 1;
83 i = next;
84 let (subpattern, next_i) = collect_subpattern(bytes, i);
85 i = next_i;
86 result.push(CaptureGroup {
87 name,
88 index: capture_index,
89 pattern: subpattern,
90 });
91 continue;
92 }
93 }
94 continue;
95 }
96
97 capture_index += 1;
98 continue;
99 }
100
101 i += 1;
102 }
103
104 result
105}
106
107fn collect_subpattern(bytes: &[u8], mut i: usize) -> (String, usize) {
108 let start = i;
109 let mut depth = 1usize;
110 while i < bytes.len() && depth > 0 {
111 if bytes[i] == b'\\' {
112 if let Some(end) = quoted_literal_end(bytes, i) {
113 i = end;
114 continue;
115 }
116 i += 2;
117 continue;
118 }
119
120 if bytes[i] == b'[' {
121 i += 1;
122 while i < bytes.len() {
123 if bytes[i] == b'\\' {
124 i += 2;
125 } else if bytes[i] == b']' {
126 i += 1;
127 break;
128 } else {
129 i += 1;
130 }
131 }
132 continue;
133 }
134
135 if bytes[i] == b'(' {
136 depth += 1;
137 } else if bytes[i] == b')' {
138 depth -= 1;
139 }
140 i += 1;
141 }
142
143 let subpattern = if i > 0 && start < i - 1 {
144 String::from_utf8_lossy(&bytes[start..i - 1]).into_owned()
145 } else {
146 String::new()
147 };
148
149 (subpattern, i)
150}