// perl_regex/analyzer/capture.rs
use super::parser::{parse_named_capture_name, parse_named_capture_name_from};

/// A named capture group found in a regex pattern.
///
/// Values are produced by `extract_named_captures`, which numbers capturing
/// groups starting from 1 as it encounters them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// The group's name, without the surrounding `<...>` or `'...'` delimiters.
    pub name: String,
    /// The group's capture number (the first capturing group is 1).
    pub index: usize,
    /// The text of the group's body: everything after the name delimiter up to,
    /// but not including, the group's closing parenthesis.
    pub pattern: String,
}
9
10pub(crate) fn extract_named_captures(pattern: &str) -> Vec<CaptureGroup> {
11 let bytes = pattern.as_bytes();
12 let mut result = Vec::new();
13 let mut i = 0;
14 let mut capture_index = 0;
15
16 while i < bytes.len() {
17 if bytes[i] == b'\\' {
18 i += 2;
19 continue;
20 }
21
22 if bytes[i] == b'[' {
23 i += 1;
24 while i < bytes.len() {
25 if bytes[i] == b'\\' {
26 i += 2;
27 } else if bytes[i] == b']' {
28 i += 1;
29 break;
30 } else {
31 i += 1;
32 }
33 }
34 continue;
35 }
36
37 if bytes[i] == b'(' {
38 i += 1;
39 if i < bytes.len() && bytes[i] == b'?' {
40 i += 1;
41 if i < bytes.len() && bytes[i] == b'<' {
42 i += 1;
43 if i < bytes.len() && (bytes[i] == b'=' || bytes[i] == b'!') {
44 i += 1;
45 continue;
46 }
47
48 if let Some((name, next)) = parse_named_capture_name_from(bytes, i, b'>') {
49 capture_index += 1;
50 i = next;
51 let (subpattern, next_i) = collect_subpattern(bytes, i);
52 i = next_i;
53 result.push(CaptureGroup {
54 name,
55 index: capture_index,
56 pattern: subpattern,
57 });
58 continue;
59 }
60 } else if i < bytes.len() && bytes[i] == b'\'' {
61 if let Some((name, next)) = parse_named_capture_name(bytes, i, b'\'', b'\'') {
62 capture_index += 1;
63 i = next;
64 let (subpattern, next_i) = collect_subpattern(bytes, i);
65 i = next_i;
66 result.push(CaptureGroup {
67 name,
68 index: capture_index,
69 pattern: subpattern,
70 });
71 continue;
72 }
73 }
74 continue;
75 }
76
77 capture_index += 1;
78 continue;
79 }
80
81 i += 1;
82 }
83
84 result
85}
86
/// Collects the body of a group whose opening parenthesis (and any name
/// prefix) has already been consumed.
///
/// Scanning starts at byte offset `i` with a nesting depth of 1 and stops at
/// the `)` that brings the depth back to 0. Backslash escapes skip the
/// following byte, and character classes (`[` up to the first unescaped `]`)
/// are skipped wholesale, so parentheses inside them do not affect the depth.
///
/// Returns the body text (excluding the closing `)`) and the offset at which
/// scanning stopped: just past the closing `)` when the group is terminated.
/// If the group is never closed, the body runs to the end of `bytes`; in that
/// case the returned offset is where the scan ran out and may exceed
/// `bytes.len()` when the input ends in a dangling backslash.
fn collect_subpattern(bytes: &[u8], mut i: usize) -> (String, usize) {
    let start = i;
    let mut depth = 1usize;
    // Offset of the `)` that closes this group, once found.
    let mut close = None;

    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i += 2,
            b'[' => {
                i += 1;
                while i < bytes.len() {
                    match bytes[i] {
                        b'\\' => i += 2,
                        b']' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            b'(' => {
                depth += 1;
                i += 1;
            }
            b')' => {
                depth -= 1;
                if depth == 0 {
                    close = Some(i);
                    i += 1;
                    break;
                }
                i += 1;
            }
            _ => i += 1,
        }
    }

    // An unterminated group keeps everything up to the end of the input
    // instead of losing its final byte.
    let end = close.unwrap_or(bytes.len());
    let subpattern = bytes
        .get(start..end)
        .map(|body| String::from_utf8_lossy(body).into_owned())
        .unwrap_or_default();

    (subpattern, i)
}