1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
use crate::PerlLexer;
impl PerlLexer<'_> {
    /// Consumes a balanced `open`…`close` segment starting at the cursor, with
    /// no quote-boundary recovery.
    ///
    /// The cursor must be on `open`; otherwise nothing is consumed and `None`
    /// is returned. Nested `open`/`close` pairs are tracked by depth, and a
    /// backslash skips itself plus the following character (if any), so an
    /// escaped delimiter never changes the depth.
    ///
    /// Returns `Some(self.position)` as observed right after the matching
    /// `close` has been consumed, or `None` if `current_char()` runs out
    /// before the segment is closed.
    ///
    /// Note: `open` is tested before `close`, so when both are the same
    /// character every occurrence deepens the nesting and the segment can
    /// never close.
    ///
    /// For use inside double-quoted string interpolation where the outer `"` must
    /// act as a recovery boundary, use
    /// [`Self::consume_balanced_segment_in_string`] instead.
    #[allow(dead_code)] // Recovery helper retained for future interpolation callers.
    #[inline]
    pub(crate) fn consume_balanced_segment(&mut self, open: char, close: char) -> Option<usize> {
        self.consume_balanced_segment_until(open, close, None)
    }

    /// Consumes a balanced `open`…`close` segment, treating `terminator` as a
    /// hard recovery boundary.
    ///
    /// Behaves like [`Self::consume_balanced_segment`], except that an
    /// unescaped `terminator` seen before the segment closes aborts the scan:
    /// `None` is returned and the cursor is left *on* the terminator (it is
    /// not consumed). A backslash-escaped terminator is skipped like any other
    /// escaped character.
    ///
    /// Returns `Some(self.position)` as observed right after the matching
    /// `close` has been consumed, or `None` if the cursor is not on `open`,
    /// the terminator is reached first, or input ends first.
    #[inline]
    pub(crate) fn consume_balanced_segment_in_string(
        &mut self,
        open: char,
        close: char,
        terminator: char,
    ) -> Option<usize> {
        self.consume_balanced_segment_until(open, close, Some(terminator))
    }

    /// Shared scanner behind the two public entry points; `terminator` is
    /// `None` when no recovery boundary applies.
    fn consume_balanced_segment_until(
        &mut self,
        open: char,
        close: char,
        terminator: Option<char>,
    ) -> Option<usize> {
        if self.current_char() != Some(open) {
            return None;
        }
        self.advance();
        // The opening delimiter has been consumed, so we start one level deep.
        // `depth` is at least 1 whenever the loop body runs, so the decrement
        // below cannot underflow.
        let mut depth = 1usize;

        while let Some(ch) = self.current_char() {
            match ch {
                '\\' => {
                    // Skip the backslash and, if present, the escaped character.
                    self.advance();
                    if self.current_char().is_some() {
                        self.advance();
                    }
                }
                c if terminator == Some(c) => {
                    // Local recovery for interpolation tails in quoted strings:
                    // stop at the closing quote so the outer string parser can
                    // still terminate this token cleanly.
                    return None;
                }
                c if c == open => {
                    depth += 1;
                    self.advance();
                }
                c if c == close => {
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return Some(self.position);
                    }
                }
                _ => self.advance(),
            }
        }

        // Ran out of input before the segment was closed.
        None
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Nested parentheses and a backslash-escaped `)` must both be stepped
    /// over, leaving the lexer just past the outermost closing delimiter.
    #[test]
    fn consume_balanced_segment_handles_nested_segments_and_escapes() {
        let mut lx = PerlLexer::new("(a(\\)b)c)");
        assert_eq!(lx.consume_balanced_segment('(', ')'), Some(9));
        assert_eq!(lx.position, 9);
    }

    /// Input that ends while a segment is still open yields `None`.
    #[test]
    fn consume_balanced_segment_returns_none_for_unbalanced_segment() {
        let mut lx = PerlLexer::new("(a(b)");
        assert_eq!(lx.consume_balanced_segment('(', ')'), None);
    }

    /// The string-aware variant gives up at the terminator and leaves the
    /// cursor on it so the enclosing string can still be closed.
    #[test]
    fn consume_balanced_segment_in_string_stops_at_terminator_for_recovery() {
        let mut lx = PerlLexer::new("(${foo\"tail");
        assert_eq!(lx.consume_balanced_segment_in_string('(', ')', '"'), None);
        assert_eq!(lx.current_char(), Some('"'));
    }
}