shuck_parser/parser/lexer/
cursor.rs1use super::*;
2
3impl<'a> Lexer<'a> {
4 pub fn new(input: &'a str) -> Self {
6 Self::with_max_subst_depth_and_profile(
7 input,
8 DEFAULT_MAX_SUBST_DEPTH,
9 &ShellProfile::native(super::ShellDialect::Bash),
10 None,
11 )
12 }
13
14 pub(in crate::parser) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
17 Self::with_max_subst_depth_and_profile(
18 input,
19 max_depth,
20 &ShellProfile::native(super::ShellDialect::Bash),
21 None,
22 )
23 }
24
25 #[cfg(test)]
27 pub(in crate::parser) fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
28 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
29 .then(|| ZshOptionTimeline::build(input, shell_profile))
30 .flatten()
31 .map(Arc::new);
32 Self::with_max_subst_depth_and_profile(
33 input,
34 DEFAULT_MAX_SUBST_DEPTH,
35 shell_profile,
36 zsh_timeline,
37 )
38 }
39
40 pub(crate) fn with_max_subst_depth_and_profile(
41 input: &'a str,
42 max_depth: usize,
43 shell_profile: &ShellProfile,
44 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
45 ) -> Self {
46 Self {
47 input,
48 offset: 0,
49 cursor: Cursor::new(input),
50 position_map: PositionMap::new(input),
51 reinject_buf: VecDeque::new(),
52 reinject_resume_offset: None,
53 max_subst_depth: max_depth,
54 initial_zsh_options: shell_profile.zsh_options().cloned(),
55 zsh_timeline,
56 zsh_timeline_index: 0,
57 #[cfg(feature = "benchmarking")]
58 benchmark_counters: None,
59 }
60 }
61
62 pub(in crate::parser) fn position_at_offset(&self, offset: usize) -> Position {
63 self.position_map.position_uncached(offset)
64 }
65
66 pub(in crate::parser) fn current_position(&mut self) -> Position {
67 #[cfg(feature = "benchmarking")]
68 self.maybe_record_current_position_call();
69 self.position_map.position(self.offset)
70 }
71
72 #[cfg(feature = "benchmarking")]
73 pub(crate) fn enable_benchmark_counters(&mut self) {
74 self.benchmark_counters = Some(LexerBenchmarkCounters::default());
75 }
76
77 #[cfg(feature = "benchmarking")]
78 pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
79 self.benchmark_counters.unwrap_or_default()
80 }
81
82 #[cfg(feature = "benchmarking")]
83 pub(in crate::parser) fn maybe_record_current_position_call(&mut self) {
84 if let Some(counters) = &mut self.benchmark_counters {
85 counters.current_position_calls += 1;
86 }
87 }
88
89 pub(in crate::parser) fn sync_offset_to_cursor(&mut self) {
90 if self.reinject_buf.is_empty()
91 && let Some(offset) = self.reinject_resume_offset.take()
92 {
93 self.offset = offset;
94 }
95 }
96
97 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
103 self.next_lexed_token().map(|token| token.kind)
104 }
105
106 pub(in crate::parser) fn peek_char(&mut self) -> Option<char> {
107 self.sync_offset_to_cursor();
108 if let Some(&ch) = self.reinject_buf.front() {
109 Some(ch)
110 } else {
111 self.cursor.first()
112 }
113 }
114
115 pub(in crate::parser) fn advance(&mut self) -> Option<char> {
116 self.sync_offset_to_cursor();
117 let ch = if !self.reinject_buf.is_empty() {
118 self.reinject_buf.pop_front()
119 } else {
120 self.cursor.bump()
121 };
122 if let Some(c) = ch {
123 self.offset += c.len_utf8();
124 }
125 ch
126 }
127
128 pub(in crate::parser) fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
129 self.reinject_buf
130 .iter()
131 .copied()
132 .chain(self.cursor.rest().chars())
133 }
134
135 pub(in crate::parser) fn second_char(&self) -> Option<char> {
136 match self.reinject_buf.len() {
137 0 => self.cursor.second(),
138 1 => self.cursor.first(),
139 _ => self.reinject_buf.get(1).copied(),
140 }
141 }
142
143 pub(in crate::parser) fn third_char(&self) -> Option<char> {
144 match self.reinject_buf.len() {
145 0 => self.cursor.third(),
146 1 => self.cursor.second(),
147 2 => self.cursor.first(),
148 _ => self.reinject_buf.get(2).copied(),
149 }
150 }
151
152 pub(in crate::parser) fn fourth_char(&self) -> Option<char> {
153 match self.reinject_buf.len() {
154 0 => self.cursor.rest().chars().nth(3),
155 1 => self.cursor.third(),
156 2 => self.cursor.second(),
157 3 => self.cursor.first(),
158 _ => self.reinject_buf.get(3).copied(),
159 }
160 }
161
162 pub(in crate::parser) fn consume_source_bytes(&mut self, byte_len: usize) {
163 debug_assert!(self.reinject_buf.is_empty());
164 self.sync_offset_to_cursor();
165 self.offset += byte_len;
166 self.cursor.skip_bytes(byte_len);
167 }
168
169 pub(in crate::parser) fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
170 debug_assert!(self.reinject_buf.is_empty());
171 self.offset += byte_len;
172 }
173
174 pub(in crate::parser) fn consume_ascii_chars(&mut self, count: usize) {
175 if self.reinject_buf.is_empty() {
176 self.consume_source_bytes(count);
177 return;
178 }
179
180 for _ in 0..count {
181 self.advance();
182 }
183 }
184
185 pub(in crate::parser) fn source_horizontal_whitespace_len(&self) -> usize {
186 self.cursor
187 .rest()
188 .as_bytes()
189 .iter()
190 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
191 .count()
192 }
193
194 pub(in crate::parser) fn source_ascii_plain_word_len(&self) -> usize {
195 self.cursor
196 .rest()
197 .as_bytes()
198 .iter()
199 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
200 .count()
201 }
202
203 pub(in crate::parser) fn find_double_quote_special(source: &str) -> Option<usize> {
204 source
205 .as_bytes()
206 .iter()
207 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
208 }
209
210 pub(in crate::parser) fn ensure_capture_from_source(
211 &self,
212 capture: &mut Option<String>,
213 start: Position,
214 end: Position,
215 ) {
216 if capture.is_none() {
217 *capture = Some(self.input[start.offset..end.offset].to_string());
218 }
219 }
220
221 pub(in crate::parser) fn push_capture_char(capture: &mut Option<String>, ch: char) {
222 if let Some(text) = capture.as_mut() {
223 text.push(ch);
224 }
225 }
226
227 pub(in crate::parser) fn push_capture_str(capture: &mut Option<String>, text: &str) {
228 if let Some(current) = capture.as_mut() {
229 current.push_str(text);
230 }
231 }
232
233 pub(in crate::parser) fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
234 if let Some(timeline) = self.zsh_timeline.as_ref() {
235 while self.zsh_timeline_index < timeline.entries.len()
236 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
237 {
238 self.zsh_timeline_index += 1;
239 }
240 return if self.zsh_timeline_index == 0 {
241 self.initial_zsh_options.as_ref()
242 } else {
243 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
244 };
245 }
246
247 self.initial_zsh_options.as_ref()
248 }
249
250 pub(in crate::parser) fn comments_enabled(&mut self) -> bool {
251 !self
252 .current_zsh_options()
253 .is_some_and(|options| options.interactive_comments.is_definitely_off())
254 }
255
256 pub(in crate::parser) fn rc_quotes_enabled(&mut self) -> bool {
257 self.current_zsh_options()
258 .is_some_and(|options| options.rc_quotes.is_definitely_on())
259 }
260
261 pub(in crate::parser) fn ignore_braces_enabled(&mut self) -> bool {
262 self.current_zsh_options()
263 .is_some_and(|options| options.ignore_braces.is_definitely_on())
264 }
265
266 pub(in crate::parser) fn ignore_close_braces_enabled(&mut self) -> bool {
267 self.current_zsh_options().is_some_and(|options| {
268 options.ignore_braces.is_definitely_on()
269 || options.ignore_close_braces.is_definitely_on()
270 })
271 }
272
273 pub(in crate::parser) fn brace_ccl_enabled(&mut self) -> bool {
274 self.current_zsh_options()
275 .is_some_and(|options| options.brace_ccl.is_definitely_on())
276 }
277
278 pub(in crate::parser) fn should_treat_hash_as_word_char(&mut self) -> bool {
279 if !self.comments_enabled() {
280 return true;
281 }
282 self.reinject_buf.is_empty()
283 && (self
284 .input
285 .get(..self.offset)
286 .and_then(|prefix| prefix.chars().next_back())
287 .is_some_and(|prev| {
288 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
289 })
290 || self.is_inside_unclosed_double_paren_on_line())
291 }
292
293 pub(in crate::parser) fn current_word_text<'b>(
294 &'b self,
295 start: Position,
296 capture: &'b Option<String>,
297 ) -> &'b str {
298 capture
299 .as_deref()
300 .unwrap_or(&self.input[start.offset..self.offset])
301 }
302
303 pub(in crate::parser) fn current_word_surface_is_single_char(
304 &self,
305 start: Position,
306 capture: &Option<String>,
307 target: char,
308 ) -> bool {
309 let text = self.current_word_text(start, capture);
310 if !text.contains('\x00') {
311 let mut encoded = [0; 4];
312 return text == target.encode_utf8(&mut encoded);
313 }
314
315 let mut chars = text.chars().filter(|&ch| ch != '\x00');
316 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
317 }
318
319 pub(in crate::parser) fn current_word_surface_last_char<'b>(
320 &'b self,
321 start: Position,
322 capture: &'b Option<String>,
323 ) -> Option<char> {
324 self.current_word_text(start, capture)
325 .chars()
326 .rev()
327 .find(|&ch| ch != '\x00')
328 }
329
330 pub(in crate::parser) fn current_word_surface_ends_with_char(
331 &self,
332 start: Position,
333 capture: &Option<String>,
334 target: char,
335 ) -> bool {
336 self.current_word_surface_last_char(start, capture) == Some(target)
337 }
338
339 pub(in crate::parser) fn current_word_surface_ends_with_extglob_prefix(
340 &self,
341 start: Position,
342 capture: &Option<String>,
343 ) -> bool {
344 self.current_word_surface_last_char(start, capture)
345 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
346 }
347
348 pub(in crate::parser) fn current_word_surface_can_take_zsh_glob_modifier_suffix(
349 &mut self,
350 start: Position,
351 capture: &Option<String>,
352 ) -> bool {
353 if self.current_zsh_options().is_none() || self.peek_char() != Some('(') {
354 return false;
355 }
356
357 let text = self.current_word_text(start, capture);
358 if !text.contains('/') {
359 return false;
360 }
361
362 let mut chars = self.lookahead_chars();
363 matches!((chars.next(), chars.next()), (Some('('), Some(':')))
364 }
365}