// shuck_parser/parser/lexer/cursor.rs

1use super::*;
2
3impl<'a> Lexer<'a> {
4    /// Create a new bash-profile lexer for the given input.
5    pub fn new(input: &'a str) -> Self {
6        Self::with_max_subst_depth_and_profile(
7            input,
8            DEFAULT_MAX_SUBST_DEPTH,
9            &ShellProfile::native(super::ShellDialect::Bash),
10            None,
11        )
12    }
13
14    /// Create a new lexer with a custom max substitution nesting depth.
15    /// Limits recursion in read_command_subst_into().
16    pub(in crate::parser) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
17        Self::with_max_subst_depth_and_profile(
18            input,
19            max_depth,
20            &ShellProfile::native(super::ShellDialect::Bash),
21            None,
22        )
23    }
24
25    /// Create a new lexer using the provided shell profile.
26    #[cfg(test)]
27    pub(in crate::parser) fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
28        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
29            .then(|| ZshOptionTimeline::build(input, shell_profile))
30            .flatten()
31            .map(Arc::new);
32        Self::with_max_subst_depth_and_profile(
33            input,
34            DEFAULT_MAX_SUBST_DEPTH,
35            shell_profile,
36            zsh_timeline,
37        )
38    }
39
40    pub(crate) fn with_max_subst_depth_and_profile(
41        input: &'a str,
42        max_depth: usize,
43        shell_profile: &ShellProfile,
44        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
45    ) -> Self {
46        Self {
47            input,
48            offset: 0,
49            cursor: Cursor::new(input),
50            position_map: PositionMap::new(input),
51            reinject_buf: VecDeque::new(),
52            reinject_resume_offset: None,
53            max_subst_depth: max_depth,
54            initial_zsh_options: shell_profile.zsh_options().cloned(),
55            zsh_timeline,
56            zsh_timeline_index: 0,
57            #[cfg(feature = "benchmarking")]
58            benchmark_counters: None,
59        }
60    }
61
62    pub(in crate::parser) fn position_at_offset(&self, offset: usize) -> Position {
63        self.position_map.position_uncached(offset)
64    }
65
66    pub(in crate::parser) fn current_position(&mut self) -> Position {
67        #[cfg(feature = "benchmarking")]
68        self.maybe_record_current_position_call();
69        self.position_map.position(self.offset)
70    }
71
72    #[cfg(feature = "benchmarking")]
73    pub(crate) fn enable_benchmark_counters(&mut self) {
74        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
75    }
76
77    #[cfg(feature = "benchmarking")]
78    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
79        self.benchmark_counters.unwrap_or_default()
80    }
81
82    #[cfg(feature = "benchmarking")]
83    pub(in crate::parser) fn maybe_record_current_position_call(&mut self) {
84        if let Some(counters) = &mut self.benchmark_counters {
85            counters.current_position_calls += 1;
86        }
87    }
88
89    pub(in crate::parser) fn sync_offset_to_cursor(&mut self) {
90        if self.reinject_buf.is_empty()
91            && let Some(offset) = self.reinject_resume_offset.take()
92        {
93            self.offset = offset;
94        }
95    }
96
97    /// Get the next token kind from the input.
98    ///
99    /// This skips whitespace and line comments, matching
100    /// [`Lexer::next_lexed_token`]. It is useful for callers that only need the
101    /// token stream shape.
102    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
103        self.next_lexed_token().map(|token| token.kind)
104    }
105
106    pub(in crate::parser) fn peek_char(&mut self) -> Option<char> {
107        self.sync_offset_to_cursor();
108        if let Some(&ch) = self.reinject_buf.front() {
109            Some(ch)
110        } else {
111            self.cursor.first()
112        }
113    }
114
115    pub(in crate::parser) fn advance(&mut self) -> Option<char> {
116        self.sync_offset_to_cursor();
117        let ch = if !self.reinject_buf.is_empty() {
118            self.reinject_buf.pop_front()
119        } else {
120            self.cursor.bump()
121        };
122        if let Some(c) = ch {
123            self.offset += c.len_utf8();
124        }
125        ch
126    }
127
128    pub(in crate::parser) fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
129        self.reinject_buf
130            .iter()
131            .copied()
132            .chain(self.cursor.rest().chars())
133    }
134
135    pub(in crate::parser) fn second_char(&self) -> Option<char> {
136        match self.reinject_buf.len() {
137            0 => self.cursor.second(),
138            1 => self.cursor.first(),
139            _ => self.reinject_buf.get(1).copied(),
140        }
141    }
142
143    pub(in crate::parser) fn third_char(&self) -> Option<char> {
144        match self.reinject_buf.len() {
145            0 => self.cursor.third(),
146            1 => self.cursor.second(),
147            2 => self.cursor.first(),
148            _ => self.reinject_buf.get(2).copied(),
149        }
150    }
151
152    pub(in crate::parser) fn fourth_char(&self) -> Option<char> {
153        match self.reinject_buf.len() {
154            0 => self.cursor.rest().chars().nth(3),
155            1 => self.cursor.third(),
156            2 => self.cursor.second(),
157            3 => self.cursor.first(),
158            _ => self.reinject_buf.get(3).copied(),
159        }
160    }
161
162    pub(in crate::parser) fn consume_source_bytes(&mut self, byte_len: usize) {
163        debug_assert!(self.reinject_buf.is_empty());
164        self.sync_offset_to_cursor();
165        self.offset += byte_len;
166        self.cursor.skip_bytes(byte_len);
167    }
168
169    pub(in crate::parser) fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
170        debug_assert!(self.reinject_buf.is_empty());
171        self.offset += byte_len;
172    }
173
174    pub(in crate::parser) fn consume_ascii_chars(&mut self, count: usize) {
175        if self.reinject_buf.is_empty() {
176            self.consume_source_bytes(count);
177            return;
178        }
179
180        for _ in 0..count {
181            self.advance();
182        }
183    }
184
185    pub(in crate::parser) fn source_horizontal_whitespace_len(&self) -> usize {
186        self.cursor
187            .rest()
188            .as_bytes()
189            .iter()
190            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
191            .count()
192    }
193
194    pub(in crate::parser) fn source_ascii_plain_word_len(&self) -> usize {
195        self.cursor
196            .rest()
197            .as_bytes()
198            .iter()
199            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
200            .count()
201    }
202
203    pub(in crate::parser) fn find_double_quote_special(source: &str) -> Option<usize> {
204        source
205            .as_bytes()
206            .iter()
207            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
208    }
209
210    pub(in crate::parser) fn ensure_capture_from_source(
211        &self,
212        capture: &mut Option<String>,
213        start: Position,
214        end: Position,
215    ) {
216        if capture.is_none() {
217            *capture = Some(self.input[start.offset..end.offset].to_string());
218        }
219    }
220
221    pub(in crate::parser) fn push_capture_char(capture: &mut Option<String>, ch: char) {
222        if let Some(text) = capture.as_mut() {
223            text.push(ch);
224        }
225    }
226
227    pub(in crate::parser) fn push_capture_str(capture: &mut Option<String>, text: &str) {
228        if let Some(current) = capture.as_mut() {
229            current.push_str(text);
230        }
231    }
232
233    pub(in crate::parser) fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
234        if let Some(timeline) = self.zsh_timeline.as_ref() {
235            while self.zsh_timeline_index < timeline.entries.len()
236                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
237            {
238                self.zsh_timeline_index += 1;
239            }
240            return if self.zsh_timeline_index == 0 {
241                self.initial_zsh_options.as_ref()
242            } else {
243                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
244            };
245        }
246
247        self.initial_zsh_options.as_ref()
248    }
249
250    pub(in crate::parser) fn comments_enabled(&mut self) -> bool {
251        !self
252            .current_zsh_options()
253            .is_some_and(|options| options.interactive_comments.is_definitely_off())
254    }
255
256    pub(in crate::parser) fn rc_quotes_enabled(&mut self) -> bool {
257        self.current_zsh_options()
258            .is_some_and(|options| options.rc_quotes.is_definitely_on())
259    }
260
261    pub(in crate::parser) fn ignore_braces_enabled(&mut self) -> bool {
262        self.current_zsh_options()
263            .is_some_and(|options| options.ignore_braces.is_definitely_on())
264    }
265
266    pub(in crate::parser) fn ignore_close_braces_enabled(&mut self) -> bool {
267        self.current_zsh_options().is_some_and(|options| {
268            options.ignore_braces.is_definitely_on()
269                || options.ignore_close_braces.is_definitely_on()
270        })
271    }
272
273    pub(in crate::parser) fn brace_ccl_enabled(&mut self) -> bool {
274        self.current_zsh_options()
275            .is_some_and(|options| options.brace_ccl.is_definitely_on())
276    }
277
278    pub(in crate::parser) fn should_treat_hash_as_word_char(&mut self) -> bool {
279        if !self.comments_enabled() {
280            return true;
281        }
282        self.reinject_buf.is_empty()
283            && (self
284                .input
285                .get(..self.offset)
286                .and_then(|prefix| prefix.chars().next_back())
287                .is_some_and(|prev| {
288                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
289                })
290                || self.is_inside_unclosed_double_paren_on_line())
291    }
292
293    pub(in crate::parser) fn current_word_text<'b>(
294        &'b self,
295        start: Position,
296        capture: &'b Option<String>,
297    ) -> &'b str {
298        capture
299            .as_deref()
300            .unwrap_or(&self.input[start.offset..self.offset])
301    }
302
303    pub(in crate::parser) fn current_word_surface_is_single_char(
304        &self,
305        start: Position,
306        capture: &Option<String>,
307        target: char,
308    ) -> bool {
309        let text = self.current_word_text(start, capture);
310        if !text.contains('\x00') {
311            let mut encoded = [0; 4];
312            return text == target.encode_utf8(&mut encoded);
313        }
314
315        let mut chars = text.chars().filter(|&ch| ch != '\x00');
316        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
317    }
318
319    pub(in crate::parser) fn current_word_surface_last_char<'b>(
320        &'b self,
321        start: Position,
322        capture: &'b Option<String>,
323    ) -> Option<char> {
324        self.current_word_text(start, capture)
325            .chars()
326            .rev()
327            .find(|&ch| ch != '\x00')
328    }
329
330    pub(in crate::parser) fn current_word_surface_ends_with_char(
331        &self,
332        start: Position,
333        capture: &Option<String>,
334        target: char,
335    ) -> bool {
336        self.current_word_surface_last_char(start, capture) == Some(target)
337    }
338
339    pub(in crate::parser) fn current_word_surface_ends_with_extglob_prefix(
340        &self,
341        start: Position,
342        capture: &Option<String>,
343    ) -> bool {
344        self.current_word_surface_last_char(start, capture)
345            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
346    }
347
348    pub(in crate::parser) fn current_word_surface_can_take_zsh_glob_modifier_suffix(
349        &mut self,
350        start: Position,
351        capture: &Option<String>,
352    ) -> bool {
353        if self.current_zsh_options().is_none() || self.peek_char() != Some('(') {
354            return false;
355        }
356
357        let text = self.current_word_text(start, capture);
358        if !text.contains('/') {
359            return false;
360        }
361
362        let mut chars = self.lookahead_chars();
363        matches!((chars.next(), chars.next()), (Some('('), Some(':')))
364    }
365}