Skip to main content

antlr4_runtime/
token_stream.rs

1use crate::int_stream::{EOF, IntStream, UNKNOWN_SOURCE_NAME};
2use crate::token::{CommonToken, DEFAULT_CHANNEL, TOKEN_EOF, Token, TokenSource, TokenSourceError};
3
4#[derive(Debug)]
5pub struct CommonTokenStream<S> {
6    source: S,
7    tokens: Vec<CommonToken>,
8    cursor: usize,
9    fetched_eof: bool,
10    channel: i32,
11    source_errors: Vec<TokenSourceError>,
12}
13
14impl<S> CommonTokenStream<S>
15where
16    S: TokenSource,
17{
18    /// Creates a token stream that filters lookahead to the default channel.
19    pub const fn new(source: S) -> Self {
20        Self::with_channel(source, DEFAULT_CHANNEL)
21    }
22
23    /// Creates a token stream whose `LT/LA` operations see only `channel`.
24    pub const fn with_channel(source: S, channel: i32) -> Self {
25        Self {
26            source,
27            tokens: Vec::new(),
28            cursor: 0,
29            fetched_eof: false,
30            channel,
31            source_errors: Vec::new(),
32        }
33    }
34
35    /// Reads tokens from the source until EOF is buffered.
36    pub fn fill(&mut self) {
37        while !self.fetched_eof {
38            self.fetch_one();
39        }
40        self.cursor = self.adjust_seek_index(self.cursor);
41    }
42
43    /// Returns the token at an absolute buffered index, fetching from the source
44    /// as needed.
45    pub fn get(&mut self, index: usize) -> Option<&CommonToken> {
46        self.sync(index);
47        self.tokens.get(index)
48    }
49
50    /// Returns the token at one-based lookahead/lookbehind offset, skipping
51    /// tokens outside the configured channel for positive offsets.
52    pub fn lt(&mut self, offset: isize) -> Option<&CommonToken> {
53        if offset == 0 {
54            return None;
55        }
56        if offset < 0 {
57            return offset
58                .checked_neg()
59                .map(isize::cast_unsigned)
60                .and_then(|offset| self.lb(offset));
61        }
62
63        let mut index = self.next_token_on_channel(self.cursor, self.channel);
64        let mut remaining = offset;
65        while remaining > 1 {
66            index = self.next_token_on_channel(index + 1, self.channel);
67            remaining -= 1;
68        }
69        self.sync(index);
70        self.tokens.get(index)
71    }
72
73    pub fn lb(&self, offset: usize) -> Option<&CommonToken> {
74        if offset == 0 || self.cursor == 0 {
75            return None;
76        }
77        let mut index = self.cursor;
78        let mut remaining = offset;
79        while remaining > 0 {
80            index = self.previous_token_on_channel(index, self.channel)?;
81            remaining -= 1;
82        }
83        self.tokens.get(index)
84    }
85
86    pub const fn token_source(&self) -> &S {
87        &self.source
88    }
89
90    pub fn tokens(&self) -> &[CommonToken] {
91        &self.tokens
92    }
93
94    /// Ensures the buffer contains `index`, unless EOF has already been fetched.
95    fn sync(&mut self, index: usize) -> bool {
96        if index < self.tokens.len() {
97            return true;
98        }
99        let needed = index + 1 - self.tokens.len();
100        self.fetch(needed) >= needed
101    }
102
103    /// Fetches up to `count` more tokens, stopping early at EOF.
104    fn fetch(&mut self, count: usize) -> usize {
105        let mut fetched = 0;
106        while fetched < count && !self.fetched_eof {
107            self.fetch_one();
108            fetched += 1;
109        }
110        fetched
111    }
112
113    fn fetch_one(&mut self) {
114        let mut token = self.source.next_token();
115        self.source_errors.extend(self.source.drain_errors());
116        let token_index = isize::try_from(self.tokens.len()).unwrap_or(isize::MAX);
117        token.set_token_index(token_index);
118        self.fetched_eof = token.token_type() == TOKEN_EOF;
119        self.tokens.push(token);
120    }
121
122    /// Moves a raw token index to the next token visible on this stream's
123    /// channel.
124    fn adjust_seek_index(&mut self, index: usize) -> usize {
125        self.next_token_on_channel(index, self.channel)
126    }
127
128    /// Finds the next buffered token on `channel`, fetching as needed.
129    fn next_token_on_channel(&mut self, mut index: usize, channel: i32) -> usize {
130        self.sync(index);
131        while let Some(token) = self.tokens.get(index) {
132            if token.token_type() == TOKEN_EOF || token.channel() == channel {
133                return index;
134            }
135            index += 1;
136            self.sync(index);
137        }
138        index
139    }
140
141    /// Finds the previous buffered token on `channel`.
142    fn previous_token_on_channel(&self, mut index: usize, channel: i32) -> Option<usize> {
143        while index > 0 {
144            index -= 1;
145            let token = self.tokens.get(index)?;
146            if token.token_type() == TOKEN_EOF || token.channel() == channel {
147                return Some(index);
148            }
149        }
150        None
151    }
152
153    /// Finds the previous buffered token visible to this stream before
154    /// `index`.
155    ///
156    /// Parser rule intervals and `$text` actions are defined in terms of
157    /// visible tokens, but their rendered source text still includes hidden
158    /// tokens between the visible start and stop. Returning the previous token
159    /// on the stream channel avoids accidentally using trailing hidden
160    /// whitespace as the stop token.
161    pub fn previous_visible_token_index(&mut self, index: usize) -> Option<usize> {
162        if index > 0 {
163            self.sync(index - 1);
164        }
165        self.previous_token_on_channel(index, self.channel)
166    }
167}
168
169impl<S> IntStream for CommonTokenStream<S>
170where
171    S: TokenSource,
172{
173    fn consume(&mut self) {
174        if self.la(1) == EOF {
175            return;
176        }
177        let current = self.next_token_on_channel(self.cursor, self.channel);
178        self.cursor = self.adjust_seek_index(current + 1);
179    }
180
181    fn la(&mut self, offset: isize) -> i32 {
182        self.la_token(offset)
183    }
184
185    fn index(&self) -> usize {
186        self.cursor
187    }
188
189    fn seek(&mut self, index: usize) {
190        self.cursor = self.adjust_seek_index(index);
191    }
192
193    fn size(&self) -> usize {
194        self.tokens.len()
195    }
196
197    fn source_name(&self) -> &str {
198        let source_name = self.source.source_name();
199        if source_name.is_empty() {
200            UNKNOWN_SOURCE_NAME
201        } else {
202            source_name
203        }
204    }
205}
206
207impl<S> CommonTokenStream<S>
208where
209    S: TokenSource,
210{
211    pub fn la_token(&mut self, offset: isize) -> i32 {
212        self.lt(offset).map_or(TOKEN_EOF, Token::token_type)
213    }
214
215    /// Returns the token type at a buffered absolute index, fetching from the
216    /// source on demand. Past-EOF reads are reported as `TOKEN_EOF` so the
217    /// caller does not need to special-case the buffer's stop. The cursor is
218    /// not modified, which lets hot speculative loops avoid the seek
219    /// round-trip when they only need lookahead types.
220    pub fn token_type_at_index(&mut self, index: usize) -> i32 {
221        self.sync(index);
222        self.tokens
223            .get(index)
224            .map_or(TOKEN_EOF, Token::token_type)
225    }
226
227    /// Returns the next parser-visible token index after consuming the token
228    /// at `index`, skipping hidden-channel tokens. The parser's stream cursor
229    /// is not modified. Used by speculative recognition that simulates token
230    /// consumption thousands of times without committing it.
231    pub fn next_visible_after(&mut self, index: usize) -> usize {
232        let mut next = index + 1;
233        loop {
234            self.sync(next);
235            match self.tokens.get(next) {
236                Some(token)
237                    if token.token_type() != TOKEN_EOF && token.channel() != self.channel =>
238                {
239                    next += 1;
240                    continue;
241                }
242                _ => return next,
243            }
244        }
245    }
246
247    pub fn text(&mut self, start: usize, stop: usize) -> String {
248        self.sync(stop);
249        if start > stop || start >= self.tokens.len() {
250            return String::new();
251        }
252        self.tokens[start..=stop.min(self.tokens.len().saturating_sub(1))]
253            .iter()
254            .filter_map(Token::text)
255            .collect::<Vec<_>>()
256            .join("")
257    }
258
259    /// Returns and clears diagnostics emitted by the underlying token source
260    /// while this stream was fetching tokens.
261    pub fn drain_source_errors(&mut self) -> Vec<TokenSourceError> {
262        std::mem::take(&mut self.source_errors)
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::{CommonToken, HIDDEN_CHANNEL};

    /// Token source that replays a fixed list of tokens and then keeps
    /// producing synthetic EOF tokens once the list is exhausted.
    #[derive(Debug)]
    struct VecTokenSource {
        tokens: Vec<CommonToken>,
        index: usize,
    }

    impl TokenSource for VecTokenSource {
        fn next_token(&mut self) -> CommonToken {
            let position = self.index;
            let token = match self.tokens.get(position) {
                Some(token) => token.clone(),
                None => CommonToken::eof("vec", position, 1, position),
            };
            self.index += 1;
            token
        }

        fn line(&self) -> usize {
            1
        }

        fn column(&self) -> usize {
            self.index
        }

        fn source_name(&self) -> &'static str {
            "vec"
        }
    }

    /// Builds a default-channel stream that replays `tokens` from the start.
    fn stream_over(tokens: Vec<CommonToken>) -> CommonTokenStream<VecTokenSource> {
        CommonTokenStream::new(VecTokenSource { tokens, index: 0 })
    }

    /// Builds a single-space token on the hidden channel with type 2.
    fn hidden_space() -> CommonToken {
        CommonToken::new(2)
            .with_text(" ")
            .with_channel(HIDDEN_CHANNEL)
    }

    #[test]
    fn stream_skips_hidden_channel_for_lookahead() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            hidden_space(),
            CommonToken::new(3).with_text("b"),
            CommonToken::eof("vec", 3, 1, 3),
        ]);

        assert_eq!(stream.la_token(1), 1);
        stream.consume();
        // The hidden whitespace between "a" and "b" must be invisible.
        assert_eq!(stream.la_token(1), 3);

        let previous = stream
            .lt(-1)
            .expect("look-behind token should be buffered");
        assert_eq!(previous.token_type(), 1);
    }

    #[test]
    fn lookahead_skips_hidden_token_at_initial_cursor() {
        let mut stream = stream_over(vec![
            hidden_space(),
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 2, 1, 2),
        ]);

        assert_eq!(stream.la_token(1), 1);
        let first_visible_text = stream.lt(1).and_then(Token::text);
        assert_eq!(first_visible_text, Some("a"));
        stream.consume();
        assert_eq!(stream.la_token(1), TOKEN_EOF);
    }

    #[test]
    fn text_returns_empty_when_start_is_past_buffer() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 1, 1, 1),
        ]);

        let rendered = stream.text(10, 12);
        assert_eq!(rendered, "");
    }
}