Skip to main content

antlr4_runtime/
token_stream.rs

1use crate::int_stream::{EOF, IntStream, UNKNOWN_SOURCE_NAME};
2use crate::token::{CommonToken, DEFAULT_CHANNEL, TOKEN_EOF, Token, TokenSource, TokenSourceError};
3
4#[derive(Debug)]
5pub struct CommonTokenStream<S> {
6    source: S,
7    tokens: Vec<CommonToken>,
8    cursor: usize,
9    fetched_eof: bool,
10    channel: i32,
11    source_errors: Vec<TokenSourceError>,
12}
13
14impl<S> CommonTokenStream<S>
15where
16    S: TokenSource,
17{
18    /// Creates a token stream that filters lookahead to the default channel.
19    pub const fn new(source: S) -> Self {
20        Self::with_channel(source, DEFAULT_CHANNEL)
21    }
22
23    /// Creates a token stream whose `LT/LA` operations see only `channel`.
24    pub const fn with_channel(source: S, channel: i32) -> Self {
25        Self {
26            source,
27            tokens: Vec::new(),
28            cursor: 0,
29            fetched_eof: false,
30            channel,
31            source_errors: Vec::new(),
32        }
33    }
34
35    /// Reads tokens from the source until EOF is buffered.
36    pub fn fill(&mut self) {
37        while !self.fetched_eof {
38            self.fetch_one();
39        }
40        self.cursor = self.adjust_seek_index(self.cursor);
41    }
42
43    /// Returns the token at an absolute buffered index, fetching from the source
44    /// as needed.
45    pub fn get(&mut self, index: usize) -> Option<&CommonToken> {
46        self.sync(index);
47        self.tokens.get(index)
48    }
49
50    /// Returns the token at one-based lookahead/lookbehind offset, skipping
51    /// tokens outside the configured channel for positive offsets.
52    pub fn lt(&mut self, offset: isize) -> Option<&CommonToken> {
53        if offset == 0 {
54            return None;
55        }
56        if offset < 0 {
57            return offset
58                .checked_neg()
59                .map(isize::cast_unsigned)
60                .and_then(|offset| self.lb(offset));
61        }
62
63        let mut index = self.next_token_on_channel(self.cursor, self.channel);
64        let mut remaining = offset;
65        while remaining > 1 {
66            index = self.next_token_on_channel(index + 1, self.channel);
67            remaining -= 1;
68        }
69        self.sync(index);
70        self.tokens.get(index)
71    }
72
73    pub fn lb(&self, offset: usize) -> Option<&CommonToken> {
74        if offset == 0 || self.cursor == 0 {
75            return None;
76        }
77        let mut index = self.cursor;
78        let mut remaining = offset;
79        while remaining > 0 {
80            index = self.previous_token_on_channel(index, self.channel)?;
81            remaining -= 1;
82        }
83        self.tokens.get(index)
84    }
85
86    pub const fn token_source(&self) -> &S {
87        &self.source
88    }
89
90    pub fn tokens(&self) -> &[CommonToken] {
91        &self.tokens
92    }
93
94    /// Ensures the buffer contains `index`, unless EOF has already been fetched.
95    fn sync(&mut self, index: usize) -> bool {
96        if index < self.tokens.len() {
97            return true;
98        }
99        let needed = index + 1 - self.tokens.len();
100        self.fetch(needed) >= needed
101    }
102
103    /// Fetches up to `count` more tokens, stopping early at EOF.
104    fn fetch(&mut self, count: usize) -> usize {
105        let mut fetched = 0;
106        while fetched < count && !self.fetched_eof {
107            self.fetch_one();
108            fetched += 1;
109        }
110        fetched
111    }
112
113    fn fetch_one(&mut self) {
114        let mut token = self.source.next_token();
115        self.source_errors.extend(self.source.drain_errors());
116        let token_index = isize::try_from(self.tokens.len()).unwrap_or(isize::MAX);
117        token.set_token_index(token_index);
118        self.fetched_eof = token.token_type() == TOKEN_EOF;
119        self.tokens.push(token);
120    }
121
122    /// Moves a raw token index to the next token visible on this stream's
123    /// channel.
124    fn adjust_seek_index(&mut self, index: usize) -> usize {
125        self.next_token_on_channel(index, self.channel)
126    }
127
128    /// Finds the next buffered token on `channel`, fetching as needed.
129    fn next_token_on_channel(&mut self, mut index: usize, channel: i32) -> usize {
130        self.sync(index);
131        while let Some(token) = self.tokens.get(index) {
132            if token.token_type() == TOKEN_EOF || token.channel() == channel {
133                return index;
134            }
135            index += 1;
136            self.sync(index);
137        }
138        index
139    }
140
141    /// Finds the previous buffered token on `channel`.
142    fn previous_token_on_channel(&self, mut index: usize, channel: i32) -> Option<usize> {
143        while index > 0 {
144            index -= 1;
145            let token = self.tokens.get(index)?;
146            if token.token_type() == TOKEN_EOF || token.channel() == channel {
147                return Some(index);
148            }
149        }
150        None
151    }
152
153    /// Finds the previous buffered token visible to this stream before
154    /// `index`.
155    ///
156    /// Parser rule intervals and `$text` actions are defined in terms of
157    /// visible tokens, but their rendered source text still includes hidden
158    /// tokens between the visible start and stop. Returning the previous token
159    /// on the stream channel avoids accidentally using trailing hidden
160    /// whitespace as the stop token.
161    pub fn previous_visible_token_index(&mut self, index: usize) -> Option<usize> {
162        if index > 0 {
163            self.sync(index - 1);
164        }
165        self.previous_token_on_channel(index, self.channel)
166    }
167}
168
169impl<S> IntStream for CommonTokenStream<S>
170where
171    S: TokenSource,
172{
173    fn consume(&mut self) {
174        if self.la(1) == EOF {
175            return;
176        }
177        let current = self.next_token_on_channel(self.cursor, self.channel);
178        self.cursor = self.adjust_seek_index(current + 1);
179    }
180
181    fn la(&mut self, offset: isize) -> i32 {
182        self.la_token(offset)
183    }
184
185    fn index(&self) -> usize {
186        self.cursor
187    }
188
189    fn seek(&mut self, index: usize) {
190        self.cursor = self.adjust_seek_index(index);
191    }
192
193    fn size(&self) -> usize {
194        self.tokens.len()
195    }
196
197    fn source_name(&self) -> &str {
198        let source_name = self.source.source_name();
199        if source_name.is_empty() {
200            UNKNOWN_SOURCE_NAME
201        } else {
202            source_name
203        }
204    }
205}
206
207impl<S> CommonTokenStream<S>
208where
209    S: TokenSource,
210{
211    pub fn la_token(&mut self, offset: isize) -> i32 {
212        self.lt(offset).map_or(TOKEN_EOF, Token::token_type)
213    }
214
215    pub fn text(&mut self, start: usize, stop: usize) -> String {
216        self.sync(stop);
217        if start > stop || start >= self.tokens.len() {
218            return String::new();
219        }
220        self.tokens[start..=stop.min(self.tokens.len().saturating_sub(1))]
221            .iter()
222            .filter_map(Token::text)
223            .collect::<Vec<_>>()
224            .join("")
225    }
226
227    /// Returns and clears diagnostics emitted by the underlying token source
228    /// while this stream was fetching tokens.
229    pub fn drain_source_errors(&mut self) -> Vec<TokenSourceError> {
230        std::mem::take(&mut self.source_errors)
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::{CommonToken, HIDDEN_CHANNEL};

    /// Token source that replays a fixed list and then emits EOF forever.
    #[derive(Debug)]
    struct VecTokenSource {
        tokens: Vec<CommonToken>,
        index: usize,
    }

    impl TokenSource for VecTokenSource {
        fn next_token(&mut self) -> CommonToken {
            let position = self.index;
            self.index += 1;
            match self.tokens.get(position) {
                Some(token) => token.clone(),
                None => CommonToken::eof("vec", position, 1, position),
            }
        }

        fn line(&self) -> usize {
            1
        }

        fn column(&self) -> usize {
            self.index
        }

        fn source_name(&self) -> &'static str {
            "vec"
        }
    }

    /// Wraps `tokens` in a default-channel stream positioned at the start.
    fn stream_over(tokens: Vec<CommonToken>) -> CommonTokenStream<VecTokenSource> {
        CommonTokenStream::new(VecTokenSource { tokens, index: 0 })
    }

    /// Single-space token on the hidden channel.
    fn hidden_space() -> CommonToken {
        CommonToken::new(2)
            .with_text(" ")
            .with_channel(HIDDEN_CHANNEL)
    }

    #[test]
    fn stream_skips_hidden_channel_for_lookahead() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            hidden_space(),
            CommonToken::new(3).with_text("b"),
            CommonToken::eof("vec", 3, 1, 3),
        ]);

        assert_eq!(stream.la_token(1), 1);
        stream.consume();
        assert_eq!(stream.la_token(1), 3);

        let behind = stream
            .lt(-1)
            .expect("look-behind token should be buffered");
        assert_eq!(behind.token_type(), 1);
    }

    #[test]
    fn lookahead_skips_hidden_token_at_initial_cursor() {
        let mut stream = stream_over(vec![
            hidden_space(),
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 2, 1, 2),
        ]);

        assert_eq!(stream.la_token(1), 1);
        assert_eq!(stream.lt(1).and_then(Token::text), Some("a"));
        stream.consume();
        assert_eq!(stream.la_token(1), TOKEN_EOF);
    }

    #[test]
    fn text_returns_empty_when_start_is_past_buffer() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 1, 1, 1),
        ]);

        assert_eq!(stream.text(10, 12), "");
    }
}