// antlr4_runtime/token_stream.rs

use crate::int_stream::{EOF, IntStream, UNKNOWN_SOURCE_NAME};
use crate::token::{CommonToken, DEFAULT_CHANNEL, TOKEN_EOF, Token, TokenSource, TokenSourceError};

4#[derive(Debug)]
5pub struct CommonTokenStream<S> {
6    source: S,
7    tokens: Vec<CommonToken>,
8    cursor: usize,
9    fetched_eof: bool,
10    channel: i32,
11    source_errors: Vec<TokenSourceError>,
12}
13
14impl<S> CommonTokenStream<S>
15where
16    S: TokenSource,
17{
18    /// Creates a token stream that filters lookahead to the default channel.
19    pub const fn new(source: S) -> Self {
20        Self::with_channel(source, DEFAULT_CHANNEL)
21    }
22
23    /// Creates a token stream whose `LT/LA` operations see only `channel`.
24    pub const fn with_channel(source: S, channel: i32) -> Self {
25        Self {
26            source,
27            tokens: Vec::new(),
28            cursor: 0,
29            fetched_eof: false,
30            channel,
31            source_errors: Vec::new(),
32        }
33    }
34
35    /// Reads tokens from the source until EOF is buffered.
36    pub fn fill(&mut self) {
37        while !self.fetched_eof {
38            self.fetch_one();
39        }
40        self.cursor = self.adjust_seek_index(self.cursor);
41    }
42
43    /// Returns the token at an absolute buffered index, fetching from the source
44    /// as needed.
45    pub fn get(&mut self, index: usize) -> Option<&CommonToken> {
46        self.sync(index);
47        self.tokens.get(index)
48    }
49
50    /// Returns the token at one-based lookahead/lookbehind offset, skipping
51    /// tokens outside the configured channel for positive offsets.
52    pub fn lt(&mut self, offset: isize) -> Option<&CommonToken> {
53        if offset == 0 {
54            return None;
55        }
56        if offset < 0 {
57            return offset
58                .checked_neg()
59                .map(isize::cast_unsigned)
60                .and_then(|offset| self.lb(offset));
61        }
62
63        let mut index = self.cursor;
64        let mut remaining = offset;
65        while remaining > 1 {
66            index = self.next_token_on_channel(index + 1, self.channel);
67            remaining -= 1;
68        }
69        self.sync(index);
70        self.tokens.get(index)
71    }
72
73    pub fn lb(&self, offset: usize) -> Option<&CommonToken> {
74        if offset == 0 || self.cursor == 0 {
75            return None;
76        }
77        let mut index = self.cursor;
78        let mut remaining = offset;
79        while remaining > 0 {
80            index = self.previous_token_on_channel(index, self.channel)?;
81            remaining -= 1;
82        }
83        self.tokens.get(index)
84    }
85
86    pub const fn token_source(&self) -> &S {
87        &self.source
88    }
89
90    pub fn tokens(&self) -> &[CommonToken] {
91        &self.tokens
92    }
93
94    /// Ensures the buffer contains `index`, unless EOF has already been fetched.
95    fn sync(&mut self, index: usize) -> bool {
96        if index < self.tokens.len() {
97            return true;
98        }
99        let needed = index + 1 - self.tokens.len();
100        self.fetch(needed) >= needed
101    }
102
103    /// Fetches up to `count` more tokens, stopping early at EOF.
104    fn fetch(&mut self, count: usize) -> usize {
105        let mut fetched = 0;
106        while fetched < count && !self.fetched_eof {
107            self.fetch_one();
108            fetched += 1;
109        }
110        fetched
111    }
112
113    fn fetch_one(&mut self) {
114        let mut token = self.source.next_token();
115        self.source_errors.extend(self.source.drain_errors());
116        let token_index = isize::try_from(self.tokens.len()).unwrap_or(isize::MAX);
117        token.set_token_index(token_index);
118        self.fetched_eof = token.token_type() == TOKEN_EOF;
119        self.tokens.push(token);
120    }
121
122    /// Moves a raw token index to the next token visible on this stream's
123    /// channel.
124    fn adjust_seek_index(&mut self, index: usize) -> usize {
125        self.next_token_on_channel(index, self.channel)
126    }
127
128    /// Finds the next buffered token on `channel`, fetching as needed.
129    fn next_token_on_channel(&mut self, mut index: usize, channel: i32) -> usize {
130        self.sync(index);
131        while let Some(token) = self.tokens.get(index) {
132            if token.token_type() == TOKEN_EOF || token.channel() == channel {
133                return index;
134            }
135            index += 1;
136            self.sync(index);
137        }
138        index
139    }
140
141    /// Finds the previous buffered token on `channel`.
142    fn previous_token_on_channel(&self, mut index: usize, channel: i32) -> Option<usize> {
143        while index > 0 {
144            index -= 1;
145            let token = self.tokens.get(index)?;
146            if token.token_type() == TOKEN_EOF || token.channel() == channel {
147                return Some(index);
148            }
149        }
150        None
151    }
152
153    /// Finds the previous buffered token visible to this stream before
154    /// `index`.
155    ///
156    /// Parser rule intervals and `$text` actions are defined in terms of
157    /// visible tokens, but their rendered source text still includes hidden
158    /// tokens between the visible start and stop. Returning the previous token
159    /// on the stream channel avoids accidentally using trailing hidden
160    /// whitespace as the stop token.
161    pub fn previous_visible_token_index(&mut self, index: usize) -> Option<usize> {
162        if index > 0 {
163            self.sync(index - 1);
164        }
165        self.previous_token_on_channel(index, self.channel)
166    }
167}
168
169impl<S> IntStream for CommonTokenStream<S>
170where
171    S: TokenSource,
172{
173    fn consume(&mut self) {
174        if self.la(1) == EOF {
175            return;
176        }
177        self.cursor = self.adjust_seek_index(self.cursor + 1);
178    }
179
180    fn la(&mut self, offset: isize) -> i32 {
181        self.la_token(offset)
182    }
183
184    fn index(&self) -> usize {
185        self.cursor
186    }
187
188    fn seek(&mut self, index: usize) {
189        self.cursor = self.adjust_seek_index(index);
190    }
191
192    fn size(&self) -> usize {
193        self.tokens.len()
194    }
195
196    fn source_name(&self) -> &str {
197        let source_name = self.source.source_name();
198        if source_name.is_empty() {
199            UNKNOWN_SOURCE_NAME
200        } else {
201            source_name
202        }
203    }
204}
205
206impl<S> CommonTokenStream<S>
207where
208    S: TokenSource,
209{
210    pub fn la_token(&mut self, offset: isize) -> i32 {
211        self.lt(offset).map_or(TOKEN_EOF, Token::token_type)
212    }
213
214    pub fn text(&mut self, start: usize, stop: usize) -> String {
215        self.sync(stop);
216        if start > stop {
217            return String::new();
218        }
219        self.tokens[start..=stop.min(self.tokens.len().saturating_sub(1))]
220            .iter()
221            .filter_map(Token::text)
222            .collect::<Vec<_>>()
223            .join("")
224    }
225
226    /// Returns and clears diagnostics emitted by the underlying token source
227    /// while this stream was fetching tokens.
228    pub fn drain_source_errors(&mut self) -> Vec<TokenSourceError> {
229        std::mem::take(&mut self.source_errors)
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::{CommonToken, HIDDEN_CHANNEL};

    /// Token source that replays a fixed list of tokens and hands out
    /// synthesized EOF tokens once the list is exhausted.
    #[derive(Debug)]
    struct VecTokenSource {
        tokens: Vec<CommonToken>,
        index: usize,
    }

    impl TokenSource for VecTokenSource {
        fn next_token(&mut self) -> CommonToken {
            let token = self
                .tokens
                .get(self.index)
                .cloned()
                .unwrap_or_else(|| CommonToken::eof("vec", self.index, 1, self.index));
            self.index += 1;
            token
        }

        fn line(&self) -> usize {
            1
        }

        fn column(&self) -> usize {
            self.index
        }

        fn source_name(&self) -> &'static str {
            "vec"
        }
    }

    /// A hidden-channel token between two visible tokens must be invisible to
    /// lookahead and look-behind.
    #[test]
    fn stream_skips_hidden_channel_for_lookahead() {
        let source = VecTokenSource {
            tokens: vec![
                CommonToken::new(1).with_text("a"),
                CommonToken::new(2)
                    .with_text(" ")
                    .with_channel(HIDDEN_CHANNEL),
                CommonToken::new(3).with_text("b"),
                CommonToken::eof("vec", 3, 1, 3),
            ],
            index: 0,
        };
        let mut stream = CommonTokenStream::new(source);
        assert_eq!(stream.la_token(1), 1);
        stream.consume();
        assert_eq!(stream.la_token(1), 3);
        assert_eq!(
            stream
                .lt(-1)
                .expect("look-behind token should be buffered")
                .token_type(),
            1
        );
    }
}