antlr4_runtime/
token_stream.rs1use crate::int_stream::{EOF, IntStream, UNKNOWN_SOURCE_NAME};
2use crate::token::{CommonToken, DEFAULT_CHANNEL, TOKEN_EOF, Token, TokenSource, TokenSourceError};
3
4#[derive(Debug)]
5pub struct CommonTokenStream<S> {
6 source: S,
7 tokens: Vec<CommonToken>,
8 cursor: usize,
9 fetched_eof: bool,
10 channel: i32,
11 source_errors: Vec<TokenSourceError>,
12}
13
14impl<S> CommonTokenStream<S>
15where
16 S: TokenSource,
17{
18 pub const fn new(source: S) -> Self {
20 Self::with_channel(source, DEFAULT_CHANNEL)
21 }
22
23 pub const fn with_channel(source: S, channel: i32) -> Self {
25 Self {
26 source,
27 tokens: Vec::new(),
28 cursor: 0,
29 fetched_eof: false,
30 channel,
31 source_errors: Vec::new(),
32 }
33 }
34
35 pub fn fill(&mut self) {
37 while !self.fetched_eof {
38 self.fetch_one();
39 }
40 self.cursor = self.adjust_seek_index(self.cursor);
41 }
42
43 pub fn get(&mut self, index: usize) -> Option<&CommonToken> {
46 self.sync(index);
47 self.tokens.get(index)
48 }
49
50 pub fn lt(&mut self, offset: isize) -> Option<&CommonToken> {
53 if offset == 0 {
54 return None;
55 }
56 if offset < 0 {
57 return offset
58 .checked_neg()
59 .map(isize::cast_unsigned)
60 .and_then(|offset| self.lb(offset));
61 }
62
63 let mut index = self.next_token_on_channel(self.cursor, self.channel);
64 let mut remaining = offset;
65 while remaining > 1 {
66 index = self.next_token_on_channel(index + 1, self.channel);
67 remaining -= 1;
68 }
69 self.sync(index);
70 self.tokens.get(index)
71 }
72
73 pub fn lb(&self, offset: usize) -> Option<&CommonToken> {
74 if offset == 0 || self.cursor == 0 {
75 return None;
76 }
77 let mut index = self.cursor;
78 let mut remaining = offset;
79 while remaining > 0 {
80 index = self.previous_token_on_channel(index, self.channel)?;
81 remaining -= 1;
82 }
83 self.tokens.get(index)
84 }
85
86 pub const fn token_source(&self) -> &S {
87 &self.source
88 }
89
90 pub fn tokens(&self) -> &[CommonToken] {
91 &self.tokens
92 }
93
94 fn sync(&mut self, index: usize) -> bool {
96 if index < self.tokens.len() {
97 return true;
98 }
99 let needed = index + 1 - self.tokens.len();
100 self.fetch(needed) >= needed
101 }
102
103 fn fetch(&mut self, count: usize) -> usize {
105 let mut fetched = 0;
106 while fetched < count && !self.fetched_eof {
107 self.fetch_one();
108 fetched += 1;
109 }
110 fetched
111 }
112
113 fn fetch_one(&mut self) {
114 let mut token = self.source.next_token();
115 self.source_errors.extend(self.source.drain_errors());
116 let token_index = isize::try_from(self.tokens.len()).unwrap_or(isize::MAX);
117 token.set_token_index(token_index);
118 self.fetched_eof = token.token_type() == TOKEN_EOF;
119 self.tokens.push(token);
120 }
121
122 fn adjust_seek_index(&mut self, index: usize) -> usize {
125 self.next_token_on_channel(index, self.channel)
126 }
127
128 fn next_token_on_channel(&mut self, mut index: usize, channel: i32) -> usize {
130 self.sync(index);
131 while let Some(token) = self.tokens.get(index) {
132 if token.token_type() == TOKEN_EOF || token.channel() == channel {
133 return index;
134 }
135 index += 1;
136 self.sync(index);
137 }
138 index
139 }
140
141 fn previous_token_on_channel(&self, mut index: usize, channel: i32) -> Option<usize> {
143 while index > 0 {
144 index -= 1;
145 let token = self.tokens.get(index)?;
146 if token.token_type() == TOKEN_EOF || token.channel() == channel {
147 return Some(index);
148 }
149 }
150 None
151 }
152
153 pub fn previous_visible_token_index(&mut self, index: usize) -> Option<usize> {
162 if index > 0 {
163 self.sync(index - 1);
164 }
165 self.previous_token_on_channel(index, self.channel)
166 }
167}
168
169impl<S> IntStream for CommonTokenStream<S>
170where
171 S: TokenSource,
172{
173 fn consume(&mut self) {
174 if self.la(1) == EOF {
175 return;
176 }
177 let current = self.next_token_on_channel(self.cursor, self.channel);
178 self.cursor = self.adjust_seek_index(current + 1);
179 }
180
181 fn la(&mut self, offset: isize) -> i32 {
182 self.la_token(offset)
183 }
184
185 fn index(&self) -> usize {
186 self.cursor
187 }
188
189 fn seek(&mut self, index: usize) {
190 self.cursor = self.adjust_seek_index(index);
191 }
192
193 fn size(&self) -> usize {
194 self.tokens.len()
195 }
196
197 fn source_name(&self) -> &str {
198 let source_name = self.source.source_name();
199 if source_name.is_empty() {
200 UNKNOWN_SOURCE_NAME
201 } else {
202 source_name
203 }
204 }
205}
206
207impl<S> CommonTokenStream<S>
208where
209 S: TokenSource,
210{
211 pub fn la_token(&mut self, offset: isize) -> i32 {
212 self.lt(offset).map_or(TOKEN_EOF, Token::token_type)
213 }
214
215 pub fn text(&mut self, start: usize, stop: usize) -> String {
216 self.sync(stop);
217 if start > stop || start >= self.tokens.len() {
218 return String::new();
219 }
220 self.tokens[start..=stop.min(self.tokens.len().saturating_sub(1))]
221 .iter()
222 .filter_map(Token::text)
223 .collect::<Vec<_>>()
224 .join("")
225 }
226
227 pub fn drain_source_errors(&mut self) -> Vec<TokenSourceError> {
230 std::mem::take(&mut self.source_errors)
231 }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::{CommonToken, HIDDEN_CHANNEL};

    /// Token source that replays a fixed list of tokens and then produces EOF
    /// tokens for every further request.
    #[derive(Debug)]
    struct VecTokenSource {
        tokens: Vec<CommonToken>,
        index: usize,
    }

    impl TokenSource for VecTokenSource {
        fn next_token(&mut self) -> CommonToken {
            let position = self.index;
            let token = self.tokens.get(position).map_or_else(
                || CommonToken::eof("vec", position, 1, position),
                CommonToken::clone,
            );
            self.index = position + 1;
            token
        }

        fn line(&self) -> usize {
            1
        }

        fn column(&self) -> usize {
            self.index
        }

        fn source_name(&self) -> &'static str {
            "vec"
        }
    }

    /// Builds a default-channel stream that replays `tokens`.
    fn stream_over(tokens: Vec<CommonToken>) -> CommonTokenStream<VecTokenSource> {
        CommonTokenStream::new(VecTokenSource { tokens, index: 0 })
    }

    /// A single-space token of type 2 on the hidden channel.
    fn hidden_space() -> CommonToken {
        CommonToken::new(2)
            .with_text(" ")
            .with_channel(HIDDEN_CHANNEL)
    }

    #[test]
    fn stream_skips_hidden_channel_for_lookahead() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            hidden_space(),
            CommonToken::new(3).with_text("b"),
            CommonToken::eof("vec", 3, 1, 3),
        ]);

        assert_eq!(stream.la_token(1), 1);
        stream.consume();
        assert_eq!(stream.la_token(1), 3);

        let behind = stream
            .lt(-1)
            .expect("look-behind token should be buffered");
        assert_eq!(behind.token_type(), 1);
    }

    #[test]
    fn lookahead_skips_hidden_token_at_initial_cursor() {
        let mut stream = stream_over(vec![
            hidden_space(),
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 2, 1, 2),
        ]);

        assert_eq!(stream.la_token(1), 1);
        assert_eq!(stream.lt(1).and_then(Token::text), Some("a"));
        stream.consume();
        assert_eq!(stream.la_token(1), TOKEN_EOF);
    }

    #[test]
    fn text_returns_empty_when_start_is_past_buffer() {
        let mut stream = stream_over(vec![
            CommonToken::new(1).with_text("a"),
            CommonToken::eof("vec", 1, 1, 1),
        ]);

        assert_eq!(stream.text(10, 12), "");
    }
}