// do_not_use_antlr_rust/token_stream.rs

1//! `IntStream` that produces tokens for Parser
2use std::borrow::Borrow;
3use std::cmp::min;
4use std::marker::PhantomData;
5
6use crate::char_stream::InputData;
7use crate::int_stream::{IntStream, IterWrapper};
8use crate::token::{OwningToken, Token, TOKEN_EOF, TOKEN_INVALID_TYPE};
9use crate::token_factory::TokenFactory;
10use crate::token_source::TokenSource;
11use better_any::{Tid, TidAble};
12use std::fmt::{Debug, Formatter};
13
/// An `IntStream` of `Token`s
///
/// Used as an input for `Parser`s
/// If there is an existing source of tokens, you should implement
/// `TokenSource`, not `TokenStream`
pub trait TokenStream<'input>: IntStream {
    /// Token factory that created tokens in this stream
    type TF: TokenFactory<'input> + 'input;

    /// Lookahead for tokens, same as `IntStream::la` but returns a reference to the full token
    fn lt(&mut self, k: isize) -> Option<&<Self::TF as TokenFactory<'input>>::Tok>;
    /// Returns reference to token at `index`
    fn get(&self, index: isize) -> &<Self::TF as TokenFactory<'input>>::Tok;

    /// Token source that produced data for tokens for this stream
    fn get_token_source(&self) -> &dyn TokenSource<'input, TF = Self::TF>;
    //    fn set_token_source(&self,source: Box<TokenSource>);
    /// Get combined text of all tokens in this stream
    /// (delegates to `get_text_from_interval` over `0..=size()-1`)
    fn get_all_text(&self) -> String { self.get_text_from_interval(0, self.size() - 1) }
    /// Get combined text of tokens in start..=stop interval
    fn get_text_from_interval(&self, start: isize, stop: isize) -> String;
    //    fn get_text_from_rule_context(&self,context: RuleContext) -> String;
    /// Get combined text of tokens in between `a` and `b`
    /// (uses the tokens' own indices, so both must belong to this stream)
    fn get_text_from_tokens<T: Token + ?Sized>(&self, a: &T, b: &T) -> String
    where
        Self: Sized,
    {
        self.get_text_from_interval(a.get_token_index(), b.get_token_index())
    }
}
44
/// Iterator over tokens in `T`
///
/// Yields owned copies of the stream's tokens, including the final EOF
/// token, then returns `None` on subsequent calls.
#[derive(Debug)]
pub struct TokenIter<'a, 'input: 'a, T: TokenStream<'input>>(
    &'a mut T,                        // underlying token stream being iterated
    bool,                             // true once EOF has been yielded — iteration finished
    PhantomData<fn() -> &'input str>, // ties the iterator to the 'input lifetime without storing data
);
52
53impl<'a, 'input: 'a, T: TokenStream<'input>> Iterator for TokenIter<'a, 'input, T> {
54    type Item = OwningToken;
55
56    fn next(&mut self) -> Option<Self::Item> {
57        if self.1 {
58            return None;
59        }
60        let result = self.0.lt(1).unwrap().borrow().to_owned();
61        if result.get_token_type() == TOKEN_EOF {
62            self.1 = true;
63        } else {
64            self.0.consume();
65        }
66        Some(result)
67    }
68}
69
/// Token stream that keeps all data in internal Vec
///
/// The buffer is only trimmed when no markers are outstanding: `release`
/// drains already-consumed tokens once the last marker goes away, while
/// `new_buffered` holds a permanent mark so every token is retained.
pub struct UnbufferedTokenStream<'input, T: TokenSource<'input>> {
    // Underlying source that produces the tokens.
    token_source: T,
    // Window of tokens currently held in memory.
    pub(crate) tokens: Vec<<T::TF as TokenFactory<'input>>::Tok>,
    //todo prev token for lt(-1)
    // Absolute index (since stream start) of the current token.
    pub(crate) current_token_index: isize,
    // Number of outstanding `mark` calls; buffer is trimmed only when zero.
    markers_count: isize,
    // Index of the current token within `tokens` (buffer-relative).
    pub(crate) p: isize,
}
better_any::tid! { impl<'input,T> TidAble<'input> for UnbufferedTokenStream<'input, T> where T: TokenSource<'input>}
80
81impl<'input, T: TokenSource<'input>> Debug for UnbufferedTokenStream<'input, T> {
82    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
83        f.debug_struct("UnbufferedTokenStream")
84            .field("tokens", &self.tokens)
85            .field("current_token_index", &self.current_token_index)
86            .field("markers_count", &self.markers_count)
87            .field("p(buffer index)", &self.p)
88            .finish()
89    }
90}
91
92impl<'input, T: TokenSource<'input>> UnbufferedTokenStream<'input, T> {
93    /// Creates iterator over this token stream
94    pub fn iter(&mut self) -> IterWrapper<'_, Self> { IterWrapper(self) }
95
96    /// Creates iterator over tokens in this token stream
97    pub fn token_iter(&mut self) -> TokenIter<'_, 'input, Self> {
98        TokenIter(self, false, PhantomData)
99    }
100
101    /// Creates token stream that keeps all tokens inside
102    pub fn new_buffered(source: T) -> Self {
103        let mut a = UnbufferedTokenStream::new_unbuffered(source);
104        a.mark();
105        a
106    }
107
108    /// Creates token stream that keeps only tokens required by `mark`
109    pub fn new_unbuffered(source: T) -> Self {
110        UnbufferedTokenStream {
111            token_source: source,
112            tokens: vec![],
113            current_token_index: 0,
114            markers_count: 0,
115            p: 0,
116        }
117    }
118
119    fn sync(&mut self, want: isize) {
120        let need = (self.p + want - 1) - self.tokens.len() as isize + 1;
121        if need > 0 {
122            self.fill(need);
123        }
124    }
125
126    fn get_buffer_start_index(&self) -> isize { self.current_token_index - self.p }
127
128    pub(crate) fn fill(&mut self, need: isize) -> isize {
129        for i in 0..need {
130            if self.tokens.len() > 0
131                && self.tokens.last().unwrap().borrow().get_token_type() == TOKEN_EOF
132            {
133                return i;
134            }
135            let token = self.token_source.next_token();
136            token
137                .borrow()
138                .set_token_index(self.get_buffer_start_index() + self.tokens.len() as isize);
139            self.tokens.push(token);
140        }
141
142        need
143    }
144}
145
146impl<'input, T: TokenSource<'input>> TokenStream<'input> for UnbufferedTokenStream<'input, T> {
147    type TF = T::TF;
148
149    #[inline]
150    fn lt(&mut self, i: isize) -> Option<&<Self::TF as TokenFactory<'input>>::Tok> {
151        if i == -1 {
152            return self.tokens.get(self.p as usize - 1);
153        }
154
155        self.sync(i);
156
157        self.tokens.get((self.p + i - 1) as usize)
158    }
159
160    #[inline]
161    fn get(&self, index: isize) -> &<Self::TF as TokenFactory<'input>>::Tok {
162        &self.tokens[(index - self.get_buffer_start_index()) as usize]
163    }
164
165    fn get_token_source(&self) -> &dyn TokenSource<'input, TF = Self::TF> { &self.token_source }
166
167    fn get_text_from_interval(&self, start: isize, stop: isize) -> String {
168        //        println!("get_text_from_interval {}..{}",start,stop);
169        //        println!("all tokens {:?}",self.tokens.iter().map(|x|x.as_ref().to_owned()).collect::<Vec<OwningToken>>());
170
171        let buffer_start_index = self.get_buffer_start_index();
172        let buffer_stop_index = buffer_start_index + self.tokens.len() as isize - 1;
173        if start < buffer_start_index || stop > buffer_stop_index {
174            panic!(
175                "interval {}..={} not in token buffer window: {}..{}",
176                start, stop, buffer_start_index, buffer_stop_index
177            );
178        }
179
180        let a = start - buffer_start_index;
181        let b = stop - buffer_start_index;
182
183        let mut buf = String::new();
184        for i in a..(b + 1) {
185            let t = self.tokens[i as usize].borrow();
186            if t.get_token_type() == TOKEN_EOF {
187                break;
188            }
189            buf.extend(t.get_text().to_display().chars());
190        }
191
192        return buf;
193    }
194}
195
impl<'input, T: TokenSource<'input>> IntStream for UnbufferedTokenStream<'input, T> {
    /// Advances past the current token and buffers the next one.
    ///
    /// When no markers are outstanding and the whole buffer has been
    /// consumed, the buffer is cleared so memory does not grow unboundedly.
    ///
    /// # Panics
    /// Panics if the current token is EOF.
    #[inline]
    fn consume(&mut self) {
        if self.la(1) == TOKEN_EOF {
            panic!("cannot consume EOF");
        }

        // Nothing behind us can be revisited without a mark, so drop the
        // whole buffer; `p = -1` is incremented back to 0 just below.
        if self.p == self.tokens.len() as isize && self.markers_count == 0 {
            self.tokens.clear();
            self.p = -1;
        }

        self.p += 1;
        self.current_token_index += 1;

        // Make sure the new current token is buffered.
        self.sync(1);
        // Ok(())
    }

    /// Token type at lookahead `i`, or `TOKEN_INVALID_TYPE` when `lt`
    /// has nothing at that offset.
    #[inline]
    fn la(&mut self, i: isize) -> isize {
        self.lt(i)
            .map(|t| t.borrow().get_token_type())
            .unwrap_or(TOKEN_INVALID_TYPE)
    }

    /// Pins the buffer: tokens are retained until the matching `release`.
    /// Markers must be released in LIFO order (enforced in `release`).
    #[inline]
    fn mark(&mut self) -> isize {
        self.markers_count += 1;
        -self.markers_count
    }

    /// Releases a marker previously returned by `mark`. When the last
    /// marker goes away, already-consumed tokens are trimmed from the
    /// front of the buffer and `p` is rebased to 0.
    #[inline]
    fn release(&mut self, marker: isize) {
        // LIFO discipline: only the most recent marker may be released.
        assert_eq!(marker, -self.markers_count);

        self.markers_count -= 1;
        if self.markers_count == 0 {
            if self.p > 0 {
                self.tokens.drain(0..self.p as usize);
                //todo drain assembly is almost 2x longer than
                // unsafe manual copy but need to bench before using unsafe
                //let new_len = self.tokens.len() - self.p as usize;
                // unsafe {
                //     // drop first p elements
                //     for i in 0..(self.p as usize) {
                //         drop_in_place(&mut self.tokens[i]);
                //     }
                //     // move len-p elements to beginning
                //     std::intrinsics::copy(
                //         &self.tokens[self.p as usize],
                //         &mut self.tokens[0],
                //         new_len,
                //     );
                //     self.tokens.set_len(new_len);
                // }

                self.p = 0;
            }
        }
    }

    /// Absolute index of the current token since the start of the stream.
    #[inline(always)]
    fn index(&self) -> isize { self.current_token_index }

    /// Moves the current position to absolute token `index`, buffering
    /// forward as needed.
    ///
    /// # Panics
    /// Panics if `index` falls outside the currently buffered window.
    #[inline]
    fn seek(&mut self, mut index: isize) {
        if self.current_token_index == index {
            return;
        }
        if index > self.current_token_index {
            self.sync(index - self.current_token_index);
            // NOTE(review): Java ANTLR's UnbufferedTokenStream clamps to
            // `buffer_start + size - 1` here; the `+ 1` looks suspicious —
            // verify against the reference implementation before changing.
            index = min(index, self.get_buffer_start_index() + self.size() + 1);
        }
        let i = index - self.get_buffer_start_index();
        if i < 0 || i >= self.tokens.len() as isize {
            panic!()
        }

        self.p = i;
        self.current_token_index = index;
    }

    /// Number of tokens currently buffered — NOT the total stream length.
    #[inline(always)]
    fn size(&self) -> isize { self.tokens.len() as isize }

    fn get_source_name(&self) -> String { self.token_source.get_source_name() }
}