antlr_rust_sleagon/
input_stream.rs

1//! Input to lexer
2use std::borrow::Cow;
3
4use crate::char_stream::{CharStream, InputData};
5use crate::int_stream::IntStream;
6use std::ops::Deref;
7
8use better_any::TidAble;
9
/// The default input stream of the Rust target.
///
/// Rust strings are UTF-8 encoded and therefore cannot be indexed by character.
/// As a consequence `InputStream<&str>` produces slightly different indexes than
/// the Java runtime whenever the input contains non-ASCII Unicode characters.
/// If the indexes have to match the Java runtime exactly, use
/// `CodePoint8/16/32BitCharStream` instead; those do not rely on Rust's native
/// `str` type, so they perform additional conversions and allocations along the way.
#[derive(Debug)]
pub struct InputStream<Data>
where
    Data: Deref,
{
    /// Name of the source this stream was created from.
    name: String,
    /// The underlying data the stream reads from.
    data_raw: Data,
    /// Current position inside `data_raw`.
    index: isize,
}
23
24// #[impl_tid]
25// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<Box<T>> {}
26// #[impl_tid]
27// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<&'a T> {}
28better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<&'a T> where T: ?Sized}
29better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<Box<T>> where T: ?Sized}
30
31impl<'a, T: From<&'a str>> CharStream<T> for InputStream<&'a str> {
32    #[inline]
33    fn get_text(&self, start: isize, stop: isize) -> T {
34        self.get_text_inner(start, stop).into()
35    }
36}
37
38impl<T: From<D::Owned>, D: ?Sized + InputData> CharStream<T> for InputStream<Box<D>> {
39    #[inline]
40    fn get_text(&self, start: isize, stop: isize) -> T {
41        self.get_text_owned(start, stop).into()
42    }
43}
44/// `InputStream` over byte slice
45pub type ByteStream<'a> = InputStream<&'a [u8]>;
46/// InputStream which treats the input as a series of Unicode code points that fit into `u8`
47pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>;
48/// InputStream which treats the input as a series of Unicode code points that fit into `u16`
49pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>;
50/// InputStream which treats the input as a series of Unicode code points
51pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>;
52
53impl<'a, T> CharStream<Cow<'a, [T]>> for InputStream<&'a [T]>
54where
55    [T]: InputData,
56{
57    #[inline]
58    fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> {
59        Cow::Borrowed(self.get_text_inner(a, b))
60    }
61}
62
63impl<'a, T> CharStream<String> for InputStream<&'a [T]>
64where
65    [T]: InputData,
66{
67    fn get_text(&self, a: isize, b: isize) -> String {
68        self.get_text_inner(a, b).to_display()
69    }
70}
71
72impl<'a, 'b, T> CharStream<Cow<'b, str>> for InputStream<&'a [T]>
73where
74    [T]: InputData,
75{
76    #[inline]
77    fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> {
78        self.get_text_inner(a, b).to_display().into()
79    }
80}
81
82impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]>
83where
84    [T]: InputData,
85{
86    #[inline]
87    fn get_text(&self, a: isize, b: isize) -> &'a [T] {
88        self.get_text_inner(a, b)
89    }
90}
91
92impl<Data: ?Sized + InputData> InputStream<Box<Data>> {
93    fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned {
94        let start = start as usize;
95        let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
96
97        if stop < self.data_raw.len() {
98            &self.data_raw[start..stop]
99        } else {
100            &self.data_raw[start..]
101        }
102        .to_owned()
103    }
104
105    /// Creates new `InputStream` over owned data   
106    pub fn new_owned(data: Box<Data>) -> Self {
107        Self {
108            name: "<empty>".to_string(),
109            data_raw: data.into(),
110            index: 0,
111        }
112    }
113}
114
115impl<'a, Data> InputStream<&'a Data>
116where
117    Data: ?Sized + InputData,
118{
119    fn get_text_inner(&self, start: isize, stop: isize) -> &'a Data {
120        // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display());
121        let start = start as usize;
122        let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
123        // println!("justed range {}..{} ",start,stop);
124        // let start = self.data_raw.offset(0,start).unwrap() as usize;
125        // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize;
126
127        if stop < self.data_raw.len() {
128            &self.data_raw[start..stop]
129        } else {
130            &self.data_raw[start..]
131        }
132    }
133
134    /// Creates new `InputStream` over borrowed data
135    pub fn new(data_raw: &'a Data) -> Self {
136        // let data_raw = data_raw.as_ref();
137        // let data = data_raw.to_indexed_vec();
138        Self {
139            name: "<empty>".to_string(),
140            data_raw,
141            index: 0,
142            // phantom: Default::default(),
143        }
144    }
145}
146impl<'a, Data: Deref> InputStream<Data>
147where
148    Data::Target: InputData,
149{
150    /// Resets input stream to start from the beginning of this slice
151    #[inline]
152    pub fn reset(&mut self) {
153        self.index = 0
154    }
155}
156
157impl<'a, Data: Deref> IntStream for InputStream<Data>
158where
159    Data::Target: InputData,
160{
161    #[inline]
162    fn consume(&mut self) {
163        if let Some(index) = self.data_raw.offset(self.index, 1) {
164            self.index = index;
165            // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF);
166            // Ok(())
167        } else {
168            panic!("cannot consume EOF");
169        }
170    }
171
172    #[inline]
173    fn la(&mut self, mut offset: isize) -> isize {
174        if offset == 1 {
175            return self
176                .data_raw
177                .item(self.index)
178                .unwrap_or(crate::int_stream::EOF);
179        }
180        if offset == 0 {
181            panic!("should not be called with offset 0");
182        }
183        if offset < 0 {
184            offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
185        }
186
187        self.data_raw
188            .offset(self.index, offset - 1)
189            .and_then(|index| self.data_raw.item(index))
190            .unwrap_or(crate::int_stream::EOF)
191    }
192
193    #[inline]
194    fn mark(&mut self) -> isize {
195        -1
196    }
197
198    #[inline]
199    fn release(&mut self, _marker: isize) {}
200
201    #[inline]
202    fn index(&self) -> isize {
203        self.index
204    }
205
206    #[inline]
207    fn seek(&mut self, index: isize) {
208        self.index = index
209    }
210
211    #[inline]
212    fn size(&self) -> isize {
213        self.data_raw.len() as isize
214    }
215
216    fn get_source_name(&self) -> String {
217        self.name.clone()
218    }
219}
220
#[cfg(test)]
mod test {
    use std::ops::Deref;

    use crate::char_stream::CharStream;
    use crate::int_stream::{IntStream, EOF};

    use super::InputStream;

    /// Exercises a `&str`-backed stream that contains a multi-byte character:
    /// look-ahead, look-behind, index progression and inclusive text extraction.
    #[test]
    fn test_str_input_stream() {
        let mut stream = InputStream::new("V1は3");
        let input = &mut stream as &mut dyn CharStream<String>;

        // Nothing consumed yet.
        assert_eq!(input.la(1), 'V' as isize);
        assert_eq!(input.index(), 0);

        // After 'V'.
        input.consume();
        assert_eq!(input.la(1), '1' as isize);
        assert_eq!(input.la(-1), 'V' as isize);
        assert_eq!(input.index(), 1);

        // After '1': the next item is the multi-byte 'は'.
        input.consume();
        assert_eq!(input.la(1), 0x306F);
        assert_eq!(input.index(), 2);

        // After 'は': the index moves from 2 to 5.
        input.consume();
        assert_eq!(input.index(), 5);
        assert_eq!(input.la(-2), '1' as isize);
        assert_eq!(input.la(2), EOF);

        // (start, stop, expected text) with an inclusive `stop`.
        let expected_texts: [(isize, isize, &str); 5] = [
            (1, 1, "1"),
            (1, 2, "1は"),
            (2, 2, "は"),
            (2, 5, "は3"),
            (5, 5, "3"),
        ];
        for &(start, stop, expected) in expected_texts.iter() {
            assert_eq!(input.get_text(start, stop).deref(), expected);
        }
    }

    /// Exercises byte-slice-backed streams: every byte is an item of its own,
    /// including bytes that are not valid UTF-8 and the bytes of a multi-byte
    /// character.
    #[test]
    fn test_byte_input_stream() {
        let mut raw = InputStream::new(&b"V\xaa\xbb"[..]);
        assert_eq!(raw.la(1), 'V' as isize);
        raw.seek(2);
        assert_eq!(raw.la(1), 0xBB);
        assert_eq!(raw.index(), 2);

        let mut utf8_bytes = InputStream::new("は".as_bytes());
        assert_eq!(utf8_bytes.la(1), 227);
    }
}