do_not_use_antlr_rust/
input_stream.rs

1//! Input to lexer
2use std::borrow::Cow;
3
4use crate::char_stream::{CharStream, InputData};
5use crate::int_stream::IntStream;
6use std::ops::Deref;
7
8use better_any::TidAble;
9
/// Default input stream of the Rust target.
///
/// Rust strings are UTF-8 encoded and cannot be indexed by character, so
/// `InputStream<&str>` reports slightly different indexes than the Java runtime whenever the
/// input contains non-ASCII Unicode characters.
/// If you need exactly the same indexes as the Java runtime, use
/// `CodePoint8/16/32BitCharStream` instead; those do not use Rust's native `str` type, so they
/// perform additional conversions and allocations along the way.
#[derive(Debug)]
pub struct InputStream<Data>
where
    Data: Deref,
{
    /// Source name, handed out by `get_source_name`.
    name: String,
    /// The underlying input, held behind a `Deref` pointer (e.g. `&T` or `Box<T>`).
    data_raw: Data,
    /// Current read position inside `data_raw`.
    index: isize,
}
23
// `TidAble` is implemented for the borrowed (`&'a T`) and the boxed (`Box<T>`) flavours of
// `InputStream` through the declarative `better_any::tid!` macro.
// The attribute-macro spelling below is kept for reference only; presumably it is what the
// `tid!` invocations replaced (NOTE(review): confirm before deleting).
// #[impl_tid]
// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<Box<T>> {}
// #[impl_tid]
// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<&'a T> {}
better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<&'a T> where T: ?Sized}
better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<Box<T>> where T: ?Sized}
30
31impl<'a, T: From<&'a str>> CharStream<T> for InputStream<&'a str> {
32    #[inline]
33    fn get_text(&self, start: isize, stop: isize) -> T { self.get_text_inner(start, stop).into() }
34}
35
36impl<T: From<D::Owned>, D: ?Sized + InputData> CharStream<T> for InputStream<Box<D>> {
37    #[inline]
38    fn get_text(&self, start: isize, stop: isize) -> T { self.get_text_owned(start, stop).into() }
39}
/// `InputStream` over a borrowed byte slice.
pub type ByteStream<'a> = InputStream<&'a [u8]>;
/// `InputStream` which treats the input as a series of Unicode code points that fit into `u8`.
///
/// This alias names exactly the same type as `ByteStream`.
pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>;
/// `InputStream` which treats the input as a series of Unicode code points that fit into `u16`.
pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>;
/// `InputStream` which treats the input as a series of Unicode code points stored as `u32`.
pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>;
48
49impl<'a, T> CharStream<Cow<'a, [T]>> for InputStream<&'a [T]>
50where
51    [T]: InputData,
52{
53    #[inline]
54    fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> {
55        Cow::Borrowed(self.get_text_inner(a, b))
56    }
57}
58
59impl<'a, T> CharStream<String> for InputStream<&'a [T]>
60where
61    [T]: InputData,
62{
63    fn get_text(&self, a: isize, b: isize) -> String { self.get_text_inner(a, b).to_display() }
64}
65
66impl<'a, 'b, T> CharStream<Cow<'b, str>> for InputStream<&'a [T]>
67where
68    [T]: InputData,
69{
70    #[inline]
71    fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> {
72        self.get_text_inner(a, b).to_display().into()
73    }
74}
75
76impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]>
77where
78    [T]: InputData,
79{
80    #[inline]
81    fn get_text(&self, a: isize, b: isize) -> &'a [T] { self.get_text_inner(a, b) }
82}
83
84impl<Data: ?Sized + InputData> InputStream<Box<Data>> {
85    fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned {
86        let start = start as usize;
87        let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
88
89        if stop < self.data_raw.len() {
90            &self.data_raw[start..stop]
91        } else {
92            &self.data_raw[start..]
93        }
94        .to_owned()
95    }
96
97    /// Creates new `InputStream` over owned data   
98    pub fn new_owned(data: Box<Data>) -> Self {
99        Self {
100            name: "<empty>".to_string(),
101            data_raw: data.into(),
102            index: 0,
103        }
104    }
105}
106
107impl<'a, Data> InputStream<&'a Data>
108where
109    Data: ?Sized + InputData,
110{
111    fn get_text_inner(&self, start: isize, stop: isize) -> &'a Data {
112        // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display());
113        let start = start as usize;
114        let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
115        // println!("justed range {}..{} ",start,stop);
116        // let start = self.data_raw.offset(0,start).unwrap() as usize;
117        // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize;
118
119        if stop < self.data_raw.len() {
120            &self.data_raw[start..stop]
121        } else {
122            &self.data_raw[start..]
123        }
124    }
125
126    /// Creates new `InputStream` over borrowed data
127    pub fn new(data_raw: &'a Data) -> Self {
128        // let data_raw = data_raw.as_ref();
129        // let data = data_raw.to_indexed_vec();
130        Self {
131            name: "<empty>".to_string(),
132            data_raw,
133            index: 0,
134            // phantom: Default::default(),
135        }
136    }
137}
138impl<'a, Data: Deref> InputStream<Data>
139where
140    Data::Target: InputData,
141{
142    /// Resets input stream to start from the beginning of this slice
143    #[inline]
144    pub fn reset(&mut self) { self.index = 0 }
145}
146
147impl<'a, Data: Deref> IntStream for InputStream<Data>
148where
149    Data::Target: InputData,
150{
151    #[inline]
152    fn consume(&mut self) {
153        if let Some(index) = self.data_raw.offset(self.index, 1) {
154            self.index = index;
155            // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF);
156            // Ok(())
157        } else {
158            panic!("cannot consume EOF");
159        }
160    }
161
162    #[inline]
163    fn la(&mut self, mut offset: isize) -> isize {
164        if offset == 1 {
165            return self
166                .data_raw
167                .item(self.index)
168                .unwrap_or(crate::int_stream::EOF);
169        }
170        if offset == 0 {
171            panic!("should not be called with offset 0");
172        }
173        if offset < 0 {
174            offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
175        }
176
177        self.data_raw
178            .offset(self.index, offset - 1)
179            .and_then(|index| self.data_raw.item(index))
180            .unwrap_or(crate::int_stream::EOF)
181    }
182
183    #[inline]
184    fn mark(&mut self) -> isize { -1 }
185
186    #[inline]
187    fn release(&mut self, _marker: isize) {}
188
189    #[inline]
190    fn index(&self) -> isize { self.index }
191
192    #[inline]
193    fn seek(&mut self, index: isize) { self.index = index }
194
195    #[inline]
196    fn size(&self) -> isize { self.data_raw.len() as isize }
197
198    fn get_source_name(&self) -> String { self.name.clone() }
199}
200
#[cfg(test)]
mod test {
    use std::ops::Deref;

    use crate::char_stream::CharStream;
    use crate::int_stream::{IntStream, EOF};

    use super::InputStream;

    /// Walks a `&str` stream containing a multi-byte character and checks lookahead,
    /// index progression and text extraction.
    #[test]
    fn test_str_input_stream() {
        let mut owner = InputStream::new("V1は3");
        let stream: &mut dyn CharStream<String> = &mut owner;

        // Positioned at 'V'.
        assert_eq!(stream.la(1), 'V' as isize);
        assert_eq!(stream.index(), 0);

        // Positioned at '1'.
        stream.consume();
        assert_eq!(stream.la(1), '1' as isize);
        assert_eq!(stream.la(-1), 'V' as isize);
        assert_eq!(stream.index(), 1);

        // Positioned at 'は', which occupies three bytes.
        stream.consume();
        assert_eq!(stream.la(1), 0x306F);
        assert_eq!(stream.index(), 2);

        // Positioned at '3': the index jumped over the whole multi-byte character.
        stream.consume();
        assert_eq!(stream.index(), 5);
        assert_eq!(stream.la(-2), '1' as isize);
        assert_eq!(stream.la(2), EOF);

        // Text ranges are inclusive on both ends.
        let expectations = [
            (1, 1, "1"),
            (1, 2, "1は"),
            (2, 2, "は"),
            (2, 5, "は3"),
            (5, 5, "3"),
        ];
        for &(start, stop, expected) in expectations.iter() {
            assert_eq!(stream.get_text(start, stop).deref(), expected);
        }
    }

    /// Byte streams yield raw byte values, including bytes that are not valid UTF-8.
    #[test]
    fn test_byte_input_stream() {
        let mut raw = InputStream::new(&b"V\xaa\xbb"[..]);
        assert_eq!(raw.la(1), 'V' as isize);
        raw.seek(2);
        assert_eq!(raw.la(1), 0xBB);
        assert_eq!(raw.index(), 2);

        // The first UTF-8 byte of 'は' is 227.
        let mut utf8_bytes = InputStream::new("は".as_bytes());
        assert_eq!(utf8_bytes.la(1), 227);
    }
}