Skip to main content

dbt_antlr4/
input_stream.rs

1//! Input to lexer
2use std::borrow::Cow;
3
4use crate::char_stream::{CharStream, InputData};
5use crate::int_stream::IntStream;
6
/// Default input stream of the Rust target, backed by a borrowed `&str`.
///
/// Rust strings are UTF-8 encoded and cannot be indexed by character, so the
/// indexes this stream reports differ slightly from the ones produced by the
/// Java runtime as soon as the input contains non-ASCII Unicode characters.
/// If you need exactly the same indexes as the Java runtime, you have to use
/// `CodePoint8/16/32BitCharStream` (currently commented out in this file),
/// which do not use Rust's native `str` type and therefore perform additional
/// conversions and allocations along the way.
#[derive(Debug)]
pub struct InputStream<'input> {
    /// Source name handed out by `get_source_name`; `new` sets it to `"<empty>"`.
    name: String,
    /// The borrowed text this stream reads from.
    data_raw: &'input str,
    /// Current read position inside `data_raw`; moved by `consume` and `seek`,
    /// and put back to `0` by `reset`.
    index: isize,
}
20
21impl<'input> CharStream<'input> for InputStream<'input> {
22    #[inline]
23    fn get_text(&self, start: isize, stop: isize) -> Cow<'input, str> {
24        self.get_text_inner(start, stop).into()
25    }
26}
27
28// /// `InputStream` over byte slice
29// pub type ByteStream<'a> = InputStream<&'a [u8]>;
30// /// InputStream which treats the input as a series of Unicode code points that fit into `u8`
31// pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>;
32// /// InputStream which treats the input as a series of Unicode code points that fit into `u16`
33// pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>;
34// /// InputStream which treats the input as a series of Unicode code points
35// pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>;
36
37// impl<'a, T> CharStream<Cow<'a, [T]>> for InputStream<&'a [T]>
38// where
39//     [T]: InputData,
40// {
41//     #[inline]
42//     fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> {
43//         Cow::Borrowed(self.get_text_inner(a, b))
44//     }
45// }
46
47// impl<T> CharStream<String> for InputStream<&[T]>
48// where
49//     [T]: InputData,
50// {
51//     fn get_text(&self, a: isize, b: isize) -> String {
52//         self.get_text_inner(a, b).to_display()
53//     }
54// }
55
56// impl<'b, T> CharStream<Cow<'b, str>> for InputStream<&[T]>
57// where
58//     [T]: InputData,
59// {
60//     #[inline]
61//     fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> {
62//         self.get_text_inner(a, b).to_display().into()
63//     }
64// }
65
66// impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]>
67// where
68//     [T]: InputData,
69// {
70//     #[inline]
71//     fn get_text(&self, a: isize, b: isize) -> &'a [T] {
72//         self.get_text_inner(a, b)
73//     }
74// }
75
76// impl<Data: ?Sized + InputData> InputStream<Box<Data>> {
77//     fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned {
78//         let start = start as usize;
79//         let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
80
81//         if stop < self.data_raw.len() {
82//             &self.data_raw[start..stop]
83//         } else {
84//             &self.data_raw[start..]
85//         }
86//         .to_owned()
87//     }
88
89//     /// Creates new `InputStream` over owned data
90//     pub fn new_owned(data: Box<Data>) -> Self {
91//         Self {
92//             name: "<empty>".to_string(),
93//             data_raw: data,
94//             index: 0,
95//         }
96//     }
97// }
98
99impl<'input> InputStream<'input> {
100    fn get_text_inner(&self, start: isize, stop: isize) -> &'input str {
101        // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display());
102        let start = start as usize;
103        let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
104        // println!("justed range {}..{} ",start,stop);
105        // let start = self.data_raw.offset(0,start).unwrap() as usize;
106        // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize;
107
108        if stop < self.data_raw.len() {
109            &self.data_raw[start..stop]
110        } else {
111            &self.data_raw[start..]
112        }
113    }
114
115    /// Creates new `InputStream` over borrowed data
116    pub fn new(data_raw: &'input str) -> Self {
117        // let data_raw = data_raw.as_ref();
118        // let data = data_raw.to_indexed_vec();
119        Self {
120            name: "<empty>".to_string(),
121            data_raw,
122            index: 0,
123            // phantom: Default::default(),
124        }
125    }
126
127    /// Resets input stream to start from the beginning of this slice
128    #[inline]
129    pub fn reset(&mut self) {
130        self.index = 0
131    }
132}
133
134impl IntStream for InputStream<'_> {
135    #[inline]
136    fn consume(&mut self) {
137        if let Some(index) = self.data_raw.offset(self.index, 1) {
138            self.index = index;
139            // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF);
140            // Ok(())
141        } else {
142            panic!("cannot consume EOF");
143        }
144    }
145
146    #[inline]
147    fn la(&mut self, mut offset: isize) -> i32 {
148        if offset == 1 {
149            return self
150                .data_raw
151                .item(self.index)
152                .unwrap_or(crate::int_stream::EOF);
153        }
154        if offset == 0 {
155            panic!("should not be called with offset 0");
156        }
157        if offset < 0 {
158            offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
159        }
160
161        self.data_raw
162            .offset(self.index, offset - 1)
163            .and_then(|index| self.data_raw.item(index))
164            .unwrap_or(crate::int_stream::EOF)
165    }
166
167    #[inline]
168    fn mark(&mut self) -> isize {
169        -1
170    }
171
172    #[inline]
173    fn release(&mut self, _marker: isize) {}
174
175    #[inline]
176    fn index(&self) -> isize {
177        self.index
178    }
179
180    #[inline]
181    fn seek(&mut self, index: isize) {
182        self.index = index
183    }
184
185    #[inline]
186    fn size(&self) -> isize {
187        self.data_raw.len() as isize
188    }
189
190    fn get_source_name(&self) -> String {
191        self.name.clone()
192    }
193}
194
#[cfg(test)]
mod test {
    use super::InputStream;
    use crate::{char_stream::CharStream, int_stream::EOF};

    /// Walks an input mixing ASCII with a multi-byte character through the
    /// `CharStream` trait object and checks look-ahead, look-behind, the
    /// reported indexes and text extraction.
    #[test]
    fn test_str_input_stream() {
        let mut stream = InputStream::new("V1は3");
        let input = &mut stream as &mut dyn CharStream;

        // At the very start.
        assert_eq!(input.la(1), 'V' as i32);
        assert_eq!(input.index(), 0);

        // After the first ASCII character.
        input.consume();
        assert_eq!(input.la(1), '1' as i32);
        assert_eq!(input.la(-1), 'V' as i32);
        assert_eq!(input.index(), 1);

        // In front of the multi-byte character.
        input.consume();
        assert_eq!(input.la(1), 0x306F);
        assert_eq!(input.index(), 2);

        // Consuming the multi-byte character moves the index from 2 to 5.
        input.consume();
        assert_eq!(input.index(), 5);
        assert_eq!(input.la(-2), '1' as i32);
        assert_eq!(input.la(2), EOF);

        // Text extraction; the second bound is inclusive.
        let expectations = [
            (1, 1, "1"),
            (1, 2, "1は"),
            (2, 2, "は"),
            (2, 5, "は3"),
            (5, 5, "3"),
        ];
        for &(start, stop, expected) in &expectations {
            assert_eq!(input.get_text(start, stop), expected);
        }
    }

    // Disabled test kept for reference: it feeds byte slices to
    // `InputStream::new`, which currently only accepts `&str`.
    //
    // #[test]
    // fn test_byte_input_stream() {
    //     let mut input = InputStream::new(&b"V\xaa\xbb"[..]);
    //     assert_eq!(input.la(1), 'V' as i32);
    //     input.seek(2);
    //     assert_eq!(input.la(1), 0xBB);
    //     assert_eq!(input.index(), 2);
    //     let mut input = InputStream::new("は".as_bytes());
    //     assert_eq!(input.la(1), 227);
    // }
}