dbt_antlr4/input_stream.rs
1//! Input to lexer
2use std::borrow::Cow;
3
4use crate::char_stream::{CharStream, InputData};
5use crate::int_stream::IntStream;
6
/// Default input stream of the Rust target, backed by a borrowed `&str`.
///
/// Rust strings are UTF-8 and cannot be indexed by character, so positions in this
/// stream are offsets into the underlying `str` rather than character counts.
/// As a result, indexes differ from the ones produced by the Java runtime as soon as
/// the input contains non-ASCII characters.
///
/// If you need exactly the same indexes as the Java runtime, the intended alternative is
/// a code-point based stream (`CodePoint8/16/32BitCharStream`), which does not use Rust's
/// native `str` type and therefore performs additional conversions and allocations.
/// NOTE(review): those aliases are currently commented out in this file.
#[derive(Debug)]
pub struct InputStream<'input> {
    /// Source name reported by `get_source_name`.
    name: String,
    /// The complete input text this stream reads from.
    data_raw: &'input str,
    /// Current read position inside `data_raw`.
    index: isize,
}
20
21impl<'input> CharStream<'input> for InputStream<'input> {
22 #[inline]
23 fn get_text(&self, start: isize, stop: isize) -> Cow<'input, str> {
24 self.get_text_inner(start, stop).into()
25 }
26}
27
28// /// `InputStream` over byte slice
29// pub type ByteStream<'a> = InputStream<&'a [u8]>;
30// /// InputStream which treats the input as a series of Unicode code points that fit into `u8`
31// pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>;
32// /// InputStream which treats the input as a series of Unicode code points that fit into `u16`
33// pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>;
34// /// InputStream which treats the input as a series of Unicode code points
35// pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>;
36
37// impl<'a, T> CharStream<Cow<'a, [T]>> for InputStream<&'a [T]>
38// where
39// [T]: InputData,
40// {
41// #[inline]
42// fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> {
43// Cow::Borrowed(self.get_text_inner(a, b))
44// }
45// }
46
47// impl<T> CharStream<String> for InputStream<&[T]>
48// where
49// [T]: InputData,
50// {
51// fn get_text(&self, a: isize, b: isize) -> String {
52// self.get_text_inner(a, b).to_display()
53// }
54// }
55
56// impl<'b, T> CharStream<Cow<'b, str>> for InputStream<&[T]>
57// where
58// [T]: InputData,
59// {
60// #[inline]
61// fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> {
62// self.get_text_inner(a, b).to_display().into()
63// }
64// }
65
66// impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]>
67// where
68// [T]: InputData,
69// {
70// #[inline]
71// fn get_text(&self, a: isize, b: isize) -> &'a [T] {
72// self.get_text_inner(a, b)
73// }
74// }
75
76// impl<Data: ?Sized + InputData> InputStream<Box<Data>> {
77// fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned {
78// let start = start as usize;
79// let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
80
81// if stop < self.data_raw.len() {
82// &self.data_raw[start..stop]
83// } else {
84// &self.data_raw[start..]
85// }
86// .to_owned()
87// }
88
89// /// Creates new `InputStream` over owned data
90// pub fn new_owned(data: Box<Data>) -> Self {
91// Self {
92// name: "<empty>".to_string(),
93// data_raw: data,
94// index: 0,
95// }
96// }
97// }
98
99impl<'input> InputStream<'input> {
100 fn get_text_inner(&self, start: isize, stop: isize) -> &'input str {
101 // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display());
102 let start = start as usize;
103 let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
104 // println!("justed range {}..{} ",start,stop);
105 // let start = self.data_raw.offset(0,start).unwrap() as usize;
106 // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize;
107
108 if stop < self.data_raw.len() {
109 &self.data_raw[start..stop]
110 } else {
111 &self.data_raw[start..]
112 }
113 }
114
115 /// Creates new `InputStream` over borrowed data
116 pub fn new(data_raw: &'input str) -> Self {
117 // let data_raw = data_raw.as_ref();
118 // let data = data_raw.to_indexed_vec();
119 Self {
120 name: "<empty>".to_string(),
121 data_raw,
122 index: 0,
123 // phantom: Default::default(),
124 }
125 }
126
127 /// Resets input stream to start from the beginning of this slice
128 #[inline]
129 pub fn reset(&mut self) {
130 self.index = 0
131 }
132}
133
134impl IntStream for InputStream<'_> {
135 #[inline]
136 fn consume(&mut self) {
137 if let Some(index) = self.data_raw.offset(self.index, 1) {
138 self.index = index;
139 // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF);
140 // Ok(())
141 } else {
142 panic!("cannot consume EOF");
143 }
144 }
145
146 #[inline]
147 fn la(&mut self, mut offset: isize) -> i32 {
148 if offset == 1 {
149 return self
150 .data_raw
151 .item(self.index)
152 .unwrap_or(crate::int_stream::EOF);
153 }
154 if offset == 0 {
155 panic!("should not be called with offset 0");
156 }
157 if offset < 0 {
158 offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
159 }
160
161 self.data_raw
162 .offset(self.index, offset - 1)
163 .and_then(|index| self.data_raw.item(index))
164 .unwrap_or(crate::int_stream::EOF)
165 }
166
167 #[inline]
168 fn mark(&mut self) -> isize {
169 -1
170 }
171
172 #[inline]
173 fn release(&mut self, _marker: isize) {}
174
175 #[inline]
176 fn index(&self) -> isize {
177 self.index
178 }
179
180 #[inline]
181 fn seek(&mut self, index: isize) {
182 self.index = index
183 }
184
185 #[inline]
186 fn size(&self) -> isize {
187 self.data_raw.len() as isize
188 }
189
190 fn get_source_name(&self) -> String {
191 self.name.clone()
192 }
193}
194
#[cfg(test)]
mod test {
    use crate::{char_stream::CharStream, int_stream::EOF};

    use super::InputStream;

    /// Walks over a string that mixes ASCII with a multi-byte character and checks
    /// lookahead, lookbehind, index progression and text extraction.
    #[test]
    fn test_str_input_stream() {
        let mut owned = InputStream::new("V1は3");
        // Exercise the stream through the trait object.
        let stream: &mut dyn CharStream = &mut owned;

        // Positioned on 'V'.
        assert_eq!(stream.la(1), 'V' as i32);
        assert_eq!(stream.index(), 0);

        // Positioned on '1'.
        stream.consume();
        assert_eq!(stream.la(1), '1' as i32);
        assert_eq!(stream.la(-1), 'V' as i32);
        assert_eq!(stream.index(), 1);

        // Positioned on 'は' (U+306F).
        stream.consume();
        assert_eq!(stream.la(1), '\u{306F}' as i32);
        assert_eq!(stream.index(), 2);

        // Positioned on '3': the index jumps from 2 to 5 past the multi-byte character.
        stream.consume();
        assert_eq!(stream.index(), 5);
        assert_eq!(stream.la(-2), '1' as i32);
        assert_eq!(stream.la(2), EOF);

        // Text ranges are inclusive on both ends.
        let cases: [(isize, isize, &str); 5] = [
            (1, 1, "1"),
            (1, 2, "1は"),
            (2, 2, "は"),
            (2, 5, "は3"),
            (5, 5, "3"),
        ];
        for &(start, stop, expected) in cases.iter() {
            assert_eq!(stream.get_text(start, stop), expected);
        }
    }

    // Disabled: relies on the byte-slice flavour of `InputStream`, which is commented
    // out in this file.
    //
    // #[test]
    // fn test_byte_input_stream() {
    //     let mut input = InputStream::new(&b"V\xaa\xbb"[..]);
    //     assert_eq!(input.la(1), 'V' as i32);
    //     input.seek(2);
    //     assert_eq!(input.la(1), 0xBB);
    //     assert_eq!(input.index(), 2);
    //     let mut input = InputStream::new("は".as_bytes());
    //     assert_eq!(input.la(1), 227);
    // }
}