// lib_ruby_parser/source/buffer.rs
use std::convert::TryFrom;
2
3use crate::maybe_byte::*;
4use crate::source::input::Input;
5use crate::source::Decoder;
6use crate::source::InputError;
7
8#[derive(Debug, Default)]
9pub(crate) struct Buffer {
10 pub(crate) input: Input,
11
12 pub(crate) line_count: usize,
13 pub(crate) prevline: Option<usize>, pub(crate) lastline: usize, pub(crate) nextline: usize, pub(crate) pbeg: usize,
17 pub(crate) pcur: usize,
18 pub(crate) pend: usize,
19 pub(crate) ptok: usize,
20
21 pub(crate) eofp: bool,
22 pub(crate) cr_seen: bool,
23
24 pub(crate) heredoc_end: usize,
25 pub(crate) heredoc_indent: i32,
26 pub(crate) heredoc_line_indent: i32,
27
28 pub(crate) tokidx: usize,
29 pub(crate) tokline: usize,
31
32 pub(crate) has_shebang: bool,
33
34 pub(crate) ruby_sourceline: usize,
36 }
39
/// Prints a formatted trace line, but only when the crate is compiled with
/// the `debug-buffer` feature.
///
/// Usage: a format string, a comma, then the comma-separated format
/// arguments. The arguments are not evaluated when the feature is disabled.
macro_rules! println_if_debug_buffer {
    ($template:expr, $( $value:expr ),*) => {
        if cfg!(feature = "debug-buffer") {
            println!($template, $( $value ),*);
        }
    };
}
47
48impl Buffer {
49 const CTRL_Z_CHAR: u8 = 0x1a;
50 const CTRL_D_CHAR: u8 = 0x04;
51
52 pub(crate) fn new(name: String, bytes: Vec<u8>, decoder: Option<Decoder>) -> Self {
53 let mut input = Input::new(name, decoder);
54
55 input.update_bytes(bytes);
56
57 let mut this = Self {
58 input,
59 ..Self::default()
60 };
61
62 this.prepare();
63
64 this
65 }
66
67 fn prepare(&mut self) {
68 let c = self.nextc();
69 match c.as_option() {
70 Some(b'#') => {
71 if self.peek(b'!') {
72 self.has_shebang = true;
73 }
74 }
75 Some(0xef) => {
76 if self.pend - self.pcur >= 2
78 && self.byte_at(self.pcur) == 0xbb
79 && self.byte_at(self.pcur + 1) == 0xbf
80 {
81 self.pcur += 2;
82 self.pbeg = self.pcur;
83 return;
84 }
85 }
86 None => return,
87 _ => {}
88 }
89
90 self.pushback(c)
91 }
92
93 pub(crate) fn nextc(&mut self) -> MaybeByte {
94 if self.pcur == self.pend || self.eofp || self.nextline != 0 {
95 let n = self.nextline();
96 println_if_debug_buffer!("nextline = {:?}", n);
97 if n.is_err() {
98 return MaybeByte::EndOfInput;
99 }
100 }
101 let mut c = match self.input.byte_at(self.pcur) {
102 Some(c) => c,
103 None => return MaybeByte::EndOfInput,
104 };
105 self.pcur += 1;
106 if c == b'\r' {
107 c = self.parser_cr(c);
108 }
109 println_if_debug_buffer!("nextc = {:?}", c);
110 MaybeByte::new(c)
111 }
112
113 pub(crate) fn goto_eol(&mut self) {
114 self.pcur = self.pend;
115 }
116
117 pub(crate) fn is_eol(&self) -> bool {
118 self.pcur >= self.pend
119 }
120
121 pub(crate) fn is_eol_n(&self, n: usize) -> bool {
122 self.pcur + n >= self.pend
123 }
124
125 pub(crate) fn peek(&self, c: u8) -> bool {
126 self.peek_n(c, 0)
127 }
128 pub(crate) fn peek_n(&self, c: u8, n: usize) -> bool {
129 !self.is_eol_n(n) && c == self.input.unchecked_byte_at(self.pcur + n)
130 }
131 pub(crate) fn peekc(&self) -> MaybeByte {
132 self.peekc_n(0)
133 }
134 pub(crate) fn peekc_n(&self, n: usize) -> MaybeByte {
135 if self.is_eol_n(n) {
136 MaybeByte::EndOfInput
137 } else {
138 self.byte_at(self.pcur + n)
139 }
140 }
141
142 pub(crate) fn nextline(&mut self) -> Result<(), ()> {
143 let mut v = self.nextline;
144 self.nextline = 0;
145
146 if v == 0 {
147 if self.eofp {
148 return Err(());
149 }
150
151 if self.pend > self.pbeg && self.input.unchecked_byte_at(self.pend - 1) != b'\n' {
152 self.eofp = true;
153 self.goto_eol();
154 return Err(());
155 }
156
157 match self.getline() {
158 Ok(line) => v = line,
159 Err(_) => {
160 self.eofp = true;
161 self.goto_eol();
162 return Err(());
163 }
164 }
165
166 self.cr_seen = false;
167 }
168 let line = self.input.line_at(v);
171
172 if self.heredoc_end > 0 {
173 self.ruby_sourceline = self.heredoc_end;
174 self.heredoc_end = 0;
175 }
176 self.ruby_sourceline += 1;
177 self.pbeg = line.start;
178 self.pcur = line.start;
179 self.pend = line.end;
180 self.token_flush();
181 self.prevline = Some(self.lastline);
182 self.lastline = v;
183
184 Ok(())
185 }
186
187 pub(crate) fn getline(&mut self) -> Result<usize, ()> {
188 if self.line_count < self.input.lines_count() {
189 self.line_count += 1;
190 println_if_debug_buffer!("line_count = {}", self.line_count);
191 Ok(self.line_count - 1)
192 } else {
193 Err(())
194 }
195 }
196
197 pub(crate) fn token_flush(&mut self) {
198 self.set_ptok(self.pcur);
199 }
200
201 pub(crate) fn set_ptok(&mut self, ptok: usize) {
202 println_if_debug_buffer!("set_ptok({})", ptok);
203 self.ptok = ptok;
204 }
205
206 pub(crate) fn parser_cr(&mut self, mut c: u8) -> u8 {
207 if self.peek(b'\n') {
208 self.pcur += 1;
209 c = b'\n';
210 }
211 c
212 }
213
214 pub(crate) fn byte_at(&self, idx: usize) -> MaybeByte {
215 match self.input.byte_at(idx) {
216 Some(byte) => MaybeByte::Some(byte),
217 None => MaybeByte::EndOfInput,
218 }
219 }
220
221 pub(crate) fn substr_at(&self, start: usize, end: usize) -> Option<&[u8]> {
222 self.input.substr_at(start, end)
223 }
224
225 pub(crate) fn was_bol(&self) -> bool {
226 self.pcur == self.pbeg + 1
227 }
228
229 pub(crate) fn is_word_match(&self, word: &str) -> bool {
230 let len = word.len();
231
232 if self.substr_at(self.pcur, self.pcur + len) != Some(word.as_bytes()) {
233 return false;
234 }
235 if self.pcur + len == self.pend {
236 return true;
237 }
238 let c = self.byte_at(self.pcur + len);
239 if c.is_space() {
240 return true;
241 }
242 if c == b'\0' || c == Self::CTRL_Z_CHAR || c == Self::CTRL_D_CHAR {
243 return true;
244 }
245 false
246 }
247
248 pub(crate) fn is_looking_at_eol(&self) -> bool {
249 let mut ptr = self.pcur;
250 while ptr < self.pend {
251 let c = self.input.byte_at(ptr);
252 ptr += 1;
253 if let Some(c) = c {
254 let eol = c == b'\n' || c == b'#';
255 if eol || !c.is_ascii_whitespace() {
256 return eol;
257 }
258 };
259 }
260 true
261 }
262
263 pub(crate) fn is_whole_match(&self, eos: &[u8], indent: usize) -> bool {
264 let mut ptr = self.pbeg;
265 let len = eos.len();
266
267 if indent > 0 {
268 while let Some(c) = self.input.byte_at(ptr) {
269 if !c.is_ascii_whitespace() {
270 break;
271 }
272 ptr += 1;
273 }
274 }
275
276 if self.pend < ptr + len {
277 return false;
278 }
279
280 if let Ok(n) = isize::try_from(self.pend - (ptr + len)) {
281 if n < 0 {
282 return false;
283 }
284 let last_char = self.byte_at(ptr + len);
285 let char_after_last_char = self.byte_at(ptr + len + 1);
286
287 if n > 0 && last_char != b'\n' {
288 if last_char != b'\r' {
289 return false;
290 }
291 if n <= 1 || char_after_last_char != b'\n' {
292 return false;
293 }
294 }
295
296 let next_len_chars = self.substr_at(ptr, ptr + len);
297 Some(eos) == next_len_chars
298 } else {
299 false
300 }
301 }
302
303 pub(crate) fn eof_no_decrement(&mut self) {
304 if let Some(prevline) = self.prevline {
305 if !self.eofp {
306 self.lastline = prevline;
307 }
308 }
309 self.pbeg = self.input.line_at(self.lastline).start;
310 self.pend = self.pbeg + self.input.line_at(self.lastline).len();
311 self.pcur = self.pend;
312 self.pushback(1);
313 self.set_ptok(self.pcur);
314 }
315
316 pub(crate) fn is_identchar(&self, begin: usize, _end: usize) -> bool {
317 let byte = match self.input.byte_at(begin) {
318 Some(byte) => byte,
319 None => return false,
320 };
321
322 byte.is_ascii_alphanumeric() || byte == b'_' || !byte.is_ascii()
323 }
324
325 pub(crate) fn set_encoding(&mut self, encoding: &str) -> Result<(), InputError> {
326 self.input.set_encoding(encoding)
327 }
328}
329
/// Un-reads a previously read value.
///
/// Generic over the argument type so that one `pushback` method name can
/// accept several representations of "the byte that was read".
pub(crate) trait Pushback<T> {
    /// Puts `c` back so that it will be read again.
    fn pushback(&mut self, c: T);
}
333
334impl Pushback<u8> for Buffer {
335 fn pushback(&mut self, c: u8) {
336 self.pcur -= 1;
337 if self.pcur > self.pbeg
338 && self.byte_at(self.pcur) == b'\n'
339 && self.byte_at(self.pcur - 1) == b'\r'
340 {
341 self.pcur -= 1;
342 }
343 println_if_debug_buffer!("pushback({:?}) pcur = {}", c, self.pcur);
344 }
345}
346
347impl Pushback<Option<u8>> for Buffer {
348 fn pushback(&mut self, c: Option<u8>) {
349 if let Some(c) = c {
350 self.pushback(c)
351 }
352 }
353}
354
355impl Pushback<MaybeByte> for Buffer {
356 fn pushback(&mut self, c: MaybeByte) {
357 self.pushback(c.as_option())
358 }
359}
360
361impl Pushback<&mut MaybeByte> for Buffer {
362 fn pushback(&mut self, c: &mut MaybeByte) {
363 self.pushback(c.as_option())
364 }
365}