lib_ruby_parser/source/
buffer.rs

1use std::convert::TryFrom;
2
3use crate::maybe_byte::*;
4use crate::source::input::Input;
5use crate::source::Decoder;
6use crate::source::InputError;
7
/// Cursor state for reading the source one line at a time.
///
/// `pbeg`, `pcur`, `pend` and `ptok` are offsets used to index bytes of
/// `input`; `prevline`, `lastline` and `nextline` are line indexes understood
/// by `input.line_at`.
#[derive(Debug, Default)]
pub(crate) struct Buffer {
    /// Source that every byte, sub-slice and line lookup goes through.
    pub(crate) input: Input,

    /// Number of lines handed out so far by `getline`.
    pub(crate) line_count: usize,
    /// Value `lastline` held before the most recent successful `nextline`
    /// call (line index); `None` until `nextline` has succeeded once.
    pub(crate) prevline: Option<usize>,
    /// Index of the line most recently loaded by `nextline`.
    pub(crate) lastline: usize,
    /// Line index that the next `nextline` call loads instead of fetching a
    /// fresh line; `0` means "nothing queued".
    pub(crate) nextline: usize,
    /// Offset of the first byte of the current line.
    pub(crate) pbeg: usize,
    /// Offset of the next byte `nextc` will return.
    pub(crate) pcur: usize,
    /// Offset one past the last byte of the current line.
    pub(crate) pend: usize,
    /// Offset recorded by `set_ptok` / `token_flush`.
    pub(crate) ptok: usize,

    /// Set once `nextline` finds that no further line can be loaded.
    pub(crate) eofp: bool,
    /// Reset to `false` each time `nextline` fetches a fresh line; it is not
    /// set anywhere in this file.
    pub(crate) cr_seen: bool,

    /// When non-zero, `nextline` copies it into `ruby_sourceline` and clears it.
    pub(crate) heredoc_end: usize,
    /// Not read or written in this file; presumably maintained by the
    /// heredoc lexing code (confirm against callers).
    pub(crate) heredoc_indent: i32,
    /// Not read or written in this file; presumably maintained by the
    /// heredoc lexing code (confirm against callers).
    pub(crate) heredoc_line_indent: i32,

    /// Not read or written in this file (confirm usage against callers).
    pub(crate) tokidx: usize,
    // pub(crate) toksize: usize,
    /// Not read or written in this file (confirm usage against callers).
    pub(crate) tokline: usize,

    /// Set by `prepare` when the input starts with `#!`.
    pub(crate) has_shebang: bool,

    /// Current line number; incremented by every successful `nextline` call.
    pub(crate) ruby_sourceline: usize,
    // pub(crate) ruby_sourcefile: Vec<char>, /* current source file */
    // pub(crate) ruby_sourcefile_string: Vec<char>,
}
39
/// Prints a debug line, but only when the crate is compiled with the
/// `debug-buffer` feature.
///
/// Accepts exactly what `println!` accepts: a format string followed by any
/// number of arguments (including none), an optional trailing comma, and
/// named arguments. The arguments are always type-checked; they are only
/// evaluated and printed when the feature is enabled.
macro_rules! println_if_debug_buffer {
    ($($arg:tt)*) => {
        // `cfg!` expands to a constant `bool`, so the branch is trivially
        // removed by the optimizer when the feature is disabled.
        if cfg!(feature = "debug-buffer") {
            println!($($arg)*);
        }
    };
}
47
48impl Buffer {
49    const CTRL_Z_CHAR: u8 = 0x1a;
50    const CTRL_D_CHAR: u8 = 0x04;
51
52    pub(crate) fn new(name: String, bytes: Vec<u8>, decoder: Option<Decoder>) -> Self {
53        let mut input = Input::new(name, decoder);
54
55        input.update_bytes(bytes);
56
57        let mut this = Self {
58            input,
59            ..Self::default()
60        };
61
62        this.prepare();
63
64        this
65    }
66
67    fn prepare(&mut self) {
68        let c = self.nextc();
69        match c.as_option() {
70            Some(b'#') => {
71                if self.peek(b'!') {
72                    self.has_shebang = true;
73                }
74            }
75            Some(0xef) => {
76                // handle UTF-8 BOM marker
77                if self.pend - self.pcur >= 2
78                    && self.byte_at(self.pcur) == 0xbb
79                    && self.byte_at(self.pcur + 1) == 0xbf
80                {
81                    self.pcur += 2;
82                    self.pbeg = self.pcur;
83                    return;
84                }
85            }
86            None => return,
87            _ => {}
88        }
89
90        self.pushback(c)
91    }
92
93    pub(crate) fn nextc(&mut self) -> MaybeByte {
94        if self.pcur == self.pend || self.eofp || self.nextline != 0 {
95            let n = self.nextline();
96            println_if_debug_buffer!("nextline = {:?}", n);
97            if n.is_err() {
98                return MaybeByte::EndOfInput;
99            }
100        }
101        let mut c = match self.input.byte_at(self.pcur) {
102            Some(c) => c,
103            None => return MaybeByte::EndOfInput,
104        };
105        self.pcur += 1;
106        if c == b'\r' {
107            c = self.parser_cr(c);
108        }
109        println_if_debug_buffer!("nextc = {:?}", c);
110        MaybeByte::new(c)
111    }
112
113    pub(crate) fn goto_eol(&mut self) {
114        self.pcur = self.pend;
115    }
116
117    pub(crate) fn is_eol(&self) -> bool {
118        self.pcur >= self.pend
119    }
120
121    pub(crate) fn is_eol_n(&self, n: usize) -> bool {
122        self.pcur + n >= self.pend
123    }
124
125    pub(crate) fn peek(&self, c: u8) -> bool {
126        self.peek_n(c, 0)
127    }
128    pub(crate) fn peek_n(&self, c: u8, n: usize) -> bool {
129        !self.is_eol_n(n) && c == self.input.unchecked_byte_at(self.pcur + n)
130    }
131    pub(crate) fn peekc(&self) -> MaybeByte {
132        self.peekc_n(0)
133    }
134    pub(crate) fn peekc_n(&self, n: usize) -> MaybeByte {
135        if self.is_eol_n(n) {
136            MaybeByte::EndOfInput
137        } else {
138            self.byte_at(self.pcur + n)
139        }
140    }
141
142    pub(crate) fn nextline(&mut self) -> Result<(), ()> {
143        let mut v = self.nextline;
144        self.nextline = 0;
145
146        if v == 0 {
147            if self.eofp {
148                return Err(());
149            }
150
151            if self.pend > self.pbeg && self.input.unchecked_byte_at(self.pend - 1) != b'\n' {
152                self.eofp = true;
153                self.goto_eol();
154                return Err(());
155            }
156
157            match self.getline() {
158                Ok(line) => v = line,
159                Err(_) => {
160                    self.eofp = true;
161                    self.goto_eol();
162                    return Err(());
163                }
164            }
165
166            self.cr_seen = false;
167        }
168        // TODO: after here-document without terminator
169
170        let line = self.input.line_at(v);
171
172        if self.heredoc_end > 0 {
173            self.ruby_sourceline = self.heredoc_end;
174            self.heredoc_end = 0;
175        }
176        self.ruby_sourceline += 1;
177        self.pbeg = line.start;
178        self.pcur = line.start;
179        self.pend = line.end;
180        self.token_flush();
181        self.prevline = Some(self.lastline);
182        self.lastline = v;
183
184        Ok(())
185    }
186
187    pub(crate) fn getline(&mut self) -> Result<usize, ()> {
188        if self.line_count < self.input.lines_count() {
189            self.line_count += 1;
190            println_if_debug_buffer!("line_count = {}", self.line_count);
191            Ok(self.line_count - 1)
192        } else {
193            Err(())
194        }
195    }
196
197    pub(crate) fn token_flush(&mut self) {
198        self.set_ptok(self.pcur);
199    }
200
201    pub(crate) fn set_ptok(&mut self, ptok: usize) {
202        println_if_debug_buffer!("set_ptok({})", ptok);
203        self.ptok = ptok;
204    }
205
206    pub(crate) fn parser_cr(&mut self, mut c: u8) -> u8 {
207        if self.peek(b'\n') {
208            self.pcur += 1;
209            c = b'\n';
210        }
211        c
212    }
213
214    pub(crate) fn byte_at(&self, idx: usize) -> MaybeByte {
215        match self.input.byte_at(idx) {
216            Some(byte) => MaybeByte::Some(byte),
217            None => MaybeByte::EndOfInput,
218        }
219    }
220
221    pub(crate) fn substr_at(&self, start: usize, end: usize) -> Option<&[u8]> {
222        self.input.substr_at(start, end)
223    }
224
225    pub(crate) fn was_bol(&self) -> bool {
226        self.pcur == self.pbeg + 1
227    }
228
229    pub(crate) fn is_word_match(&self, word: &str) -> bool {
230        let len = word.len();
231
232        if self.substr_at(self.pcur, self.pcur + len) != Some(word.as_bytes()) {
233            return false;
234        }
235        if self.pcur + len == self.pend {
236            return true;
237        }
238        let c = self.byte_at(self.pcur + len);
239        if c.is_space() {
240            return true;
241        }
242        if c == b'\0' || c == Self::CTRL_Z_CHAR || c == Self::CTRL_D_CHAR {
243            return true;
244        }
245        false
246    }
247
248    pub(crate) fn is_looking_at_eol(&self) -> bool {
249        let mut ptr = self.pcur;
250        while ptr < self.pend {
251            let c = self.input.byte_at(ptr);
252            ptr += 1;
253            if let Some(c) = c {
254                let eol = c == b'\n' || c == b'#';
255                if eol || !c.is_ascii_whitespace() {
256                    return eol;
257                }
258            };
259        }
260        true
261    }
262
263    pub(crate) fn is_whole_match(&self, eos: &[u8], indent: usize) -> bool {
264        let mut ptr = self.pbeg;
265        let len = eos.len();
266
267        if indent > 0 {
268            while let Some(c) = self.input.byte_at(ptr) {
269                if !c.is_ascii_whitespace() {
270                    break;
271                }
272                ptr += 1;
273            }
274        }
275
276        if self.pend < ptr + len {
277            return false;
278        }
279
280        if let Ok(n) = isize::try_from(self.pend - (ptr + len)) {
281            if n < 0 {
282                return false;
283            }
284            let last_char = self.byte_at(ptr + len);
285            let char_after_last_char = self.byte_at(ptr + len + 1);
286
287            if n > 0 && last_char != b'\n' {
288                if last_char != b'\r' {
289                    return false;
290                }
291                if n <= 1 || char_after_last_char != b'\n' {
292                    return false;
293                }
294            }
295
296            let next_len_chars = self.substr_at(ptr, ptr + len);
297            Some(eos) == next_len_chars
298        } else {
299            false
300        }
301    }
302
303    pub(crate) fn eof_no_decrement(&mut self) {
304        if let Some(prevline) = self.prevline {
305            if !self.eofp {
306                self.lastline = prevline;
307            }
308        }
309        self.pbeg = self.input.line_at(self.lastline).start;
310        self.pend = self.pbeg + self.input.line_at(self.lastline).len();
311        self.pcur = self.pend;
312        self.pushback(1);
313        self.set_ptok(self.pcur);
314    }
315
316    pub(crate) fn is_identchar(&self, begin: usize, _end: usize) -> bool {
317        let byte = match self.input.byte_at(begin) {
318            Some(byte) => byte,
319            None => return false,
320        };
321
322        byte.is_ascii_alphanumeric() || byte == b'_' || !byte.is_ascii()
323    }
324
325    pub(crate) fn set_encoding(&mut self, encoding: &str) -> Result<(), InputError> {
326        self.input.set_encoding(encoding)
327    }
328}
329
/// Un-reads a previously read value.
///
/// Generic over `T` so one method name can be implemented for several
/// representations of "the byte that was read".
pub(crate) trait Pushback<T> {
    /// Pushes `c` back.
    fn pushback(&mut self, c: T);
}
333
334impl Pushback<u8> for Buffer {
335    fn pushback(&mut self, c: u8) {
336        self.pcur -= 1;
337        if self.pcur > self.pbeg
338            && self.byte_at(self.pcur) == b'\n'
339            && self.byte_at(self.pcur - 1) == b'\r'
340        {
341            self.pcur -= 1;
342        }
343        println_if_debug_buffer!("pushback({:?}) pcur = {}", c, self.pcur);
344    }
345}
346
347impl Pushback<Option<u8>> for Buffer {
348    fn pushback(&mut self, c: Option<u8>) {
349        if let Some(c) = c {
350            self.pushback(c)
351        }
352    }
353}
354
355impl Pushback<MaybeByte> for Buffer {
356    fn pushback(&mut self, c: MaybeByte) {
357        self.pushback(c.as_option())
358    }
359}
360
361impl Pushback<&mut MaybeByte> for Buffer {
362    fn pushback(&mut self, c: &mut MaybeByte) {
363        self.pushback(c.as_option())
364    }
365}