lib_ruby_parser/lexer/
parse_heredoc.rs

1use crate::lexer::TokAdd;
2use crate::maybe_byte::MaybeByte;
3use crate::source::buffer::*;
4use crate::str_term::{str_types::*, HeredocEnd, HeredocLiteral, StrTerm};
5use crate::Lexer;
6use crate::TokenBuf;
7use crate::{lex_states::*, DiagnosticMessage};
8
/// Width of a tab stop in columns: when heredoc indentation is measured, a tab advances the
/// running indentation to the next multiple of this value.
9const TAB_WIDTH: i32 = 8;
10
11impl Lexer {
12    pub(crate) fn heredoc_identifier(&mut self) -> Option<i32> {
13        /*
14         * term_len is length of `<<"END"` except `END`,
15         * in this case term_len is 4 (<, <, " and ").
16         */
17        let mut offset = self.buffer.pcur - self.buffer.pbeg;
18        let mut c = self.nextc();
19        let term;
20        let mut func = 0;
21        let mut quote = 0;
22        let mut token = Self::tSTRING_BEG;
23        let mut indent = 0;
24
25        if c == b'-' {
26            c = self.nextc();
27            func = STR_FUNC_INDENT;
28            offset += 1;
29        } else if c == b'~' {
30            c = self.nextc();
31            func = STR_FUNC_INDENT;
32            offset += 1;
33            indent = i32::MAX;
34        }
35
36        if c == b'\'' || c == b'"' || c == b'`' {
37            if c == b'\'' {
38                func |= str_squote
39            }
40            if c == b'"' {
41                func |= str_dquote
42            }
43            if c == b'`' {
44                func |= str_xquote;
45                token = Self::tXSTRING_BEG
46            }
47
48            quote += 1;
49            offset += 1;
50            term = c;
51
52            loop {
53                c = self.nextc();
54                if c == term {
55                    break;
56                }
57
58                if c.is_eof() || c == b'\r' || c == b'\n' {
59                    self.yyerror0(DiagnosticMessage::UnterminatedHeredocId {});
60                    return Some(Self::END_OF_INPUT);
61                }
62            }
63        } else {
64            if !self.is_identchar() {
65                self.buffer.pushback(c);
66                if (func & STR_FUNC_INDENT) != 0 {
67                    self.buffer.pushback(if indent > 0 { b'~' } else { b'-' });
68                }
69                return None;
70            }
71            func |= str_dquote;
72            loop {
73                let n = self.multibyte_char_len(self.buffer.pcur - 1);
74                match n {
75                    Some(n) => self.buffer.pcur += n - 1,
76                    None => return Some(Self::END_OF_INPUT),
77                }
78                c = self.nextc();
79                if c.is_eof() || !self.is_identchar() {
80                    break;
81                }
82            }
83            self.buffer.pushback(c);
84        }
85
86        let len = self.buffer.pcur - (self.buffer.pbeg + offset) - quote;
87
88        let id = self
89            .buffer
90            .substr_at(self.buffer.ptok, self.buffer.pcur)
91            .expect("failed to get heredoc id");
92        let id = TokenBuf::new(id);
93        self.set_yylval_str(&id);
94        self.lval_start = Some(self.buffer.ptok);
95        self.lval_end = Some(self.buffer.pcur);
96
97        self.buffer.goto_eol();
98
99        self.strterm = Some(Box::new(StrTerm::new_heredoc(HeredocLiteral::new(
100            self.buffer.lastline,
101            offset,
102            self.buffer.ruby_sourceline,
103            len,
104            quote,
105            func,
106        ))));
107
108        self.token_flush();
109        self.buffer.heredoc_indent = indent;
110        self.buffer.heredoc_line_indent = 0;
111        Some(token)
112    }
113
114    pub(crate) fn here_document(&mut self) -> i32 {
115        let here = match self.strterm.as_ref().unwrap().as_ref() {
116            StrTerm::StringLiteral(_) => unreachable!("strterm must be heredoc"),
117            StrTerm::HeredocLiteral(h) => h.clone(),
118        };
119        self.lval_start = Some(self.buffer.pcur);
120
121        let mut ptr;
122        let mut ptr_end;
123        let mut str_ = TokenBuf::new(b"");
124
125        let heredoc_end: HeredocEnd;
126
127        let eos = self.buffer.input.line_at(here.lastline).start + here.offset;
128        let len = here.length;
129        let func = here.func;
130        let indent = here.func & STR_FUNC_INDENT;
131
132        let mut c = self.nextc();
133        if c.is_eof() {
134            return self.here_document_error(&here, eos, len);
135        }
136        let bol = self.buffer.was_bol();
137        if !bol {
138            /* not beginning of line, cannot be the terminator */
139        } else if self.buffer.heredoc_line_indent == -1 {
140            /* `heredoc_line_indent == -1` means
141             * - "after an interpolation in the same line", or
142             * - "in a continuing line"
143             */
144            self.buffer.heredoc_line_indent = 0;
145        } else if self.buffer.is_whole_match(
146            self.buffer
147                .substr_at(eos, eos + len)
148                .expect("failed to get heredoc id for comparison"),
149            indent,
150        ) {
151            return self.here_document_restore(&here);
152        }
153
154        if (func & STR_FUNC_EXPAND) == 0 {
155            loop {
156                ptr = self.buffer.input.line_at(self.buffer.lastline).start;
157                ptr_end = self.buffer.pend;
158                if ptr_end > ptr {
159                    match self.buffer.input.unchecked_byte_at(ptr_end - 1) {
160                        b'\n' => {
161                            ptr_end -= 1;
162                            if ptr_end == ptr
163                                || self.buffer.input.unchecked_byte_at(ptr_end - 1) != b'\r'
164                            {
165                                ptr_end += 1;
166                            }
167                        }
168                        b'\r' => {
169                            ptr_end -= 1;
170                        }
171                        _ => {}
172                    }
173                }
174
175                if self.buffer.heredoc_indent > 0 {
176                    let mut i = 0;
177                    while (ptr + i < ptr_end) && self.update_heredoc_indent(self.char_at(ptr + i)) {
178                        i += 1;
179                    }
180                    self.buffer.heredoc_line_indent = 0;
181                }
182
183                match self.buffer.substr_at(ptr, ptr_end) {
184                    Some(s) => str_.append(s),
185                    _ => panic!(
186                        "no substr {}..{} (len = {})",
187                        ptr,
188                        ptr_end,
189                        self.buffer.input.len()
190                    ),
191                };
192                if ptr_end < self.buffer.pend {
193                    str_.push(b'\n')
194                }
195                self.buffer.goto_eol();
196                if self.buffer.heredoc_indent > 0 {
197                    return self.heredoc_flush_str(&str_);
198                }
199                if self.nextc().is_eof() {
200                    str_.clear();
201                    return self.here_document_error(&here, eos, len);
202                }
203
204                if self.buffer.is_whole_match(
205                    self.buffer
206                        .substr_at(eos, eos + len)
207                        .expect("failed to get heredoc id for comparison"),
208                    indent,
209                ) {
210                    self.lval_end = Some(self.buffer.pend - 1);
211                    heredoc_end = self.compute_heredoc_end();
212                    break;
213                }
214            }
215        } else {
216            self.newtok();
217            if c == b'#' {
218                let t = self.peek_variable_name();
219                if self.buffer.heredoc_line_indent != -1 {
220                    if self.buffer.heredoc_indent > self.buffer.heredoc_line_indent {
221                        self.buffer.heredoc_indent = self.buffer.heredoc_line_indent;
222                    }
223                    self.buffer.heredoc_line_indent = -1;
224                }
225                if let Some(t) = t {
226                    return t;
227                }
228                self.tokadd(b'#');
229                c = self.nextc();
230            }
231            loop {
232                self.buffer.pushback(c);
233                // enc = self.p.enc;
234                match self.tokadd_string(func, b'\n', None, &mut 0) {
235                    Some(cc) => c = cc,
236                    None => {
237                        if self.buffer.eofp {
238                            return self.here_document_error(&here, eos, len);
239                        }
240                        return self.here_document_restore(&here);
241                    }
242                }
243                self.lval_end = Some(self.buffer.pcur + 1);
244                if c != b'\n' {
245                    if c == b'\\' {
246                        self.buffer.heredoc_line_indent = -1
247                    }
248                    return self.heredoc_flush();
249                }
250                let cc = self.nextc();
251                self.tokadd(cc);
252                if self.buffer.heredoc_indent > 0 {
253                    self.buffer.goto_eol();
254                    return self.heredoc_flush();
255                }
256                c = self.nextc();
257                if c.is_eof() {
258                    return self.here_document_error(&here, eos, len);
259                }
260
261                if self.buffer.is_whole_match(
262                    self.buffer
263                        .substr_at(eos, eos + len)
264                        .expect("failed to get heredoc id for comparison"),
265                    indent,
266                ) {
267                    heredoc_end = self.compute_heredoc_end();
268
269                    break;
270                }
271            }
272            str_ = self.tokenbuf.clone();
273        }
274
275        self.heredoc_restore(&here);
276        self.token_flush();
277        self.strterm = self.new_strterm(func | STR_FUNC_TERM, 0, Some(0), Some(heredoc_end));
278        self.set_yylval_str(&str_);
279        Self::tSTRING_CONTENT
280    }
281
282    fn compute_heredoc_end(&self) -> HeredocEnd {
283        let start = self.buffer.pbeg;
284        let mut end_starts_at = start;
285        while self.buffer.byte_at(end_starts_at) == b' ' {
286            end_starts_at += 1;
287        }
288        let mut end = end_starts_at;
289        loop {
290            let c = self.buffer.byte_at(end);
291            if c.is_eof() || c == b'\n' {
292                break;
293            }
294            end += 1;
295        }
296        let value = self
297            .buffer
298            .substr_at(end_starts_at, end)
299            .expect("failed to get heredoc end");
300
301        HeredocEnd {
302            start,
303            end,
304            value: value.to_vec(),
305        }
306    }
307
308    fn here_document_error(&mut self, here: &HeredocLiteral, eos: usize, len: usize) -> i32 {
309        self.heredoc_restore(here);
310        self.compile_error(
311            DiagnosticMessage::UnterminatedHeredoc {
312                heredoc_id: String::from_utf8_lossy(
313                    self.buffer
314                        .substr_at(eos, eos + len)
315                        .expect("failed to get heredoc id for comparison"),
316                )
317                .into_owned(),
318            },
319            self.current_loc(),
320        );
321        self.token_flush();
322        self.strterm = None;
323        self.lex_state.set(EXPR_END);
324        Self::tSTRING_END
325    }
326
327    fn here_document_restore(&mut self, here: &HeredocLiteral) -> i32 {
328        let heredoc_end = self.compute_heredoc_end();
329        self.lval_start = Some(heredoc_end.start);
330        self.lval_end = Some(heredoc_end.end);
331        self.set_yylval_str(&TokenBuf::new(&heredoc_end.value));
332
333        self.heredoc_restore(here);
334        self.token_flush();
335        self.strterm = None;
336        self.lex_state.set(EXPR_END);
337
338        Self::tSTRING_END
339    }
340
341    fn heredoc_flush_str(&mut self, str_: &TokenBuf) -> i32 {
342        self.set_yylval_str(str_);
343        self.flush_string_content();
344        Self::tSTRING_CONTENT
345    }
346
347    fn heredoc_flush(&mut self) -> i32 {
348        let tokenbuf = self.tokenbuf.take();
349        self.heredoc_flush_str(&tokenbuf)
350    }
351
352    fn heredoc_restore(&mut self, here: &HeredocLiteral) {
353        self.strterm = None;
354        let line = here.lastline;
355        self.buffer.lastline = line;
356        self.buffer.pbeg = self.buffer.input.line_at(line).start;
357        self.buffer.pend = self.buffer.pbeg + self.buffer.input.line_at(line).len();
358        self.buffer.pcur = self.buffer.pbeg + here.offset + here.length + here.quote;
359        self.buffer.ptok = self.buffer.pbeg + here.offset - here.quote;
360        self.buffer.heredoc_end = self.buffer.ruby_sourceline;
361        self.buffer.ruby_sourceline = here.sourceline;
362        if self.buffer.eofp {
363            self.buffer.nextline = 0
364        }
365        self.buffer.eofp = false;
366    }
367
368    pub(crate) fn update_heredoc_indent(&mut self, c: MaybeByte) -> bool {
369        if self.buffer.heredoc_line_indent == -1 {
370            if c == b'\n' {
371                self.buffer.heredoc_line_indent = 0
372            }
373        } else if c == b' ' {
374            self.buffer.heredoc_line_indent += 1;
375            return true;
376        } else if c == b'\t' {
377            let w = (self.buffer.heredoc_line_indent / TAB_WIDTH) + 1;
378            self.buffer.heredoc_line_indent = w * TAB_WIDTH;
379            return true;
380        } else if c != b'\n' {
381            if self.buffer.heredoc_indent > self.buffer.heredoc_line_indent {
382                self.buffer.heredoc_indent = self.buffer.heredoc_line_indent
383            }
384            self.buffer.heredoc_line_indent = -1;
385        }
386        true
387    }
388}