lib_ruby_parser/lexer/
parse_heredoc.rs1use crate::lexer::TokAdd;
2use crate::maybe_byte::MaybeByte;
3use crate::source::buffer::*;
4use crate::str_term::{str_types::*, HeredocEnd, HeredocLiteral, StrTerm};
5use crate::Lexer;
6use crate::TokenBuf;
7use crate::{lex_states::*, DiagnosticMessage};
8
/// Width of a tab stop, in columns, used when measuring heredoc indentation:
/// a tab moves the current line's indentation to the next multiple of this
/// value (see `update_heredoc_indent`).
const TAB_WIDTH: i32 = 8;
10
11impl Lexer {
12 pub(crate) fn heredoc_identifier(&mut self) -> Option<i32> {
13 let mut offset = self.buffer.pcur - self.buffer.pbeg;
18 let mut c = self.nextc();
19 let term;
20 let mut func = 0;
21 let mut quote = 0;
22 let mut token = Self::tSTRING_BEG;
23 let mut indent = 0;
24
25 if c == b'-' {
26 c = self.nextc();
27 func = STR_FUNC_INDENT;
28 offset += 1;
29 } else if c == b'~' {
30 c = self.nextc();
31 func = STR_FUNC_INDENT;
32 offset += 1;
33 indent = i32::MAX;
34 }
35
36 if c == b'\'' || c == b'"' || c == b'`' {
37 if c == b'\'' {
38 func |= str_squote
39 }
40 if c == b'"' {
41 func |= str_dquote
42 }
43 if c == b'`' {
44 func |= str_xquote;
45 token = Self::tXSTRING_BEG
46 }
47
48 quote += 1;
49 offset += 1;
50 term = c;
51
52 loop {
53 c = self.nextc();
54 if c == term {
55 break;
56 }
57
58 if c.is_eof() || c == b'\r' || c == b'\n' {
59 self.yyerror0(DiagnosticMessage::UnterminatedHeredocId {});
60 return Some(Self::END_OF_INPUT);
61 }
62 }
63 } else {
64 if !self.is_identchar() {
65 self.buffer.pushback(c);
66 if (func & STR_FUNC_INDENT) != 0 {
67 self.buffer.pushback(if indent > 0 { b'~' } else { b'-' });
68 }
69 return None;
70 }
71 func |= str_dquote;
72 loop {
73 let n = self.multibyte_char_len(self.buffer.pcur - 1);
74 match n {
75 Some(n) => self.buffer.pcur += n - 1,
76 None => return Some(Self::END_OF_INPUT),
77 }
78 c = self.nextc();
79 if c.is_eof() || !self.is_identchar() {
80 break;
81 }
82 }
83 self.buffer.pushback(c);
84 }
85
86 let len = self.buffer.pcur - (self.buffer.pbeg + offset) - quote;
87
88 let id = self
89 .buffer
90 .substr_at(self.buffer.ptok, self.buffer.pcur)
91 .expect("failed to get heredoc id");
92 let id = TokenBuf::new(id);
93 self.set_yylval_str(&id);
94 self.lval_start = Some(self.buffer.ptok);
95 self.lval_end = Some(self.buffer.pcur);
96
97 self.buffer.goto_eol();
98
99 self.strterm = Some(Box::new(StrTerm::new_heredoc(HeredocLiteral::new(
100 self.buffer.lastline,
101 offset,
102 self.buffer.ruby_sourceline,
103 len,
104 quote,
105 func,
106 ))));
107
108 self.token_flush();
109 self.buffer.heredoc_indent = indent;
110 self.buffer.heredoc_line_indent = 0;
111 Some(token)
112 }
113
114 pub(crate) fn here_document(&mut self) -> i32 {
115 let here = match self.strterm.as_ref().unwrap().as_ref() {
116 StrTerm::StringLiteral(_) => unreachable!("strterm must be heredoc"),
117 StrTerm::HeredocLiteral(h) => h.clone(),
118 };
119 self.lval_start = Some(self.buffer.pcur);
120
121 let mut ptr;
122 let mut ptr_end;
123 let mut str_ = TokenBuf::new(b"");
124
125 let heredoc_end: HeredocEnd;
126
127 let eos = self.buffer.input.line_at(here.lastline).start + here.offset;
128 let len = here.length;
129 let func = here.func;
130 let indent = here.func & STR_FUNC_INDENT;
131
132 let mut c = self.nextc();
133 if c.is_eof() {
134 return self.here_document_error(&here, eos, len);
135 }
136 let bol = self.buffer.was_bol();
137 if !bol {
138 } else if self.buffer.heredoc_line_indent == -1 {
140 self.buffer.heredoc_line_indent = 0;
145 } else if self.buffer.is_whole_match(
146 self.buffer
147 .substr_at(eos, eos + len)
148 .expect("failed to get heredoc id for comparison"),
149 indent,
150 ) {
151 return self.here_document_restore(&here);
152 }
153
154 if (func & STR_FUNC_EXPAND) == 0 {
155 loop {
156 ptr = self.buffer.input.line_at(self.buffer.lastline).start;
157 ptr_end = self.buffer.pend;
158 if ptr_end > ptr {
159 match self.buffer.input.unchecked_byte_at(ptr_end - 1) {
160 b'\n' => {
161 ptr_end -= 1;
162 if ptr_end == ptr
163 || self.buffer.input.unchecked_byte_at(ptr_end - 1) != b'\r'
164 {
165 ptr_end += 1;
166 }
167 }
168 b'\r' => {
169 ptr_end -= 1;
170 }
171 _ => {}
172 }
173 }
174
175 if self.buffer.heredoc_indent > 0 {
176 let mut i = 0;
177 while (ptr + i < ptr_end) && self.update_heredoc_indent(self.char_at(ptr + i)) {
178 i += 1;
179 }
180 self.buffer.heredoc_line_indent = 0;
181 }
182
183 match self.buffer.substr_at(ptr, ptr_end) {
184 Some(s) => str_.append(s),
185 _ => panic!(
186 "no substr {}..{} (len = {})",
187 ptr,
188 ptr_end,
189 self.buffer.input.len()
190 ),
191 };
192 if ptr_end < self.buffer.pend {
193 str_.push(b'\n')
194 }
195 self.buffer.goto_eol();
196 if self.buffer.heredoc_indent > 0 {
197 return self.heredoc_flush_str(&str_);
198 }
199 if self.nextc().is_eof() {
200 str_.clear();
201 return self.here_document_error(&here, eos, len);
202 }
203
204 if self.buffer.is_whole_match(
205 self.buffer
206 .substr_at(eos, eos + len)
207 .expect("failed to get heredoc id for comparison"),
208 indent,
209 ) {
210 self.lval_end = Some(self.buffer.pend - 1);
211 heredoc_end = self.compute_heredoc_end();
212 break;
213 }
214 }
215 } else {
216 self.newtok();
217 if c == b'#' {
218 let t = self.peek_variable_name();
219 if self.buffer.heredoc_line_indent != -1 {
220 if self.buffer.heredoc_indent > self.buffer.heredoc_line_indent {
221 self.buffer.heredoc_indent = self.buffer.heredoc_line_indent;
222 }
223 self.buffer.heredoc_line_indent = -1;
224 }
225 if let Some(t) = t {
226 return t;
227 }
228 self.tokadd(b'#');
229 c = self.nextc();
230 }
231 loop {
232 self.buffer.pushback(c);
233 match self.tokadd_string(func, b'\n', None, &mut 0) {
235 Some(cc) => c = cc,
236 None => {
237 if self.buffer.eofp {
238 return self.here_document_error(&here, eos, len);
239 }
240 return self.here_document_restore(&here);
241 }
242 }
243 self.lval_end = Some(self.buffer.pcur + 1);
244 if c != b'\n' {
245 if c == b'\\' {
246 self.buffer.heredoc_line_indent = -1
247 }
248 return self.heredoc_flush();
249 }
250 let cc = self.nextc();
251 self.tokadd(cc);
252 if self.buffer.heredoc_indent > 0 {
253 self.buffer.goto_eol();
254 return self.heredoc_flush();
255 }
256 c = self.nextc();
257 if c.is_eof() {
258 return self.here_document_error(&here, eos, len);
259 }
260
261 if self.buffer.is_whole_match(
262 self.buffer
263 .substr_at(eos, eos + len)
264 .expect("failed to get heredoc id for comparison"),
265 indent,
266 ) {
267 heredoc_end = self.compute_heredoc_end();
268
269 break;
270 }
271 }
272 str_ = self.tokenbuf.clone();
273 }
274
275 self.heredoc_restore(&here);
276 self.token_flush();
277 self.strterm = self.new_strterm(func | STR_FUNC_TERM, 0, Some(0), Some(heredoc_end));
278 self.set_yylval_str(&str_);
279 Self::tSTRING_CONTENT
280 }
281
282 fn compute_heredoc_end(&self) -> HeredocEnd {
283 let start = self.buffer.pbeg;
284 let mut end_starts_at = start;
285 while self.buffer.byte_at(end_starts_at) == b' ' {
286 end_starts_at += 1;
287 }
288 let mut end = end_starts_at;
289 loop {
290 let c = self.buffer.byte_at(end);
291 if c.is_eof() || c == b'\n' {
292 break;
293 }
294 end += 1;
295 }
296 let value = self
297 .buffer
298 .substr_at(end_starts_at, end)
299 .expect("failed to get heredoc end");
300
301 HeredocEnd {
302 start,
303 end,
304 value: value.to_vec(),
305 }
306 }
307
308 fn here_document_error(&mut self, here: &HeredocLiteral, eos: usize, len: usize) -> i32 {
309 self.heredoc_restore(here);
310 self.compile_error(
311 DiagnosticMessage::UnterminatedHeredoc {
312 heredoc_id: String::from_utf8_lossy(
313 self.buffer
314 .substr_at(eos, eos + len)
315 .expect("failed to get heredoc id for comparison"),
316 )
317 .into_owned(),
318 },
319 self.current_loc(),
320 );
321 self.token_flush();
322 self.strterm = None;
323 self.lex_state.set(EXPR_END);
324 Self::tSTRING_END
325 }
326
327 fn here_document_restore(&mut self, here: &HeredocLiteral) -> i32 {
328 let heredoc_end = self.compute_heredoc_end();
329 self.lval_start = Some(heredoc_end.start);
330 self.lval_end = Some(heredoc_end.end);
331 self.set_yylval_str(&TokenBuf::new(&heredoc_end.value));
332
333 self.heredoc_restore(here);
334 self.token_flush();
335 self.strterm = None;
336 self.lex_state.set(EXPR_END);
337
338 Self::tSTRING_END
339 }
340
341 fn heredoc_flush_str(&mut self, str_: &TokenBuf) -> i32 {
342 self.set_yylval_str(str_);
343 self.flush_string_content();
344 Self::tSTRING_CONTENT
345 }
346
347 fn heredoc_flush(&mut self) -> i32 {
348 let tokenbuf = self.tokenbuf.take();
349 self.heredoc_flush_str(&tokenbuf)
350 }
351
352 fn heredoc_restore(&mut self, here: &HeredocLiteral) {
353 self.strterm = None;
354 let line = here.lastline;
355 self.buffer.lastline = line;
356 self.buffer.pbeg = self.buffer.input.line_at(line).start;
357 self.buffer.pend = self.buffer.pbeg + self.buffer.input.line_at(line).len();
358 self.buffer.pcur = self.buffer.pbeg + here.offset + here.length + here.quote;
359 self.buffer.ptok = self.buffer.pbeg + here.offset - here.quote;
360 self.buffer.heredoc_end = self.buffer.ruby_sourceline;
361 self.buffer.ruby_sourceline = here.sourceline;
362 if self.buffer.eofp {
363 self.buffer.nextline = 0
364 }
365 self.buffer.eofp = false;
366 }
367
368 pub(crate) fn update_heredoc_indent(&mut self, c: MaybeByte) -> bool {
369 if self.buffer.heredoc_line_indent == -1 {
370 if c == b'\n' {
371 self.buffer.heredoc_line_indent = 0
372 }
373 } else if c == b' ' {
374 self.buffer.heredoc_line_indent += 1;
375 return true;
376 } else if c == b'\t' {
377 let w = (self.buffer.heredoc_line_indent / TAB_WIDTH) + 1;
378 self.buffer.heredoc_line_indent = w * TAB_WIDTH;
379 return true;
380 } else if c != b'\n' {
381 if self.buffer.heredoc_indent > self.buffer.heredoc_line_indent {
382 self.buffer.heredoc_indent = self.buffer.heredoc_line_indent
383 }
384 self.buffer.heredoc_line_indent = -1;
385 }
386 true
387 }
388}