// lib_ruby_parser/lexer/parse_magic_comment.rs

1use std::convert::TryInto;
2
3use crate::source::{MagicComment, MagicCommentKind};
4use crate::DiagnosticMessage;
5use crate::Lexer;
6
7type MagicCommentData = (&'static str, MagicCommentKind);
8
9const MAGIC_COMMENTS: &[MagicCommentData] = &[
10    ("coding", MagicCommentKind::Encoding),
11    ("encoding", MagicCommentKind::Encoding),
12    (
13        "frozen_string_literal",
14        MagicCommentKind::FrozenStringLiteral,
15    ),
16    (
17        "shareable_constant_value",
18        MagicCommentKind::ShareableConstantValue,
19    ),
20    ("warn_indent", MagicCommentKind::WarnIndent),
21];
22
23impl Lexer {
24    pub(crate) fn comment_at_top(&self) -> bool {
25        let mut ptr = self.buffer.pbeg;
26        let ptr_end = self.buffer.pcur - 1;
27        if self.buffer.line_count != (if self.buffer.has_shebang { 2 } else { 1 }) {
28            return false;
29        }
30        while ptr < ptr_end {
31            if !self.char_at(ptr).is_space() {
32                return false;
33            }
34            ptr += 1;
35        }
36        true
37    }
38
39    pub(crate) fn set_file_encoding(&mut self, mut str_: usize, send: usize) {
40        let mut sep = false;
41
42        loop {
43            if send - str_ <= 6 {
44                return;
45            }
46            match self.char_at(str_ + 6).as_option() {
47                Some(b'C') | Some(b'c') => {
48                    str_ += 6;
49                    continue;
50                }
51                Some(b'O') | Some(b'o') => {
52                    str_ += 5;
53                    continue;
54                }
55                Some(b'D') | Some(b'd') => {
56                    str_ += 4;
57                    continue;
58                }
59                Some(b'I') | Some(b'i') => {
60                    str_ += 3;
61                    continue;
62                }
63                Some(b'N') | Some(b'n') => {
64                    str_ += 2;
65                    continue;
66                }
67                Some(b'G') | Some(b'g') => {
68                    str_ += 1;
69                    continue;
70                }
71                Some(b'=') | Some(b':') => {
72                    sep = true;
73                    str_ += 6;
74                }
75                _ => {
76                    str_ += 6;
77                    if self.char_at(str_).is_space() {
78                        // nothing
79                    } else {
80                        continue;
81                    }
82                }
83            }
84            if self.buffer.substr_at(str_ - 6, str_) == Some(b"coding") {
85                break;
86            }
87            sep = false;
88        }
89        loop {
90            loop {
91                str_ += 1;
92                if str_ >= send {
93                    return;
94                }
95                if !(self.char_at(str_).is_space()) {
96                    break;
97                }
98            }
99            if sep {
100                break;
101            }
102            let c = self.char_at(str_);
103            if c != b'=' && c != b':' {
104                return;
105            }
106            sep = true;
107            str_ += 1;
108        }
109        let beg = str_;
110
111        while self.char_at(str_) == b'-'
112            || self.char_at(str_) == b'_'
113            || self.char_at(str_).is_alnum() && str_ + 1 < send
114        {
115            str_ += 1;
116        }
117
118        let _enc_name = self
119            .buffer
120            .substr_at(beg, str_)
121            .expect("failed to get encoding comment value");
122    }
123
124    fn magic_comment_marker(&self, str_: usize, len: usize) -> usize {
125        let mut i = 2;
126
127        while i < len {
128            match self.char_at(str_ + i).as_option() {
129                Some(b'-') => {
130                    if self.char_at(str_ + i - 1) == b'*' && self.char_at(str_ + i - 2) == b'-' {
131                        return str_ + i + 1;
132                    }
133                    i += 2
134                }
135                Some(b'*') => {
136                    if i + 1 >= len {
137                        return 0;
138                    }
139                    if self.char_at(str_ + i + 1) != b'-' {
140                        i += 4;
141                    } else if self.char_at(str_ + i - 1) != b'-' {
142                        i += 2;
143                    } else {
144                        return str_ + i + 2;
145                    }
146                }
147                _ => i += 3,
148            }
149        }
150        0
151    }
152
153    pub(crate) fn magic_comment(&mut self, mut str_: usize, mut len: usize) -> Result<bool, ()> {
154        let mut indicator = false;
155        let mut name;
156        let mut end;
157        let mut vbeg;
158        let mut vend;
159
160        if len <= 7 {
161            return Ok(false);
162        }
163        let mut beg = self.magic_comment_marker(str_, len);
164        if beg != 0 {
165            end = self.magic_comment_marker(beg, str_ + len - beg);
166            if end == 0 {
167                return Ok(false);
168            }
169            indicator = true;
170            str_ = beg;
171            len = end - beg - 3;
172        }
173
174        let mut len: i32 = len.try_into().unwrap();
175
176        while len > 0 {
177            loop {
178                let c = self.char_at(str_);
179                if !(len > 0 && c.is_some()) {
180                    break;
181                }
182
183                if c == b'\'' || c == b'"' || c == b':' || c == b';' {
184                    // noop
185                } else {
186                    if !c.is_space() {
187                        break;
188                    }
189                    str_ += 1;
190                    len -= 1;
191                    continue;
192                }
193
194                str_ += 1;
195                len -= 1;
196            }
197
198            beg = str_;
199            loop {
200                if len <= 0 {
201                    break;
202                }
203
204                let c = self.char_at(str_);
205                if c == b'\'' || c == b'"' || c == b':' || c == b';' {
206                    // noop
207                } else {
208                    if c.is_space() {
209                        // break from C switch;
210                    } else {
211                        str_ += 1;
212                        len -= 1;
213                        continue;
214                    }
215                }
216
217                break;
218            }
219
220            end = str_;
221            loop {
222                let c = self.char_at(str_);
223                if !(len > 0 && c.is_space()) {
224                    break;
225                }
226
227                // empty for loop body
228
229                str_ += 1;
230                len -= 1;
231            }
232
233            if len == 0 {
234                break;
235            }
236            if self.char_at(str_) != b':' {
237                if !indicator {
238                    return Ok(false);
239                }
240                continue;
241            }
242
243            loop {
244                str_ += 1;
245                len -= 1;
246
247                if !(len > 0 && self.char_at(str_).is_space()) {
248                    break;
249                }
250            }
251            if len == 0 {
252                break;
253            }
254            if self.char_at(str_) == b'"' {
255                str_ += 1;
256                vbeg = str_;
257
258                loop {
259                    let c = self.char_at(str_);
260                    len -= 1;
261                    if !(len > 0 && c != b'"') {
262                        break;
263                    }
264
265                    if c == b'\\' {
266                        len -= 1;
267                        str_ += 1;
268                    }
269
270                    str_ += 1;
271                }
272
273                vend = str_;
274                if len != 0 {
275                    len -= 1;
276                    str_ += 1;
277                }
278            } else {
279                vbeg = str_;
280                loop {
281                    let c = self.char_at(str_);
282                    if !(len > 0 && c != b'"' && c != b';' && !c.is_space()) {
283                        break;
284                    }
285
286                    // empty for loop body
287
288                    len -= 1;
289                    str_ += 1;
290                }
291                vend = str_;
292            }
293            if indicator {
294                while len > 0 && (self.char_at(str_) == b';' || self.char_at(str_).is_space()) {
295                    len -= 1;
296                    str_ += 1;
297                }
298            } else {
299                while len > 0 && self.char_at(str_).is_space() {
300                    len -= 1;
301                    str_ += 1;
302                }
303                if len != 0 {
304                    return Ok(false);
305                }
306            }
307
308            let n = end - beg;
309            name = String::from_utf8(
310                self.buffer
311                    .substr_at(beg, beg + n)
312                    .expect("failed to get magic comment name")
313                    .to_vec(),
314            )
315            .map_err(|_| ())?;
316
317            let name_to_compare = name.replace('-', "_");
318            for (name, kind) in MAGIC_COMMENTS.iter() {
319                if &name_to_compare == name {
320                    if kind == &MagicCommentKind::Encoding && self.comment_at_top() {
321                        let encoding = match String::from_utf8(
322                            self.buffer
323                                .substr_at(vbeg, vend)
324                                .expect("bug: Can't be None")
325                                .to_vec(),
326                        ) {
327                            Ok(encoding) => encoding,
328                            Err(err) => {
329                                self.yyerror1(
330                                    DiagnosticMessage::EncodingError {
331                                        error: format!(
332                                            "unknown encoding name: {}",
333                                            String::from_utf8_lossy(err.as_bytes())
334                                        ),
335                                    },
336                                    self.loc(vbeg, vend),
337                                );
338
339                                return Err(());
340                            }
341                        };
342                        match self.buffer.set_encoding(&encoding) {
343                            Ok(_) => {}
344                            Err(err) => {
345                                self.yyerror1(
346                                    DiagnosticMessage::EncodingError {
347                                        error: err.to_string(),
348                                    },
349                                    self.loc(vbeg, vend),
350                                );
351                                return Err(());
352                            }
353                        }
354                    }
355
356                    let key_l = self.loc(beg, beg + n);
357                    let value_l = self.loc(vbeg, vend);
358
359                    let magic_comment = MagicComment {
360                        kind: kind.clone(),
361                        key_l,
362                        value_l,
363                    };
364                    self.magic_comments.push(magic_comment);
365                }
366            }
367        }
368
369        Ok(true)
370    }
371}