mxmlextrema_as3parser/parser/
css_tokenizer.rs

1use crate::ns::*;
2use std::str::FromStr;
3
4pub struct CssTokenizer<'input> {
5    compilation_unit: Rc<CompilationUnit>,
6    characters: CharacterReader<'input>,
7}
8
9impl<'input> CssTokenizer<'input> {
10    /// Constructs a tokenizer.
11    pub fn new(compilation_unit: &'input Rc<CompilationUnit>, options: &ParserOptions) -> Self {
12        let text: &'input str = compilation_unit.text();
13        let compilation_unit = compilation_unit.clone();
14        let characters: CharacterReader<'input>;
15        if let Some(range) = options.byte_range {
16            characters = CharacterReader::from_offset(&text[0..range.1], range.0);
17        } else {
18            characters = CharacterReader::from(text);
19        }
20        Self {
21            compilation_unit,
22            characters,
23        }
24    }
25
26    pub fn compilation_unit(&self) -> &Rc<CompilationUnit> {
27        &self.compilation_unit
28    }
29
30    pub fn characters(&self) -> &CharacterReader<'input> {
31        &self.characters
32    }
33
34    pub fn characters_mut(&mut self) -> &mut CharacterReader<'input> {
35        &mut self.characters
36    }
37
38    fn character_ahead_location(&self) -> Location {
39        if self.characters.reached_end() {
40            return self.cursor_location();
41        }
42        let offset = self.characters.index();
43        let mut next_characters = self.characters.clone();
44        next_characters.next().unwrap();
45        Location::with_offsets(&self.compilation_unit, offset, next_characters.index())
46    }
47
48    pub fn cursor_location(&self) -> Location {
49        let offset = self.characters.index();
50        Location::with_offset(&self.compilation_unit, offset)
51    }
52
53    fn add_syntax_error(&self, location: &Location, kind: DiagnosticKind, arguments: Vec<Rc<dyn DiagnosticArgument>>) {
54        if self.compilation_unit().prevent_equal_offset_error(location) {
55            return;
56        }
57        self.compilation_unit().add_diagnostic(Diagnostic::new_syntax_error(location, kind, arguments));
58    }
59
60    fn add_unexpected_error(&self) {
61        if self.characters.has_remaining() {
62            self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::UnexpectedCharacter, diagarg![self.characters.peek_or_zero().to_string()])
63        } else {
64            self.add_syntax_error(&self.cursor_location(), DiagnosticKind::UnexpectedEnd, vec![])
65        }
66    }
67
68    fn add_unexpected_eof_error(&self, kind: DiagnosticKind) {
69        self.add_syntax_error(&self.cursor_location(), kind, vec![]);
70    }
71
72    pub fn scan(&mut self) -> (Token, Location) {
73        while self.consume_whitespace() || self.consume_comment() {
74            // Do nothing
75        }
76        let start = self.cursor_location();
77        let ch = self.characters.peek_or_zero();
78
79        if let Some(id) = self.consume_css_id() {
80            return (Token::Identifier(id), start.combine_with(self.cursor_location()));
81        }
82
83        // DECIMAL
84        // DECIMAL.PART
85        if CharacterValidator::is_dec_digit(ch) {
86            self.characters.next();
87            while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
88                self.characters.next();
89            }
90            if self.characters.peek_or_zero() == '.' {
91                self.characters.next();
92                if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
93                    self.add_unexpected_error();
94                }
95                while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
96                    self.characters.next();
97                }
98            }
99            return self.finish_number(start);
100        }
101
102        if ch == '#' {
103            self.characters.next();
104            let mut word = String::new();
105            loop {
106                let ch = self.characters.peek_or_zero();
107                if  (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
108                    (ch >= '0' && ch <= '9') || ch == '-' || ch == '_' {
109                    word.push(ch);
110                    self.characters.next();
111                } else {
112                    break;
113                }
114            }
115            if word.is_empty() {
116                self.add_unexpected_error();
117                word = INVALIDATED_IDENTIFIER.to_owned();
118            }
119            return (Token::CssHashWord(word), start.combine_with(self.cursor_location()));
120        }
121
122        if ch == '@' {
123            // @namespace
124            if self.characters.peek_seq(10) == "@namespace" {
125                self.characters.skip_count_in_place(10);
126                return (Token::CssAtNamespace, start.combine_with(self.cursor_location()));
127            }
128            // @font-face
129            if self.characters.peek_seq(10) == "@font-face" {
130                self.characters.skip_count_in_place(10);
131                return (Token::CssAtFontFace, start.combine_with(self.cursor_location()));
132            }
133            // @import
134            if self.characters.peek_seq(10) == "@import" {
135                self.characters.skip_count_in_place(10);
136                return (Token::CssAtImport, start.combine_with(self.cursor_location()));
137            }
138            // @media
139            if self.characters.peek_seq(6) == "@media" {
140                self.characters.skip_count_in_place(6);
141                return (Token::CssAtMedia, start.combine_with(self.cursor_location()));
142            }
143        }
144
145        if ch == '!' && self.characters.peek_seq(10) == "!important" {
146            self.characters.skip_count_in_place(10);
147            return (Token::CssImportant, start.combine_with(self.cursor_location()));
148        }
149
150        match ch {
151            // .
152            // .DECIMAL
153            '.' => {
154                self.characters.next();
155                if CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
156                    while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
157                        self.characters.next();
158                    }
159                    return self.finish_number(start);
160                }
161                (Token::Dot, start.combine_with(self.cursor_location()))
162            },
163            '"' | '\'' => {
164                self.scan_string(ch, start)
165            },
166            ';' => {
167                while self.characters.peek_or_zero() == ';' {
168                    self.characters.next();
169                }
170                (Token::CssSemicolons, start.combine_with(self.cursor_location()))
171            },
172            '^' => {
173                self.characters.next();
174                if self.characters.peek_or_zero() != '=' {
175                    self.add_unexpected_error();
176                    self.characters.next();
177                    self.scan()
178                } else {
179                    self.characters.next();
180                    (Token::CssBeginsWith, start.combine_with(self.cursor_location()))
181                }
182            },
183            '$' => {
184                self.characters.next();
185                if self.characters.peek_or_zero() != '=' {
186                    self.add_unexpected_error();
187                    self.characters.next();
188                    self.scan()
189                } else {
190                    self.characters.next();
191                    (Token::CssEndsWith, start.combine_with(self.cursor_location()))
192                }
193            },
194            '*' => {
195                self.characters.next();
196                if self.characters.peek_or_zero() == '=' {
197                    self.characters.next();
198                    (Token::CssContains, start.combine_with(self.cursor_location()))
199                } else {
200                    (Token::Times, start.combine_with(self.cursor_location()))
201                }
202            },
203            '~' => {
204                self.characters.next();
205                if self.characters.peek_or_zero() == '=' {
206                    self.characters.next();
207                    (Token::CssListMatch, start.combine_with(self.cursor_location()))
208                } else {
209                    (Token::Tilde, start.combine_with(self.cursor_location()))
210                }
211            },
212            '|' => {
213                self.characters.next();
214                if self.characters.peek_or_zero() == '=' {
215                    self.characters.next();
216                    (Token::CssHreflangMatch, start.combine_with(self.cursor_location()))
217                } else {
218                    (Token::Pipe, start.combine_with(self.cursor_location()))
219                }
220            },
221            '{' => {
222                self.characters.next();
223                (Token::BlockOpen, start.combine_with(self.cursor_location()))
224            },
225            '}' => {
226                self.characters.next();
227                (Token::BlockClose, start.combine_with(self.cursor_location()))
228            },
229            '[' => {
230                self.characters.next();
231                (Token::SquareOpen, start.combine_with(self.cursor_location()))
232            },
233            ']' => {
234                self.characters.next();
235                (Token::SquareClose, start.combine_with(self.cursor_location()))
236            },
237            '(' => {
238                self.characters.next();
239                (Token::ParenOpen, start.combine_with(self.cursor_location()))
240            },
241            ')' => {
242                self.characters.next();
243                (Token::ParenClose, start.combine_with(self.cursor_location()))
244            },
245            ',' => {
246                self.characters.next();
247                (Token::Comma, start.combine_with(self.cursor_location()))
248            },
249            '%' => {
250                self.characters.next();
251                (Token::Percent, start.combine_with(self.cursor_location()))
252            },
253            '=' => {
254                self.characters.next();
255                (Token::Assign, start.combine_with(self.cursor_location()))
256            },
257            ':' => {
258                self.characters.next();
259                if self.characters.peek_or_zero() == ':' {
260                    self.characters.next();
261                    (Token::ColonColon, start.combine_with(self.cursor_location()))
262                } else {
263                    (Token::Colon, start.combine_with(self.cursor_location()))
264                }
265            },
266            '>' => {
267                self.characters.next();
268                (Token::Gt, start.combine_with(self.cursor_location()))
269            },
270            '+' => {
271                self.characters.next();
272                (Token::Plus, start.combine_with(self.cursor_location()))
273            },
274            _ => {
275                if self.characters.reached_end() {
276                    return (Token::Eof, start);
277                }
278                self.add_unexpected_error();
279                self.characters.next();
280                self.scan()
281            },
282        }
283    }
284
285    pub fn consume_whitespace(&mut self) -> bool {
286        let ch = self.characters.peek_or_zero();
287        if [' ', '\t', '\n', '\r'].contains(&ch) {
288            self.characters.next();
289            true
290        } else {
291            false
292        }
293    }
294
295    fn consume_comment(&mut self) -> bool {
296        if self.characters.peek_or_zero() == '/' && self.characters.peek_at_or_zero(1) == '*' {
297            let start = self.cursor_location();
298            self.characters.skip_count_in_place(2);
299            loop {
300                if self.characters.peek_or_zero() == '*' && self.characters.peek_at_or_zero(1) == '/' {
301                    self.characters.skip_count_in_place(2);
302                    break;
303                } else if self.characters.has_remaining() {
304                    self.characters.skip_in_place();
305                } else {
306                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForMultiLineComment);
307                    break;
308                }
309            }
310
311            let location = start.combine_with(self.cursor_location());
312            let i = location.first_offset() + 2;
313            let j = decrease_last_offset(i, location.last_offset(), 2);
314
315            self.compilation_unit.add_comment(Rc::new(Comment {
316                multiline: true,
317                content: RefCell::new(self.compilation_unit.text()[i..j].to_owned()),
318                location: RefCell::new(location),
319            }));
320
321            true
322        } else {
323            false
324        }
325    }
326
327    fn consume_css_id(&mut self) -> Option<String> {
328        let i = self.characters.index();
329        let mut prefix_n = 0;
330        if self.characters.peek_or_zero() == '_' {
331            prefix_n += 1;
332            if self.characters.peek_at_or_zero(prefix_n) == '_' {
333                prefix_n += 1;
334                if self.characters.peek_at_or_zero(prefix_n) == '_' {
335                    prefix_n += 1;
336                }
337            }
338        } else if self.characters.peek_or_zero() == '-' {
339            prefix_n += 1;
340        }
341        if CharacterValidator::is_css_identifier_start(self.characters.peek_at_or_zero(prefix_n)) {
342            self.characters.skip_count_in_place(prefix_n + 1);
343            while CharacterValidator::is_css_identifier_part(self.characters.peek_or_zero()) {
344                self.characters.next();
345            }
346            return Some(self.compilation_unit.text()[i..self.characters.index()].to_owned());
347        }
348        None
349    }
350
351    fn finish_number(&mut self, start: Location) -> (Token, Location) {
352        let digits = &self.compilation_unit.text()[start.first_offset..self.characters.index()];
353        let mut mv = f64::from_str(digits).unwrap_or(f64::NAN);
354        let mut unit: Option<String> = None;
355        if self.characters.peek_or_zero() == '%' {
356            self.characters.next();
357            mv /= 100.0;
358        } else {
359            unit = self.consume_css_id();
360        }
361        (Token::CssNumber {
362            value: mv,
363            unit,
364        }, start.combine_with(self.cursor_location()))
365    }
366
367    fn scan_string(&mut self, delim: char, start: Location) -> (Token, Location) {
368        let mut builder = String::new();
369        self.characters.next();
370        loop {
371            let ch = self.characters.peek_or_zero();
372            if ch == delim {
373                self.characters.next();
374                break;
375            } else if ch == '\\' {
376                let mut loc = self.cursor_location();
377                self.characters.next();
378                let mut digits = String::new();
379                for _ in 0..6 {
380                    let ch = self.characters.peek_or_zero();
381                    if CharacterValidator::is_hex_digit(ch) {
382                        digits.push(ch);
383                        self.characters.next();
384                    } else {
385                        break;
386                    }
387                }
388                if digits.is_empty() {
389                    self.add_unexpected_error();
390                } else {
391                    loc = loc.combine_with(self.cursor_location());
392                    let mv = u32::from_str_radix(&digits, 16).ok().and_then(|mv| char::from_u32(mv));
393                    if let Some(mv) = mv {
394                        builder.push(mv);
395                    } else {
396                        self.add_syntax_error(&loc, DiagnosticKind::CssInvalidHexEscape, diagarg![digits]);
397                    }
398                }
399            } else if self.characters.reached_end() {
400                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
401                break;
402            } else {
403                builder.push(ch);
404                self.characters.next();
405            }
406        }
407        let loc = start.combine_with(self.cursor_location());
408        (Token::String(builder), loc)
409    }
410
411    pub fn scan_arguments(&mut self) -> ((usize, usize), (Token, Location)) {
412        let i = self.characters.index();
413        let mut j: usize;
414        let mut nesting = 1;
415        let token: (Token, Location);
416        loop {
417            j = self.characters.index();
418            let ch = self.characters.peek_or_zero();
419            if ch == ')' {
420                self.characters.next();
421                nesting -= 1;
422                if nesting == 0 {
423                    token = (Token::ParenClose, Location::with_offsets(&self.compilation_unit, j, self.characters.index()));
424                    break;
425                }
426            } else if ch == '(' {
427                self.characters.next();
428                nesting += 1;
429            } else if self.characters.reached_end() {
430                self.add_syntax_error(&self.cursor_location(), DiagnosticKind::Expecting, diagarg![Token::ParenClose, Token::Eof]);
431                token = (Token::Eof, self.cursor_location());
432                break;
433            } else {
434                self.characters.next();
435            }
436        }
437        ((i, j), token)
438    }
439}