azul_simplecss/
tokenizer.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5use stream;
6use stream::Stream;
7use error::Error;
8
/// CSS combinator.
///
/// Describes how two adjacent simple selectors are related to each other.
/// The type is a plain, field-less enum, so it is cheap to copy and can be
/// used as a key in hashed collections.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum Combinator {
    /// Descendant selector (whitespace between two selectors).
    Space,
    /// Child selector (`>`).
    GreaterThan,
    /// Adjacent sibling selector (`+`).
    Plus,
    /// General sibling selector (`~`).
    Tilde,
}
21
22/// CSS token.
23#[derive(PartialEq,Debug)]
24pub enum Token<'a> {
25    /// Universal selector
26    ///
27    /// https://www.w3.org/TR/CSS21/selector.html#universal-selector
28    UniversalSelector,
29    /// Type selector
30    ///
31    /// https://www.w3.org/TR/CSS21/selector.html#type-selectors
32    TypeSelector(&'a str),
33    /// ID selector
34    ///
35    /// Value contains ident without `#`.
36    ///
37    /// https://www.w3.org/TR/CSS21/selector.html#id-selectors
38    IdSelector(&'a str),
39    /// Class selector
40    ///
41    /// Value contains ident without `.`.
42    ///
43    /// https://www.w3.org/TR/CSS21/selector.html#class-html
44    ClassSelector(&'a str),
45    /// Attribute selector
46    ///
47    /// We do not parse it's content yet, so value contains everything between `[]`.
48    ///
49    /// https://www.w3.org/TR/CSS21/selector.html#attribute-selectors
50    AttributeSelector(&'a str),
51    /// Pseudo-class
52    ///
53    /// Value contains ident without `:`.
54    /// Selector: `"nth-child"`, value: The thing between the braces - `Some("3")`
55    ///
56    /// https://www.w3.org/TR/CSS21/selector.html#pseudo-class-selectors
57    PseudoClass { selector: &'a str, value: Option<&'a str> },
58    /// `Combinator`
59    Combinator(Combinator),
60    /// Rules separator
61    ///
62    /// https://www.w3.org/TR/CSS21/selector.html#grouping
63    Comma,
64    /// Block start
65    ///
66    /// Indicates `{`.
67    ///
68    /// https://www.w3.org/TR/CSS21/syndata.html#rule-sets
69    BlockStart,
70    /// Block end
71    ///
72    /// Indicates `}`.
73    ///
74    /// https://www.w3.org/TR/CSS21/syndata.html#rule-sets
75    BlockEnd,
76    /// Declaration
77    ///
78    /// Contains property name and property value.
79    ///
80    /// https://www.w3.org/TR/CSS21/syndata.html#declaration
81    Declaration(&'a str, &'a str),
82    /// `@` rule (excluding the `@` sign itself). The content is not parsed,
83    /// for example `@keyframes mymove` = `AtRule("keyframes"), AtStr("mymove")`.
84    AtRule(&'a str),
85    /// Raw Str inside of block
86    DeclarationStr(&'a str),
87    /// String following an @rule
88    AtStr(&'a str),
89    /// Same as PseudoClass, but with two colons (`::thing`).
90    DoublePseudoClass { selector: &'a str, value: Option<&'a str> },
91    /// End of stream
92    ///
93    /// Parsing is finished.
94    EndOfStream,
95}
96
/// Which part of a stylesheet the tokenizer is currently inside of.
#[derive(PartialEq)]
enum State {
    /// Outside of any block: selectors and `@` rule preludes are expected.
    Rule,
    /// Inside of a `{}` block: declarations are expected.
    Declaration,
    /// Inside of a block that is nested in another block.
    DeclarationRule,
}
103
104/// CSS tokenizer.
105pub struct Tokenizer<'a> {
106    stream: Stream<'a>,
107    state: State,
108    after_selector: bool,
109    has_at_rule: bool,
110    at_start: bool,
111}
112
113impl<'a> Tokenizer<'a> {
114    /// Constructs a new `Tokenizer`.
115    pub fn new(text: &str) -> Tokenizer {
116        Tokenizer {
117            stream: Stream::new(text.as_bytes()),
118            state: State::Rule,
119            after_selector: false,
120            has_at_rule: false,
121            at_start: true,
122        }
123    }
124
125    /// Constructs a new bounded `Tokenizer`.
126    ///
127    /// It can be useful if CSS data is inside other data, like HTML.
128    /// Using this method you will get an absolute error positions and not relative,
129    /// like when using [`new()`].
130    ///
131    /// [`new()`]: #method.new
132    pub fn new_bound(text: &str, start: usize, end: usize) -> Tokenizer {
133        Tokenizer {
134            stream: Stream::new_bound(text.as_bytes(), start, end),
135            state: State::Rule,
136            after_selector: false,
137            has_at_rule: false,
138            at_start: true,
139        }
140    }
141
142    /// Returns a current position in the text.
143    pub fn pos(&self) -> usize {
144        self.stream.pos()
145    }
146
147    /// Parses a next token.
148    pub fn parse_next(&mut self) -> Result<Token<'a>, Error> {
149        if self.at_start {
150            self.stream.skip_spaces();
151            self.at_start = false;
152        }
153
154        if self.stream.at_end() {
155            return Ok(Token::EndOfStream);
156        }
157
158        match self.state {
159            State::Rule         => self.consume_rule(),
160            State::Declaration  => self.consume_declaration(),
161            State::DeclarationRule => self.consume_declaration(),
162        }
163    }
164
165    fn consume_rule(&mut self) -> Result<Token<'a>, Error> {
166        match self.stream.curr_char_raw() {
167            b'@' => {
168                self.after_selector = true;
169                self.has_at_rule = true;
170                self.stream.advance_raw(1);
171                let s = self.consume_ident()?;
172                return Ok(Token::AtRule(s));
173            }
174            b'#' => {
175                self.after_selector = true;
176                self.has_at_rule = false;
177                self.stream.advance_raw(1);
178                let s = try!(self.consume_ident());
179                return Ok(Token::IdSelector(s));
180            }
181            b'.' => {
182                self.after_selector = true;
183                self.has_at_rule = false;
184                self.stream.advance_raw(1);
185                let s = try!(self.consume_ident());
186                return Ok(Token::ClassSelector(s));
187            }
188            b'*' => {
189                self.after_selector = true;
190                self.has_at_rule = false;
191                self.stream.advance_raw(1);
192                self.stream.skip_spaces();
193                return Ok(Token::UniversalSelector);
194            }
195            b':' => {
196                self.after_selector = true;
197                self.has_at_rule = false;
198                self.stream.advance_raw(1);
199
200                // Whether this selector is a ::selector.
201                let is_double_colon = self.stream.is_char_eq(b':')?;
202                if is_double_colon {
203                    self.stream.advance_raw(1); // consume the second :
204                }
205
206                let s = try!(self.consume_ident());
207
208                if self.stream.curr_char() == Ok(b'(') {
209                    // Item is a thing()
210                    self.stream.advance_raw(1); // (
211                    let inner_len = self.stream.length_to(b')')?;
212                    let inner = self.stream.read_raw_str(inner_len);
213                    self.stream.advance_raw(1); // )
214                    return Ok(if is_double_colon {
215                        Token::DoublePseudoClass { selector: s, value: Some(inner) }
216                    } else {
217                        Token::PseudoClass { selector: s, value: Some(inner) }
218                    });
219                } else {
220                    return Ok(if is_double_colon {
221                        Token::DoublePseudoClass { selector: s, value: None }
222                    } else {
223                        Token::PseudoClass { selector: s, value: None }
224                    });
225                }
226            }
227            b'[' => {
228                self.after_selector = true;
229                self.has_at_rule = false;
230                self.stream.advance_raw(1);
231                let len = try!(self.stream.length_to(b']'));
232                let s = self.stream.read_raw_str(len);
233                self.stream.advance_raw(1); // ]
234                self.stream.skip_spaces();
235                return Ok(Token::AttributeSelector(s));
236            }
237            b',' => {
238                self.after_selector = false;
239                self.has_at_rule = false;
240                self.stream.advance_raw(1);
241                self.stream.skip_spaces();
242                return Ok(Token::Comma);
243            }
244            b'{' => {
245                self.after_selector = false;
246                self.has_at_rule = false;
247                self.state = State::Declaration;
248                self.stream.advance_raw(1);
249                return Ok(Token::BlockStart);
250            }
251            b'>' => {
252                if self.after_selector {
253                    self.after_selector = false;
254                    self.has_at_rule = false;
255                    self.stream.advance_raw(1);
256                    self.stream.skip_spaces();
257                    return Ok(Token::Combinator(Combinator::GreaterThan));
258                } else {
259                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
260                }
261            }
262            b'+' => {
263                if self.after_selector {
264                    self.after_selector = false;
265                    self.has_at_rule = false;
266                    self.stream.advance_raw(1);
267                    self.stream.skip_spaces();
268                    return Ok(Token::Combinator(Combinator::Plus));
269                } else {
270                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
271                }
272            }
273            b'~' => {
274                if self.after_selector {
275                    self.after_selector = false;
276                    self.has_at_rule = false;
277                    self.stream.advance_raw(1);
278                    self.stream.skip_spaces();
279                    return Ok(Token::Combinator(Combinator::Tilde));
280                } else {
281                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
282                }
283            }
284            b'/' => {
285                if try!(self.consume_comment()) {
286                    return self.parse_next();
287                } else {
288                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
289                }
290            }
291            _ => {
292                if self.stream.is_space_raw() {
293                    self.stream.skip_spaces();
294
295                    if !self.after_selector {
296                        return self.parse_next();
297                    }
298
299                    match self.stream.curr_char()? {
300                        b'{' | b'/' | b'>' | b'+' | b'~' | b'*' => { return self.parse_next(); },
301                        _ => {
302                            self.after_selector = false;
303                            if !self.has_at_rule {
304                                return Ok(Token::Combinator(Combinator::Space));
305                            }
306                        }
307                    }
308                }
309
310                let s = try!(self.consume_ident());
311                let token_type = if self.has_at_rule {
312                    self.has_at_rule = true;
313                    Token::AtStr(s)
314                } else {
315                    self.has_at_rule = false;
316                    Token::TypeSelector(s)
317                };
318
319                self.after_selector = true;
320                return Ok(token_type);
321            }
322        }
323    }
324
325    fn consume_declaration(&mut self) -> Result<Token<'a>, Error> {
326        self.stream.skip_spaces();
327        self.has_at_rule = false;
328
329        match self.stream.curr_char_raw() {
330            b'}' => {
331                if self.state == State::DeclarationRule {
332                    self.state = State::Declaration;
333                } else if self.state == State::Declaration {
334                    self.state = State::Rule;
335                }
336                self.stream.advance_raw(1);
337                self.stream.skip_spaces();
338                return Ok(Token::BlockEnd);
339            },
340            b'{' => {
341                if self.state == State::Rule {
342                    self.state = State::Declaration;
343                } else if self.state == State::Declaration {
344                    self.state = State::DeclarationRule;
345                }
346                self.stream.advance_raw(1);
347                self.stream.skip_spaces();
348                return Ok(Token::BlockStart);
349            },
350            b'/' => {
351                if try!(self.consume_comment()) {
352                    return self.parse_next();
353                } else {
354                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
355                }
356            }
357            _ => {
358                let name = self.consume_ident()?;
359
360                self.stream.skip_spaces();
361
362                if self.stream.is_char_eq(b'/')? {
363                    if !try!(self.consume_comment()) {
364                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
365                    }
366                }
367
368                if self.stream.is_char_eq(b'{')? {
369                    if name.is_empty() {
370                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
371                    } else {
372                        return Ok(Token::DeclarationStr(name));
373                    }
374                }
375
376                self.stream.advance_raw(1); // :
377                self.stream.skip_spaces();
378
379                if self.stream.is_char_eq(b'/')? {
380                    if !try!(self.consume_comment()) {
381                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
382                    }
383                }
384
385                let len = self.stream.length_to_either(&[b';', b'}'])?;
386
387                if len == 0 {
388                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
389                }
390
391                let mut value = self.stream.read_raw_str(len);
392                // trim spaces at the end of the value
393                if let Some(p) = value.as_bytes().iter().rposition(|c| !stream::is_space(*c)) {
394                    value = &value[0..(p + 1)];
395                }
396
397                self.stream.skip_spaces();
398                while try!(self.stream.is_char_eq(b';')) {
399                    self.stream.advance_raw(1);
400                    self.stream.skip_spaces();
401                }
402
403                Ok(Token::Declaration(name, value))
404            }
405        }
406    }
407
408    fn consume_ident(&mut self) -> Result<&'a str, Error> {
409        let start = self.stream.pos();
410
411        while !self.stream.at_end() {
412            if self.stream.is_ident_raw() {
413                try!(self.stream.advance(1));
414            } else {
415                break;
416            }
417        }
418
419        if start == self.stream.pos() {
420            return Err(Error::UnknownToken(self.stream.gen_error_pos()));
421        }
422
423        let s = self.stream.slice_region_raw_str(start, self.stream.pos());
424        Ok(s)
425    }
426
427    fn consume_comment(&mut self) -> Result<bool, Error>  {
428        self.stream.advance_raw(1);
429
430        if try!(self.stream.is_char_eq(b'*')) {
431            self.stream.advance_raw(1); // *
432
433            while !self.stream.at_end() {
434                let len = try!(self.stream.length_to(b'*'));
435                try!(self.stream.advance(len + 1));
436                if try!(self.stream.is_char_eq(b'/')) {
437                    self.stream.advance_raw(1);
438                    break;
439                }
440            }
441
442            return Ok(true);
443        } else {
444            return Ok(false);
445        }
446    }
447}