azul_simplecss/
tokenizer.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5use stream;
6use stream::Stream;
7use error::Error;
8
/// CSS combinator: the relationship between two adjacent simple selectors.
///
/// The type is a plain fieldless enum, so it derives the full set of cheap
/// value traits (`Clone`, `Copy`, `Eq`, `Hash`) in addition to `PartialEq`
/// and `Debug`. This lets callers copy a combinator out of a borrowed
/// `Token` or use it as a map/set key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Combinator {
    /// Descendant selector (whitespace between two selectors).
    Space,
    /// Child selector (`>`).
    GreaterThan,
    /// Adjacent sibling selector (`+`).
    Plus,
}
19
/// CSS token.
///
/// Every string payload is a slice borrowed from the text handed to the
/// tokenizer (lifetime `'a`); nothing is copied or unescaped.
#[derive(PartialEq,Debug)]
pub enum Token<'a> {
    /// Universal selector (`*`).
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#universal-selector
    UniversalSelector,
    /// Type selector, e.g. `div`.
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#type-selectors
    TypeSelector(&'a str),
    /// ID selector.
    ///
    /// Value contains the ident without the leading `#`.
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#id-selectors
    IdSelector(&'a str),
    /// Class selector.
    ///
    /// Value contains the ident without the leading `.`.
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#class-html
    ClassSelector(&'a str),
    /// Attribute selector.
    ///
    /// Its content is not parsed yet, so the value contains everything
    /// between `[` and `]`.
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#attribute-selectors
    AttributeSelector(&'a str),
    /// Pseudo-class.
    ///
    /// `selector` contains the ident without the leading `:`. `value` is the
    /// raw text between the parentheses when the ident is directly followed
    /// by `(`, and `None` otherwise. For `:nth-child(3)` this is
    /// `selector: "nth-child"`, `value: Some("3")`.
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#pseudo-class-selectors
    PseudoClass { selector: &'a str, value: Option<&'a str> },
    /// A [`Combinator`] between two selectors.
    ///
    /// [`Combinator`]: enum.Combinator.html
    Combinator(Combinator),
    /// Rules separator (`,`).
    ///
    /// https://www.w3.org/TR/CSS21/selector.html#grouping
    Comma,
    /// Block start.
    ///
    /// Indicates `{`.
    ///
    /// https://www.w3.org/TR/CSS21/syndata.html#rule-sets
    BlockStart,
    /// Block end.
    ///
    /// Indicates `}`.
    ///
    /// https://www.w3.org/TR/CSS21/syndata.html#rule-sets
    BlockEnd,
    /// Declaration.
    ///
    /// Contains the property name and the property value, in that order.
    /// Trailing whitespace is trimmed from the value.
    ///
    /// https://www.w3.org/TR/CSS21/syndata.html#declaration
    Declaration(&'a str, &'a str),
    /// `@` rule name (excluding the `@` sign itself). The content is not parsed;
    /// for example `@keyframes mymove` = `AtRule("keyframes"), AtStr("mymove")`.
    AtRule(&'a str),
    /// Raw identifier inside of a block that is followed by `{` instead of `:`,
    /// i.e. the name in front of a nested block.
    DeclarationStr(&'a str),
    /// Identifier following an `@` rule name (see `AtRule`).
    AtStr(&'a str),
    /// Same as `PseudoClass`, but written with two colons (`::thing`).
    DoublePseudoClass { selector: &'a str, value: Option<&'a str> },
    /// End of stream.
    ///
    /// Parsing is finished; the tokenizer returns this once the input is
    /// exhausted.
    EndOfStream,
}
94
/// Which part of a style sheet the tokenizer is currently inside.
///
/// Fieldless and private, so it derives the cheap value traits; `Debug` in
/// particular makes the state printable while diagnosing tokenizer issues.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Outside of any block: selectors and at-rule preludes are expected.
    Rule,
    /// Inside a `{ ... }` block: declarations are expected.
    Declaration,
    /// Inside a block that is itself nested in a `Declaration` block.
    DeclarationRule,
}
101
/// CSS tokenizer.
///
/// Pull-based: each call to `parse_next()` returns the next `Token` borrowed
/// from the input text.
pub struct Tokenizer<'a> {
    /// Byte stream over the input text; owns the read position.
    stream: Stream<'a>,
    /// Selects which `consume_*` routine `parse_next()` dispatches to.
    state: State,
    /// `true` right after a selector-like token. While set, whitespace may be
    /// reported as `Combinator::Space` and `>` / `+` are accepted.
    after_selector: bool,
    /// `true` while reading the prelude of an `@` rule. While set, plain
    /// identifiers are reported as `Token::AtStr` instead of
    /// `Token::TypeSelector` and whitespace is not reported as a combinator.
    has_at_rule: bool,
    /// `true` until the first `parse_next()` call, which skips leading
    /// whitespace exactly once.
    at_start: bool,
}
110
111impl<'a> Tokenizer<'a> {
112    /// Constructs a new `Tokenizer`.
113    pub fn new(text: &str) -> Tokenizer {
114        Tokenizer {
115            stream: Stream::new(text.as_bytes()),
116            state: State::Rule,
117            after_selector: false,
118            has_at_rule: false,
119            at_start: true,
120        }
121    }
122
123    /// Constructs a new bounded `Tokenizer`.
124    ///
125    /// It can be useful if CSS data is inside other data, like HTML.
126    /// Using this method you will get an absolute error positions and not relative,
127    /// like when using [`new()`].
128    ///
129    /// [`new()`]: #method.new
130    pub fn new_bound(text: &str, start: usize, end: usize) -> Tokenizer {
131        Tokenizer {
132            stream: Stream::new_bound(text.as_bytes(), start, end),
133            state: State::Rule,
134            after_selector: false,
135            has_at_rule: false,
136            at_start: true,
137        }
138    }
139
    /// Returns the current position in the text.
    ///
    /// Delegates to the underlying stream's `pos()`.
    pub fn pos(&self) -> usize {
        self.stream.pos()
    }
144
145    /// Parses a next token.
146    pub fn parse_next(&mut self) -> Result<Token<'a>, Error> {
147        if self.at_start {
148            self.stream.skip_spaces();
149            self.at_start = false;
150        }
151
152        if self.stream.at_end() {
153            return Ok(Token::EndOfStream);
154        }
155
156        match self.state {
157            State::Rule         => self.consume_rule(),
158            State::Declaration  => self.consume_declaration(),
159            State::DeclarationRule => self.consume_declaration(),
160        }
161    }
162
163    fn consume_rule(&mut self) -> Result<Token<'a>, Error> {
164        match self.stream.curr_char_raw() {
165            b'@' => {
166                self.after_selector = true;
167                self.has_at_rule = true;
168                self.stream.advance_raw(1);
169                let s = self.consume_ident()?;
170                return Ok(Token::AtRule(s));
171            }
172            b'#' => {
173                self.after_selector = true;
174                self.has_at_rule = false;
175                self.stream.advance_raw(1);
176                let s = try!(self.consume_ident());
177                return Ok(Token::IdSelector(s));
178            }
179            b'.' => {
180                self.after_selector = true;
181                self.has_at_rule = false;
182                self.stream.advance_raw(1);
183                let s = try!(self.consume_ident());
184                return Ok(Token::ClassSelector(s));
185            }
186            b'*' => {
187                self.after_selector = true;
188                self.has_at_rule = false;
189                self.stream.advance_raw(1);
190                self.stream.skip_spaces();
191                return Ok(Token::UniversalSelector);
192            }
193            b':' => {
194                self.after_selector = true;
195                self.has_at_rule = false;
196                self.stream.advance_raw(1);
197
198                // Whether this selector is a ::selector.
199                let is_double_colon = self.stream.is_char_eq(b':')?;
200                if is_double_colon {
201                    self.stream.advance_raw(1); // consume the second :
202                }
203
204                let s = try!(self.consume_ident());
205
206                if self.stream.curr_char() == Ok(b'(') {
207                    // Item is a thing()
208                    self.stream.advance_raw(1); // (
209                    let inner_len = self.stream.length_to(b')')?;
210                    let inner = self.stream.read_raw_str(inner_len);
211                    self.stream.advance_raw(1); // )
212                    return Ok(if is_double_colon {
213                        Token::DoublePseudoClass { selector: s, value: Some(inner) }
214                    } else {
215                        Token::PseudoClass { selector: s, value: Some(inner) }
216                    });
217                } else {
218                    return Ok(if is_double_colon {
219                        Token::DoublePseudoClass { selector: s, value: None }
220                    } else {
221                        Token::PseudoClass { selector: s, value: None }
222                    });
223                }
224            }
225            b'[' => {
226                self.after_selector = true;
227                self.has_at_rule = false;
228                self.stream.advance_raw(1);
229                let len = try!(self.stream.length_to(b']'));
230                let s = self.stream.read_raw_str(len);
231                self.stream.advance_raw(1); // ]
232                self.stream.skip_spaces();
233                return Ok(Token::AttributeSelector(s));
234            }
235            b',' => {
236                self.after_selector = false;
237                self.has_at_rule = false;
238                self.stream.advance_raw(1);
239                self.stream.skip_spaces();
240                return Ok(Token::Comma);
241            }
242            b'{' => {
243                self.after_selector = false;
244                self.has_at_rule = false;
245                self.state = State::Declaration;
246                self.stream.advance_raw(1);
247                return Ok(Token::BlockStart);
248            }
249            b'>' => {
250                if self.after_selector {
251                    self.after_selector = false;
252                    self.has_at_rule = false;
253                    self.stream.advance_raw(1);
254                    self.stream.skip_spaces();
255                    return Ok(Token::Combinator(Combinator::GreaterThan));
256                } else {
257                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
258                }
259            }
260            b'+' => {
261                if self.after_selector {
262                    self.after_selector = false;
263                    self.has_at_rule = false;
264                    self.stream.advance_raw(1);
265                    self.stream.skip_spaces();
266                    return Ok(Token::Combinator(Combinator::Plus));
267                } else {
268                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
269                }
270            }
271            b'/' => {
272                if try!(self.consume_comment()) {
273                    return self.parse_next();
274                } else {
275                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
276                }
277            }
278            _ => {
279                if self.stream.is_space_raw() {
280                    self.stream.skip_spaces();
281
282                    if !self.after_selector {
283                        return self.parse_next();
284                    }
285
286                    match self.stream.curr_char()? {
287                        b'{' | b'/' | b'>' | b'+' | b'*' => { return self.parse_next(); },
288                        _ => {
289                            self.after_selector = false;
290                            if !self.has_at_rule {
291                                return Ok(Token::Combinator(Combinator::Space));
292                            }
293                        }
294                    }
295                }
296
297                let s = try!(self.consume_ident());
298                let token_type = if self.has_at_rule {
299                    self.has_at_rule = true;
300                    Token::AtStr(s)
301                } else {
302                    self.has_at_rule = false;
303                    Token::TypeSelector(s)
304                };
305
306                self.after_selector = true;
307                return Ok(token_type);
308            }
309        }
310    }
311
312    fn consume_declaration(&mut self) -> Result<Token<'a>, Error> {
313        self.stream.skip_spaces();
314        self.has_at_rule = false;
315
316        match self.stream.curr_char_raw() {
317            b'}' => {
318                if self.state == State::DeclarationRule {
319                    self.state = State::Declaration;
320                } else if self.state == State::Declaration {
321                    self.state = State::Rule;
322                }
323                self.stream.advance_raw(1);
324                self.stream.skip_spaces();
325                return Ok(Token::BlockEnd);
326            },
327            b'{' => {
328                if self.state == State::Rule {
329                    self.state = State::Declaration;
330                } else if self.state == State::Declaration {
331                    self.state = State::DeclarationRule;
332                }
333                self.stream.advance_raw(1);
334                self.stream.skip_spaces();
335                return Ok(Token::BlockStart);
336            },
337            b'/' => {
338                if try!(self.consume_comment()) {
339                    return self.parse_next();
340                } else {
341                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
342                }
343            }
344            _ => {
345                let name = self.consume_ident()?;
346
347                self.stream.skip_spaces();
348
349                if self.stream.is_char_eq(b'/')? {
350                    if !try!(self.consume_comment()) {
351                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
352                    }
353                }
354
355                if self.stream.is_char_eq(b'{')? {
356                    if name.is_empty() {
357                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
358                    } else {
359                        return Ok(Token::DeclarationStr(name));
360                    }
361                }
362
363                self.stream.advance_raw(1); // :
364                self.stream.skip_spaces();
365
366                if self.stream.is_char_eq(b'/')? {
367                    if !try!(self.consume_comment()) {
368                        return Err(Error::UnknownToken(self.stream.gen_error_pos()));
369                    }
370                }
371
372                let len = self.stream.length_to_either(&[b';', b'}'])?;
373
374                if len == 0 {
375                    return Err(Error::UnknownToken(self.stream.gen_error_pos()));
376                }
377
378                let mut value = self.stream.read_raw_str(len);
379                // trim spaces at the end of the value
380                if let Some(p) = value.as_bytes().iter().rposition(|c| !stream::is_space(*c)) {
381                    value = &value[0..(p + 1)];
382                }
383
384                self.stream.skip_spaces();
385                while try!(self.stream.is_char_eq(b';')) {
386                    self.stream.advance_raw(1);
387                    self.stream.skip_spaces();
388                }
389
390                Ok(Token::Declaration(name, value))
391            }
392        }
393    }
394
395    fn consume_ident(&mut self) -> Result<&'a str, Error> {
396        let start = self.stream.pos();
397
398        while !self.stream.at_end() {
399            if self.stream.is_ident_raw() {
400                try!(self.stream.advance(1));
401            } else {
402                break;
403            }
404        }
405
406        if start == self.stream.pos() {
407            return Err(Error::UnknownToken(self.stream.gen_error_pos()));
408        }
409
410        let s = self.stream.slice_region_raw_str(start, self.stream.pos());
411        Ok(s)
412    }
413
414    fn consume_comment(&mut self) -> Result<bool, Error>  {
415        self.stream.advance_raw(1);
416
417        if try!(self.stream.is_char_eq(b'*')) {
418            self.stream.advance_raw(1); // *
419
420            while !self.stream.at_end() {
421                let len = try!(self.stream.length_to(b'*'));
422                try!(self.stream.advance(len + 1));
423                if try!(self.stream.is_char_eq(b'/')) {
424                    self.stream.advance_raw(1);
425                    break;
426                }
427            }
428
429            return Ok(true);
430        } else {
431            return Ok(false);
432        }
433    }
434}