Skip to main content

hpx_browser/css_parser/
parser.rs

1use crate::css_parser::{
2    ast::*,
3    error::ParseError,
4    source::SourceLocation,
5    token::{Token, TokenKind},
6    tokenizer::Tokenizer,
7};
8
/// CSS parser. Buffers all tokens for nesting disambiguation.
///
/// The complete token stream is materialised up front so that parsing code can save `pos`,
/// attempt one production, and rewind to the saved index if that attempt fails.
pub struct Parser<'a> {
    /// Every token produced for the input; the final element is the `Eof` token.
    tokens: Vec<Token<'a>>,
    /// Index into `tokens` of the token currently under the cursor.
    pos: usize,
    /// Parse errors accumulated so far; handed back to the caller by the public entry points.
    errors: Vec<ParseError>,
}
15
16impl<'a> Parser<'a> {
17    fn new(input: &'a str) -> Self {
18        let tokens: Vec<Token<'a>> = {
19            let mut tokenizer = Tokenizer::new(input);
20            let mut tokens = Vec::new();
21            loop {
22                let token = tokenizer.next_token();
23                let is_eof = token.kind == TokenKind::Eof;
24                tokens.push(token);
25                if is_eof {
26                    break;
27                }
28            }
29            tokens
30        };
31
32        Self {
33            tokens,
34            pos: 0,
35            errors: Vec::new(),
36        }
37    }
38
39    pub fn parse_stylesheet(input: &'a str) -> (Stylesheet<'a>, Vec<ParseError>) {
40        let mut parser = Self::new(input);
41        let loc = parser.current_location();
42        let rules = parser.consume_rule_list(true);
43        let errors = parser.errors;
44        (Stylesheet { rules, loc }, errors)
45    }
46
47    pub fn parse_declaration_list(input: &'a str) -> (Vec<Declaration<'a>>, Vec<ParseError>) {
48        let mut parser = Self::new(input);
49        let (declarations, _rules) = parser.consume_block_contents();
50        let errors = parser.errors;
51        (declarations, errors)
52    }
53
54    fn current(&self) -> &Token<'a> {
55        &self.tokens[self.pos.min(self.tokens.len() - 1)]
56    }
57
58    fn current_kind(&self) -> &TokenKind<'a> {
59        &self.current().kind
60    }
61
62    fn current_location(&self) -> SourceLocation {
63        self.current().loc
64    }
65
66    fn advance(&mut self) -> &Token<'a> {
67        let token = &self.tokens[self.pos.min(self.tokens.len() - 1)];
68        if self.pos < self.tokens.len() {
69            self.pos += 1;
70        }
71        token
72    }
73
74    fn is_eof(&self) -> bool {
75        matches!(self.current_kind(), TokenKind::Eof)
76    }
77
78    fn skip_whitespace(&mut self) {
79        while matches!(self.current_kind(), TokenKind::Whitespace) {
80            self.advance();
81        }
82    }
83
84    fn consume_rule_list(&mut self, top_level: bool) -> Vec<Rule<'a>> {
85        let mut rules = Vec::new();
86        loop {
87            self.skip_whitespace();
88            match self.current_kind() {
89                TokenKind::Eof => return rules,
90                TokenKind::CloseCurly => return rules,
91                TokenKind::AtKeyword(_) => {
92                    if let Some(rule) = self.consume_at_rule() {
93                        rules.push(Rule::At(rule));
94                    }
95                }
96                TokenKind::Cdo | TokenKind::Cdc if top_level => {
97                    self.advance();
98                }
99                _ => {
100                    if let Some(rule) = self.consume_qualified_rule() {
101                        rules.push(Rule::Qualified(rule));
102                    }
103                }
104            }
105        }
106    }
107
108    fn consume_at_rule(&mut self) -> Option<AtRule<'a>> {
109        let loc = self.current_location();
110        let name = match self.current_kind() {
111            TokenKind::AtKeyword(n) => *n,
112            _ => return None,
113        };
114        self.advance();
115
116        let mut prelude = Vec::new();
117
118        loop {
119            match self.current_kind() {
120                TokenKind::Semicolon => {
121                    self.advance();
122                    return Some(AtRule {
123                        name,
124                        prelude,
125                        block: None,
126                        loc,
127                    });
128                }
129                TokenKind::Eof => {
130                    return Some(AtRule {
131                        name,
132                        prelude,
133                        block: None,
134                        loc,
135                    });
136                }
137                TokenKind::OpenCurly => {
138                    self.advance();
139                    let block = self.consume_at_rule_block(name);
140                    if matches!(self.current_kind(), TokenKind::CloseCurly) {
141                        self.advance();
142                    }
143                    return Some(AtRule {
144                        name,
145                        prelude,
146                        block: Some(block),
147                        loc,
148                    });
149                }
150                _ => {
151                    prelude.push(self.consume_component_value());
152                }
153            }
154        }
155    }
156
157    fn consume_at_rule_block(&mut self, name: &str) -> Block<'a> {
158        let is_rule_list = matches!(
159            name.to_ascii_lowercase().as_str(),
160            "media" | "supports" | "layer" | "container" | "scope" | "document" | "keyframes"
161        );
162
163        if is_rule_list {
164            Block::RuleList(self.consume_rule_list(false))
165        } else {
166            let (declarations, rules) = self.consume_block_contents();
167            Block::DeclarationBlock {
168                declarations,
169                rules,
170            }
171        }
172    }
173
174    fn consume_qualified_rule(&mut self) -> Option<QualifiedRule<'a>> {
175        let loc = self.current_location();
176        let mut prelude = Vec::new();
177
178        loop {
179            match self.current_kind() {
180                TokenKind::Eof => {
181                    self.errors.push(ParseError::UnexpectedEof {
182                        loc: self.current_location(),
183                    });
184                    return None;
185                }
186                TokenKind::OpenCurly => {
187                    self.advance();
188                    let (declarations, rules) = self.consume_block_contents();
189                    if matches!(self.current_kind(), TokenKind::CloseCurly) {
190                        self.advance();
191                    }
192                    return Some(QualifiedRule {
193                        prelude,
194                        declarations,
195                        rules,
196                        loc,
197                    });
198                }
199                _ => {
200                    prelude.push(self.consume_component_value());
201                }
202            }
203        }
204    }
205
206    fn consume_block_contents(&mut self) -> (Vec<Declaration<'a>>, Vec<Rule<'a>>) {
207        let mut declarations = Vec::new();
208        let mut rules = Vec::new();
209
210        loop {
211            self.skip_whitespace();
212            match self.current_kind() {
213                TokenKind::Eof | TokenKind::CloseCurly => {
214                    return (declarations, rules);
215                }
216                TokenKind::Semicolon => {
217                    self.advance();
218                }
219                TokenKind::AtKeyword(_) => {
220                    if let Some(at_rule) = self.consume_at_rule() {
221                        rules.push(Rule::At(at_rule));
222                    }
223                }
224                TokenKind::Ident(_) => {
225                    let saved_pos = self.pos;
226                    if let Some(decl) = self.try_consume_declaration() {
227                        declarations.push(decl);
228                    } else {
229                        self.pos = saved_pos;
230                        if let Some(rule) = self.consume_qualified_rule() {
231                            rules.push(Rule::Qualified(rule));
232                        }
233                    }
234                }
235                _ => {
236                    if let Some(rule) = self.consume_qualified_rule() {
237                        rules.push(Rule::Qualified(rule));
238                    }
239                }
240            }
241        }
242    }
243
244    fn try_consume_declaration(&mut self) -> Option<Declaration<'a>> {
245        let loc = self.current_location();
246
247        let name = match self.current_kind() {
248            TokenKind::Ident(n) => *n,
249            _ => return None,
250        };
251        self.advance();
252
253        self.skip_whitespace();
254
255        if !matches!(self.current_kind(), TokenKind::Colon) {
256            return None;
257        }
258        self.advance();
259
260        self.skip_whitespace();
261
262        let mut value = Vec::new();
263        loop {
264            match self.current_kind() {
265                TokenKind::Semicolon => {
266                    self.advance();
267                    break;
268                }
269                TokenKind::CloseCurly | TokenKind::Eof => {
270                    break;
271                }
272                _ => {
273                    value.push(self.consume_component_value());
274                }
275            }
276        }
277
278        let important = check_and_strip_important(&mut value);
279
280        Some(Declaration {
281            name,
282            value,
283            important,
284            loc,
285        })
286    }
287
288    fn consume_component_value(&mut self) -> ComponentValue<'a> {
289        match self.current_kind() {
290            TokenKind::OpenCurly | TokenKind::OpenSquare | TokenKind::OpenParen => {
291                ComponentValue::SimpleBlock(self.consume_simple_block())
292            }
293            TokenKind::Function(_) => ComponentValue::Function(self.consume_function()),
294            _ => {
295                let token = self.advance().clone();
296                ComponentValue::Token(token)
297            }
298        }
299    }
300
301    fn consume_simple_block(&mut self) -> SimpleBlock<'a> {
302        let loc = self.current_location();
303        let opening = match self.current_kind() {
304            TokenKind::OpenCurly => '{',
305            TokenKind::OpenSquare => '[',
306            TokenKind::OpenParen => '(',
307            _ => unreachable!(),
308        };
309        let closing = match opening {
310            '{' => TokenKind::CloseCurly,
311            '[' => TokenKind::CloseSquare,
312            '(' => TokenKind::CloseParen,
313            _ => unreachable!(),
314        };
315        self.advance();
316
317        let mut value = Vec::new();
318        loop {
319            if *self.current_kind() == closing {
320                self.advance();
321                return SimpleBlock {
322                    token: opening,
323                    value,
324                    loc,
325                };
326            }
327            if self.is_eof() {
328                return SimpleBlock {
329                    token: opening,
330                    value,
331                    loc,
332                };
333            }
334            value.push(self.consume_component_value());
335        }
336    }
337
338    fn consume_function(&mut self) -> CssFunction<'a> {
339        let loc = self.current_location();
340        let name = match self.current_kind() {
341            TokenKind::Function(n) => *n,
342            _ => unreachable!(),
343        };
344        self.advance();
345
346        let mut arguments = Vec::new();
347        loop {
348            match self.current_kind() {
349                TokenKind::CloseParen => {
350                    self.advance();
351                    return CssFunction {
352                        name,
353                        arguments,
354                        loc,
355                    };
356                }
357                TokenKind::Eof => {
358                    return CssFunction {
359                        name,
360                        arguments,
361                        loc,
362                    };
363                }
364                _ => {
365                    arguments.push(self.consume_component_value());
366                }
367            }
368        }
369    }
370}
371
372fn check_and_strip_important(value: &mut Vec<ComponentValue<'_>>) -> bool {
373    let mut i = value.len();
374    while i > 0 {
375        i -= 1;
376        match &value[i] {
377            ComponentValue::Token(Token {
378                kind: TokenKind::Whitespace,
379                ..
380            }) => continue,
381            ComponentValue::Token(Token {
382                kind: TokenKind::Ident(name),
383                ..
384            }) if name.eq_ignore_ascii_case("important") => {
385                while i > 0 {
386                    i -= 1;
387                    match &value[i] {
388                        ComponentValue::Token(Token {
389                            kind: TokenKind::Whitespace,
390                            ..
391                        }) => continue,
392                        ComponentValue::Token(Token {
393                            kind: TokenKind::Delim('!'),
394                            ..
395                        }) => {
396                            value.truncate(i);
397                            while value
398                                .last()
399                                .map(|v| {
400                                    matches!(
401                                        v,
402                                        ComponentValue::Token(Token {
403                                            kind: TokenKind::Whitespace,
404                                            ..
405                                        })
406                                    )
407                                })
408                                .unwrap_or(false)
409                            {
410                                value.pop();
411                            }
412                            return true;
413                        }
414                        _ => return false,
415                    }
416                }
417                return false;
418            }
419            _ => return false,
420        }
421    }
422    false
423}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// One rule with one declaration parses without errors and is not flagged important.
    #[test]
    fn parse_simple_rule() {
        let (stylesheet, errors) = Parser::parse_stylesheet("h1 { color: red; }");
        assert!(errors.is_empty());
        assert_eq!(stylesheet.rules.len(), 1);
        if let Rule::Qualified(qr) = &stylesheet.rules[0] {
            let declarations = &qr.declarations;
            assert_eq!(declarations.len(), 1);
            assert_eq!(declarations[0].name, "color");
            assert!(!declarations[0].important);
        } else {
            panic!("Expected qualified rule");
        }
    }

    /// A trailing `!important` sets the declaration's `important` flag.
    #[test]
    fn parse_important() {
        let (stylesheet, _) = Parser::parse_stylesheet("h1 { color: red !important; }");
        if let Rule::Qualified(qr) = &stylesheet.rules[0] {
            assert!(qr.declarations[0].important);
        } else {
            panic!();
        }
    }

    /// `@media` parses as a single at-rule that carries a block.
    #[test]
    fn parse_at_rule_media() {
        let source = "@media screen { h1 { color: red; } }";
        let (stylesheet, errors) = Parser::parse_stylesheet(source);
        assert!(errors.is_empty());
        assert_eq!(stylesheet.rules.len(), 1);
        if let Rule::At(at) = &stylesheet.rules[0] {
            assert_eq!(at.name, "media");
            assert!(at.block.is_some());
        } else {
            panic!("Expected at-rule");
        }
    }

    /// Every `;`-separated declaration in a block is collected.
    #[test]
    fn parse_multiple_declarations() {
        let source = "h1 { color: red; font-size: 2em; margin: 0; }";
        let (stylesheet, _) = Parser::parse_stylesheet(source);
        if let Rule::Qualified(qr) = &stylesheet.rules[0] {
            assert_eq!(qr.declarations.len(), 3);
        } else {
            panic!();
        }
    }

    /// A bare declaration list parses; the final `;` is optional.
    #[test]
    fn parse_declaration_list_inline() {
        let (decls, _) = Parser::parse_declaration_list("color: red; font-size: 16px");
        assert_eq!(decls.len(), 2);
        let names = [decls[0].name, decls[1].name];
        assert_eq!(names[0], "color");
        assert_eq!(names[1], "font-size");
    }

    /// A rule nested inside another rule lands in the parent's `rules`.
    #[test]
    fn parse_nested_rule() {
        let source = ".card { color: black; &:hover { color: blue; } }";
        let (stylesheet, errors) = Parser::parse_stylesheet(source);
        assert!(errors.is_empty(), "errors: {:?}", errors);
        if let Rule::Qualified(qr) = &stylesheet.rules[0] {
            assert_eq!(qr.declarations.len(), 1);
            assert_eq!(qr.rules.len(), 1);
        } else {
            panic!();
        }
    }

    /// A function call inside a value is kept as a function component named after it.
    #[test]
    fn parse_function_in_value() {
        let (stylesheet, _) = Parser::parse_stylesheet("h1 { color: rgb(255, 0, 0); }");
        if let Rule::Qualified(qr) = &stylesheet.rules[0] {
            let mut components = qr.declarations[0].value.iter();
            let has_fn =
                components.any(|v| matches!(v, ComponentValue::Function(f) if f.name == "rgb"));
            assert!(has_fn);
        } else {
            panic!();
        }
    }

    /// Empty input yields no rules and no errors.
    #[test]
    fn parse_empty_stylesheet() {
        let (stylesheet, errors) = Parser::parse_stylesheet("");
        assert!(errors.is_empty());
        assert!(stylesheet.rules.is_empty());
    }

    /// An unclosed outer rule still produces at least one rule.
    #[test]
    fn error_recovery_unclosed_rule() {
        let source = "h1 { color: red; h2 { font-size: 1em; }";
        let (stylesheet, _) = Parser::parse_stylesheet(source);
        assert!(!stylesheet.rules.is_empty());
    }
}
530}