// hpx_browser/css_selectors/parser.rs

1use crate::{
2    css_parser::{Token, TokenKind, Tokenizer, resolve_escapes},
3    css_selectors::{ast::*, error::SelectorParseError, nth::parse_nth},
4};
5
6pub fn parse_selector_list(input: &str) -> Result<SelectorList, SelectorParseError> {
7    let mut parser = SelectorParser::new(input);
8    parser.parse_selector_list()
9}
10
11pub fn parse_selector_list_forgiving(input: &str) -> SelectorList {
12    let mut parser = SelectorParser::new(input);
13    parser.parse_selector_list_forgiving()
14}
15
16struct SelectorParser<'a> {
17    tokens: Vec<Token<'a>>,
18    pos: usize,
19}
20
21impl<'a> SelectorParser<'a> {
22    fn new(input: &'a str) -> Self {
23        let tokens: Vec<Token<'a>> = Tokenizer::new(input).collect();
24        Self { tokens, pos: 0 }
25    }
26
27    fn from_tokens(tokens: Vec<Token<'a>>) -> Self {
28        Self { tokens, pos: 0 }
29    }
30
31    fn current_kind(&self) -> TokenKind<'a> {
32        if self.pos < self.tokens.len() {
33            self.tokens[self.pos].kind.clone()
34        } else {
35            TokenKind::Eof
36        }
37    }
38
39    fn current_token(&self) -> Option<&Token<'a>> {
40        self.tokens.get(self.pos)
41    }
42
43    fn advance(&mut self) {
44        if self.pos < self.tokens.len() {
45            self.pos += 1;
46        }
47    }
48
49    fn skip_whitespace(&mut self) {
50        while matches!(self.current_kind(), TokenKind::Whitespace) {
51            self.advance();
52        }
53    }
54
55    fn is_eof(&self) -> bool {
56        self.pos >= self.tokens.len()
57    }
58
59    fn parse_selector_list(&mut self) -> Result<SelectorList, SelectorParseError> {
60        let mut selectors = Vec::new();
61        self.skip_whitespace();
62
63        if self.is_eof() {
64            return Err(SelectorParseError::EmptySelector);
65        }
66
67        selectors.push(self.parse_complex_selector()?);
68
69        loop {
70            self.skip_whitespace();
71            if self.is_eof() {
72                break;
73            }
74            if matches!(self.current_kind(), TokenKind::Comma) {
75                self.advance();
76                self.skip_whitespace();
77                selectors.push(self.parse_complex_selector()?);
78            } else {
79                break;
80            }
81        }
82
83        Ok(selectors)
84    }
85
86    fn parse_selector_list_forgiving(&mut self) -> SelectorList {
87        let mut selectors = Vec::new();
88        self.skip_whitespace();
89
90        if self.is_eof() {
91            return selectors;
92        }
93
94        if let Ok(sel) = self.try_parse_complex_selector() {
95            selectors.push(sel);
96        }
97
98        loop {
99            self.skip_whitespace();
100            if self.is_eof() {
101                break;
102            }
103            if matches!(self.current_kind(), TokenKind::Comma) {
104                self.advance();
105                self.skip_whitespace();
106                if let Ok(sel) = self.try_parse_complex_selector() {
107                    selectors.push(sel);
108                } else {
109                    while !self.is_eof() && !matches!(self.current_kind(), TokenKind::Comma) {
110                        self.advance();
111                    }
112                }
113            } else {
114                break;
115            }
116        }
117
118        selectors
119    }
120
121    fn try_parse_complex_selector(&mut self) -> Result<Selector, SelectorParseError> {
122        let saved = self.pos;
123        match self.parse_complex_selector() {
124            Ok(s) => Ok(s),
125            Err(e) => {
126                self.pos = saved;
127                Err(e)
128            }
129        }
130    }
131
132    fn parse_complex_selector(&mut self) -> Result<Selector, SelectorParseError> {
133        let mut components = Vec::new();
134
135        let compound = self.parse_compound_selector()?;
136        components.extend(compound);
137
138        loop {
139            let had_whitespace = matches!(self.current_kind(), TokenKind::Whitespace);
140            self.skip_whitespace();
141
142            if self.is_eof()
143                || matches!(
144                    self.current_kind(),
145                    TokenKind::Comma | TokenKind::CloseParen | TokenKind::CloseCurly
146                )
147            {
148                break;
149            }
150
151            let combinator = match self.current_kind() {
152                TokenKind::Delim('>') => {
153                    self.advance();
154                    self.skip_whitespace();
155                    Combinator::Child
156                }
157                TokenKind::Delim('+') => {
158                    self.advance();
159                    self.skip_whitespace();
160                    Combinator::NextSibling
161                }
162                TokenKind::Delim('~') => {
163                    self.advance();
164                    self.skip_whitespace();
165                    Combinator::SubsequentSibling
166                }
167                _ if had_whitespace => Combinator::Descendant,
168                _ => break,
169            };
170
171            let compound = self.parse_compound_selector()?;
172            components.push(Component::Combinator(combinator));
173            components.extend(compound);
174        }
175
176        components.reverse();
177
178        let specificity = compute_specificity_from_components(&components);
179        Ok(Selector::new(components, specificity))
180    }
181
182    fn parse_compound_selector(&mut self) -> Result<Vec<Component>, SelectorParseError> {
183        let mut components = Vec::new();
184
185        loop {
186            match self.current_kind() {
187                TokenKind::Ident(name) => {
188                    let name = resolve_escapes(name).to_string();
189                    components.push(Component::Simple(SimpleSelector::Type(name)));
190                    self.advance();
191                }
192                TokenKind::Delim('*') => {
193                    components.push(Component::Simple(SimpleSelector::Universal));
194                    self.advance();
195                }
196                TokenKind::Delim('.') => {
197                    self.advance();
198                    match self.current_kind() {
199                        TokenKind::Ident(name) => {
200                            let name = resolve_escapes(name).to_string();
201                            components.push(Component::Simple(SimpleSelector::Class(name)));
202                            self.advance();
203                        }
204                        _ => {
205                            return Err(SelectorParseError::UnexpectedToken {
206                                loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
207                                message: "expected class name after '.'".into(),
208                            });
209                        }
210                    }
211                }
212                TokenKind::Hash { value, .. } => {
213                    let value = resolve_escapes(value).to_string();
214                    components.push(Component::Simple(SimpleSelector::Id(value)));
215                    self.advance();
216                }
217                TokenKind::OpenSquare => {
218                    components.push(Component::Simple(self.parse_attribute_selector()?));
219                }
220                TokenKind::Colon => {
221                    self.advance();
222                    if matches!(self.current_kind(), TokenKind::Colon) {
223                        self.advance();
224                        components.push(Component::Simple(self.parse_pseudo_element()?));
225                    } else {
226                        components.push(Component::Simple(self.parse_pseudo_class()?));
227                    }
228                }
229                TokenKind::Delim('&') => {
230                    components.push(Component::Simple(SimpleSelector::Nesting));
231                    self.advance();
232                }
233                _ => break,
234            }
235        }
236
237        if components.is_empty() {
238            return Err(SelectorParseError::EmptySelector);
239        }
240
241        Ok(components)
242    }
243
244    fn parse_attribute_selector(&mut self) -> Result<SimpleSelector, SelectorParseError> {
245        self.advance(); // [
246        self.skip_whitespace();
247
248        let name = match self.current_kind() {
249            TokenKind::Ident(n) => {
250                let n = resolve_escapes(n).to_string();
251                self.advance();
252                n
253            }
254            _ => {
255                return Err(SelectorParseError::UnexpectedToken {
256                    loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
257                    message: "expected attribute name".into(),
258                });
259            }
260        };
261
262        self.skip_whitespace();
263
264        if matches!(self.current_kind(), TokenKind::CloseSquare) {
265            self.advance();
266            return Ok(SimpleSelector::Attribute {
267                name,
268                operator: None,
269                value: None,
270                case_sensitivity: CaseSensitivity::Default,
271            });
272        }
273
274        let operator = self.parse_attribute_operator()?;
275        self.skip_whitespace();
276
277        let value = match self.current_kind() {
278            TokenKind::String(s) => {
279                let s = resolve_escapes(s).to_string();
280                self.advance();
281                s
282            }
283            TokenKind::Ident(s) => {
284                let s = resolve_escapes(s).to_string();
285                self.advance();
286                s
287            }
288            _ => {
289                return Err(SelectorParseError::UnexpectedToken {
290                    loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
291                    message: "expected attribute value".into(),
292                });
293            }
294        };
295
296        self.skip_whitespace();
297
298        let case_sensitivity = match self.current_kind() {
299            TokenKind::Ident(flag) if flag.eq_ignore_ascii_case("i") => {
300                self.advance();
301                self.skip_whitespace();
302                CaseSensitivity::CaseInsensitive
303            }
304            TokenKind::Ident(flag) if flag.eq_ignore_ascii_case("s") => {
305                self.advance();
306                self.skip_whitespace();
307                CaseSensitivity::CaseSensitive
308            }
309            _ => CaseSensitivity::Default,
310        };
311
312        if matches!(self.current_kind(), TokenKind::CloseSquare) {
313            self.advance();
314        }
315
316        Ok(SimpleSelector::Attribute {
317            name,
318            operator: Some(operator),
319            value: Some(value),
320            case_sensitivity,
321        })
322    }
323
324    fn parse_attribute_operator(&mut self) -> Result<AttributeOperator, SelectorParseError> {
325        match self.current_kind() {
326            TokenKind::Delim('=') => {
327                self.advance();
328                Ok(AttributeOperator::Exact)
329            }
330            TokenKind::Delim('~') => {
331                self.advance();
332                if matches!(self.current_kind(), TokenKind::Delim('=')) {
333                    self.advance();
334                }
335                Ok(AttributeOperator::Includes)
336            }
337            TokenKind::Delim('|') => {
338                self.advance();
339                if matches!(self.current_kind(), TokenKind::Delim('=')) {
340                    self.advance();
341                }
342                Ok(AttributeOperator::DashMatch)
343            }
344            TokenKind::Delim('^') => {
345                self.advance();
346                if matches!(self.current_kind(), TokenKind::Delim('=')) {
347                    self.advance();
348                }
349                Ok(AttributeOperator::Prefix)
350            }
351            TokenKind::Delim('$') => {
352                self.advance();
353                if matches!(self.current_kind(), TokenKind::Delim('=')) {
354                    self.advance();
355                }
356                Ok(AttributeOperator::Suffix)
357            }
358            TokenKind::Delim('*') => {
359                self.advance();
360                if matches!(self.current_kind(), TokenKind::Delim('=')) {
361                    self.advance();
362                }
363                Ok(AttributeOperator::Substring)
364            }
365            _ => Err(SelectorParseError::UnexpectedToken {
366                loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
367                message: "expected attribute operator".into(),
368            }),
369        }
370    }
371
372    fn parse_pseudo_class(&mut self) -> Result<SimpleSelector, SelectorParseError> {
373        match self.current_kind() {
374            TokenKind::Ident(name) => {
375                let name_lower = name.to_ascii_lowercase();
376                self.advance();
377                let pc = match name_lower.as_str() {
378                    "hover" => PseudoClass::Hover,
379                    "active" => PseudoClass::Active,
380                    "focus" => PseudoClass::Focus,
381                    "focus-within" => PseudoClass::FocusWithin,
382                    "focus-visible" => PseudoClass::FocusVisible,
383                    "link" => PseudoClass::Link,
384                    "visited" => PseudoClass::Visited,
385                    "any-link" => PseudoClass::AnyLink,
386                    "target" => PseudoClass::Target,
387                    "enabled" => PseudoClass::Enabled,
388                    "disabled" => PseudoClass::Disabled,
389                    "checked" => PseudoClass::Checked,
390                    "default" => PseudoClass::Default,
391                    "indeterminate" => PseudoClass::Indeterminate,
392                    "required" => PseudoClass::Required,
393                    "optional" => PseudoClass::Optional,
394                    "valid" => PseudoClass::Valid,
395                    "invalid" => PseudoClass::Invalid,
396                    "in-range" => PseudoClass::InRange,
397                    "out-of-range" => PseudoClass::OutOfRange,
398                    "read-write" => PseudoClass::ReadWrite,
399                    "read-only" => PseudoClass::ReadOnly,
400                    "placeholder-shown" => PseudoClass::PlaceholderShown,
401                    "root" => PseudoClass::Root,
402                    "empty" => PseudoClass::Empty,
403                    "first-child" => PseudoClass::FirstChild,
404                    "last-child" => PseudoClass::LastChild,
405                    "only-child" => PseudoClass::OnlyChild,
406                    "first-of-type" => PseudoClass::FirstOfType,
407                    "last-of-type" => PseudoClass::LastOfType,
408                    "only-of-type" => PseudoClass::OnlyOfType,
409                    _ => {
410                        return Err(SelectorParseError::UnsupportedPseudoClass(name_lower));
411                    }
412                };
413                Ok(SimpleSelector::PseudoClass(pc))
414            }
415            TokenKind::Function(name) => {
416                let name_lower = name.to_ascii_lowercase();
417                self.advance();
418
419                let result = match name_lower.as_str() {
420                    "nth-child" => self.parse_nth_function(false, false),
421                    "nth-last-child" => self.parse_nth_function(true, false),
422                    "nth-of-type" => self.parse_nth_function(false, true),
423                    "nth-last-of-type" => self.parse_nth_function(true, true),
424                    "not" => self.parse_functional_pseudo(PseudoClass::Not),
425                    "is" => self.parse_functional_pseudo_forgiving(PseudoClass::Is),
426                    "where" => self.parse_functional_pseudo_forgiving(PseudoClass::Where),
427                    "has" => self.parse_has_pseudo(),
428                    "lang" => self.parse_lang_pseudo(),
429                    _ => Err(SelectorParseError::UnsupportedPseudoClass(name_lower)),
430                };
431
432                if matches!(self.current_kind(), TokenKind::CloseParen) {
433                    self.advance();
434                }
435
436                Ok(SimpleSelector::PseudoClass(result?))
437            }
438            _ => Err(SelectorParseError::UnexpectedToken {
439                loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
440                message: "expected pseudo-class name".into(),
441            }),
442        }
443    }
444
445    fn parse_nth_function(
446        &mut self,
447        from_end: bool,
448        of_type: bool,
449    ) -> Result<PseudoClass, SelectorParseError> {
450        let mut nth_tokens = Vec::new();
451        loop {
452            match self.current_kind() {
453                TokenKind::CloseParen | TokenKind::Eof => break,
454                TokenKind::Ident(name) if name.eq_ignore_ascii_case("of") && !of_type => {
455                    self.advance();
456                    break;
457                }
458                _ => {
459                    if let Some(t) = self.current_token() {
460                        nth_tokens.push(t.clone());
461                    }
462                    self.advance();
463                }
464            }
465        }
466
467        let nth = parse_nth(&nth_tokens)?;
468
469        if of_type {
470            if from_end {
471                Ok(PseudoClass::NthLastOfType(nth))
472            } else {
473                Ok(PseudoClass::NthOfType(nth))
474            }
475        } else {
476            self.skip_whitespace();
477            let selector_list =
478                (!matches!(self.current_kind(), TokenKind::CloseParen | TokenKind::Eof)).then(
479                    || {
480                        let mut inner_tokens = Vec::new();
481                        let mut depth = 0;
482                        loop {
483                            match self.current_kind() {
484                                TokenKind::CloseParen if depth == 0 => break,
485                                TokenKind::Eof => break,
486                                TokenKind::OpenParen => {
487                                    depth += 1;
488                                    if let Some(t) = self.current_token() {
489                                        inner_tokens.push(t.clone());
490                                    }
491                                    self.advance();
492                                }
493                                TokenKind::CloseParen => {
494                                    depth -= 1;
495                                    if let Some(t) = self.current_token() {
496                                        inner_tokens.push(t.clone());
497                                    }
498                                    self.advance();
499                                }
500                                _ => {
501                                    if let Some(t) = self.current_token() {
502                                        inner_tokens.push(t.clone());
503                                    }
504                                    self.advance();
505                                }
506                            }
507                        }
508                        let mut inner_parser = SelectorParser::from_tokens(inner_tokens);
509                        inner_parser.parse_selector_list().unwrap_or_default()
510                    },
511                );
512
513            if from_end {
514                Ok(PseudoClass::NthLastChild(nth, selector_list))
515            } else {
516                Ok(PseudoClass::NthChild(nth, selector_list))
517            }
518        }
519    }
520
521    fn parse_functional_pseudo(
522        &mut self,
523        constructor: impl FnOnce(SelectorList) -> PseudoClass,
524    ) -> Result<PseudoClass, SelectorParseError> {
525        let inner_tokens = self.collect_until_close_paren();
526        let mut inner_parser = SelectorParser::from_tokens(inner_tokens);
527        let list = inner_parser.parse_selector_list()?;
528        Ok(constructor(list))
529    }
530
531    fn parse_functional_pseudo_forgiving(
532        &mut self,
533        constructor: impl FnOnce(SelectorList) -> PseudoClass,
534    ) -> Result<PseudoClass, SelectorParseError> {
535        let inner_tokens = self.collect_until_close_paren();
536        let mut inner_parser = SelectorParser::from_tokens(inner_tokens);
537        let list = inner_parser.parse_selector_list_forgiving();
538        Ok(constructor(list))
539    }
540
541    fn parse_has_pseudo(&mut self) -> Result<PseudoClass, SelectorParseError> {
542        let inner_tokens = self.collect_until_close_paren();
543        let mut inner_parser = SelectorParser::from_tokens(inner_tokens);
544        let list = inner_parser.parse_selector_list()?;
545        let relatives = list
546            .into_iter()
547            .map(|s| RelativeSelector {
548                combinator: None,
549                selector: s,
550            })
551            .collect();
552        Ok(PseudoClass::Has(relatives))
553    }
554
555    fn parse_lang_pseudo(&mut self) -> Result<PseudoClass, SelectorParseError> {
556        let mut langs = Vec::new();
557        loop {
558            self.skip_whitespace();
559            match self.current_kind() {
560                TokenKind::Ident(name) => {
561                    langs.push(name.to_string());
562                    self.advance();
563                }
564                TokenKind::String(s) => {
565                    langs.push(s.to_string());
566                    self.advance();
567                }
568                TokenKind::Comma => {
569                    self.advance();
570                }
571                _ => break,
572            }
573        }
574        Ok(PseudoClass::Lang(langs))
575    }
576
577    fn parse_pseudo_element(&mut self) -> Result<SimpleSelector, SelectorParseError> {
578        match self.current_kind() {
579            TokenKind::Ident(name) => {
580                let name_lower = name.to_ascii_lowercase();
581                self.advance();
582                let pe = match name_lower.as_str() {
583                    "before" => PseudoElement::Before,
584                    "after" => PseudoElement::After,
585                    "first-line" => PseudoElement::FirstLine,
586                    "first-letter" => PseudoElement::FirstLetter,
587                    "placeholder" => PseudoElement::Placeholder,
588                    "selection" => PseudoElement::Selection,
589                    _ => PseudoElement::Custom(name_lower),
590                };
591                Ok(SimpleSelector::PseudoElement(pe))
592            }
593            _ => Err(SelectorParseError::UnexpectedToken {
594                loc: self.current_token().map(|t| t.loc).unwrap_or_default(),
595                message: "expected pseudo-element name".into(),
596            }),
597        }
598    }
599
600    fn collect_until_close_paren(&mut self) -> Vec<Token<'a>> {
601        let mut tokens = Vec::new();
602        let mut depth = 0;
603        loop {
604            match self.current_kind() {
605                TokenKind::CloseParen if depth == 0 => break,
606                TokenKind::Eof => break,
607                TokenKind::OpenParen => {
608                    depth += 1;
609                    if let Some(t) = self.current_token() {
610                        tokens.push(t.clone());
611                    }
612                    self.advance();
613                }
614                TokenKind::CloseParen => {
615                    depth -= 1;
616                    if let Some(t) = self.current_token() {
617                        tokens.push(t.clone());
618                    }
619                    self.advance();
620                }
621                _ => {
622                    if let Some(t) = self.current_token() {
623                        tokens.push(t.clone());
624                    }
625                    self.advance();
626                }
627            }
628        }
629        tokens
630    }
631}
632
633fn compute_specificity_from_components(components: &[Component]) -> Specificity {
634    let mut spec = Specificity::default();
635    for c in components {
636        if let Component::Simple(s) = c {
637            spec += crate::css_selectors::specificity::simple_specificity_pub(s);
638        }
639    }
640    spec
641}
642
#[cfg(test)]
mod tests {
    use super::*;

    // Components are stored in reverse source order, so index 0 is always the
    // last simple selector of the rightmost compound.

    #[test]
    fn parse_type_selector() {
        let parsed = parse_selector_list("div").unwrap();
        assert_eq!(parsed.len(), 1);
        let expected = [Component::Simple(SimpleSelector::Type("div".into()))];
        assert_eq!(parsed[0].components(), &expected);
    }

    #[test]
    fn parse_class_selector() {
        let parsed = parse_selector_list(".foo").unwrap();
        assert_eq!(parsed.len(), 1);
        let expected = [Component::Simple(SimpleSelector::Class("foo".into()))];
        assert_eq!(parsed[0].components(), &expected);
    }

    #[test]
    fn parse_id_selector() {
        let parsed = parse_selector_list("#bar").unwrap();
        assert_eq!(parsed.len(), 1);
        let expected = [Component::Simple(SimpleSelector::Id("bar".into()))];
        assert_eq!(parsed[0].components(), &expected);
    }

    #[test]
    fn parse_compound() {
        let parsed = parse_selector_list("div.foo#bar").unwrap();
        let parts = parsed[0].components();
        assert_eq!(parts.len(), 3);
        assert_eq!(
            parts[0],
            Component::Simple(SimpleSelector::Id("bar".into()))
        );
        assert_eq!(
            parts[1],
            Component::Simple(SimpleSelector::Class("foo".into()))
        );
        assert_eq!(
            parts[2],
            Component::Simple(SimpleSelector::Type("div".into()))
        );
    }

    #[test]
    fn parse_descendant() {
        let parsed = parse_selector_list("div span").unwrap();
        let parts = parsed[0].components();
        assert_eq!(parts.len(), 3);
        assert_eq!(
            parts[0],
            Component::Simple(SimpleSelector::Type("span".into()))
        );
        assert_eq!(parts[1], Component::Combinator(Combinator::Descendant));
        assert_eq!(
            parts[2],
            Component::Simple(SimpleSelector::Type("div".into()))
        );
    }

    #[test]
    fn parse_child_combinator() {
        let parsed = parse_selector_list("div > span").unwrap();
        let parts = parsed[0].components();
        assert_eq!(parts.len(), 3);
        assert_eq!(
            parts[0],
            Component::Simple(SimpleSelector::Type("span".into()))
        );
        assert_eq!(parts[1], Component::Combinator(Combinator::Child));
    }

    #[test]
    fn parse_selector_list_comma() {
        let parsed = parse_selector_list("h1, h2, h3").unwrap();
        assert_eq!(parsed.len(), 3);
    }

    #[test]
    fn parse_attribute_exact() {
        let parsed = parse_selector_list("[type=\"text\"]").unwrap();
        match &parsed[0].components()[0] {
            Component::Simple(SimpleSelector::Attribute {
                name,
                operator: Some(AttributeOperator::Exact),
                value: Some(v),
                ..
            }) if name == "type" && v == "text" => {}
            _ => panic!("expected an exact-match attribute selector on `type`"),
        }
    }

    #[test]
    fn parse_pseudo_class_hover() {
        let parsed = parse_selector_list("a:hover").unwrap();
        let parts = parsed[0].components();
        match &parts[0] {
            Component::Simple(SimpleSelector::PseudoClass(PseudoClass::Hover)) => {}
            _ => panic!("expected `:hover` as the first stored component"),
        }
    }

    #[test]
    fn parse_nth_child() {
        let parsed = parse_selector_list(":nth-child(2n+1)").unwrap();
        match &parsed[0].components()[0] {
            Component::Simple(SimpleSelector::PseudoClass(PseudoClass::NthChild(
                NthExpr { a: 2, b: 1 },
                None,
            ))) => {}
            _ => panic!("expected `:nth-child(2n+1)` without an `of` clause"),
        }
    }

    #[test]
    fn specificity_computed() {
        let parsed = parse_selector_list("#main .content > p:first-child").unwrap();
        assert_eq!(parsed[0].specificity(), Specificity::new(1, 2, 1));
    }

    #[test]
    fn parse_empty_selector_is_error() {
        let outcome = parse_selector_list("");
        assert!(outcome.is_err());
    }
}
759}