deft_simplecss/
selector.rs

1// Copyright 2019 the SimpleCSS Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use alloc::{vec, vec::Vec};
5use core::fmt;
6
7use log::warn;
8
9use crate::stream::Stream;
10use crate::Error;
11
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`: the attribute is present, whatever its value.
    Exists,
    /// `[attr=value]`: the attribute value is exactly `value`.
    Matches(&'a str),
    /// `[attr~=value]`: the attribute value is a whitespace-separated list of
    /// words, one of which is exactly `value`.
    Contains(&'a str),
    /// `[attr|=value]`: the attribute value is exactly `value`, or begins with
    /// `value` immediately followed by `-`.
    StartsWith(&'a str),
}

impl AttributeOperator<'_> {
    /// Checks whether an attribute `value` satisfies the operator.
    ///
    /// `value` is the attribute's value as found on the element; for
    /// [`AttributeOperator::Exists`] it is ignored and the result is always
    /// `true`.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            AttributeOperator::Contains(v) => {
                // Words are separated by any run of CSS whitespace (space, tab,
                // LF, CR, FF), not just a single space. Splitting this way never
                // yields an empty word, so an empty `v` (or one containing
                // whitespace) can never match, as the Selectors spec requires.
                value.split_ascii_whitespace().any(|word| word == v)
            }
            AttributeOperator::StartsWith(v) => {
                // Exactly `v`, or `v` immediately followed by `-`. Slicing at
                // `v.len()` is safe because `value` starts with `v`, which
                // makes that offset a character boundary.
                value == v || (value.starts_with(v) && value[v.len()..].starts_with('-'))
            }
        }
    }
}
45
/// A pseudo-class.
#[derive(Clone, Copy, PartialEq, Debug)]
#[allow(missing_docs)]
pub enum PseudoClass<'a> {
    FirstChild,
    Link,
    Visited,
    Hover,
    Active,
    Focus,
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class the way it appears in a selector, without the
    /// leading colon (e.g. `first-child` or `lang(en)`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only `lang(...)` carries an argument; all others are fixed keywords.
        let keyword = match *self {
            PseudoClass::Lang(lang) => return write!(f, "lang({})", lang),
            PseudoClass::FirstChild => "first-child",
            PseudoClass::Link => "link",
            PseudoClass::Visited => "visited",
            PseudoClass::Hover => "hover",
            PseudoClass::Active => "active",
            PseudoClass::Focus => "focus",
        };

        f.write_str(keyword)
    }
}
72
/// A trait to query an element node's metadata.
///
/// [`Selector::matches`] is generic over this trait: it walks the tree through
/// [`Element::parent_element`] and [`Element::prev_sibling_element`] and tests
/// each visited node with the remaining predicate methods.
pub trait Element: Sized {
    /// Returns a parent element.
    fn parent_element(&self) -> Option<Self>;

    /// Returns a previous sibling element.
    fn prev_sibling_element(&self) -> Option<Self>;

    /// Checks that the element has a specified local name.
    fn has_local_name(&self, name: &str) -> bool;

    /// Checks that the element has a specified class.
    fn has_class(&self, name: &str) -> bool;

    /// Checks that the element has a specified attribute.
    ///
    /// `operator` describes the test to apply to the attribute;
    /// [`AttributeOperator::matches`] can be used to evaluate it against the
    /// attribute's value.
    fn attribute_matches(&self, local_name: &str, operator: AttributeOperator<'_>) -> bool;

    /// Checks that the element matches a specified pseudo-class.
    fn pseudo_class_matches(&self, class: PseudoClass<'_>) -> bool;

    /// Checks that the element matches a specified pseudo-element.
    fn pseudo_element_matches(&self, local_name: &str) -> bool;
}
96
/// The element-name part of a simple selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SimpleSelectorType<'a> {
    /// A type selector such as `div`; the element's local name must match.
    Type(&'a str),
    /// The universal selector `*`; no name check is performed.
    Universal,
}
102
/// A condition attached to a simple selector in addition to its type.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SubSelector<'a> {
    /// An attribute name and the operator to apply to it.
    /// The parser also represents `#id` as an `id` attribute match.
    Attribute(&'a str, AttributeOperator<'a>),
    /// A pseudo-class such as `:hover`.
    PseudoClass(PseudoClass<'a>),
    /// A pseudo-element such as `::before`, stored without the colons.
    PseudoElement(&'a str),
    /// A class such as `.warning`, stored without the dot.
    Class(&'a str),
}
110
/// A type (or universal) selector together with its sub-selectors.
#[derive(Clone, Debug)]
struct SimpleSelector<'a> {
    /// The element-name test.
    kind: SimpleSelectorType<'a>,
    /// Additional conditions; an element must satisfy all of them to match.
    subselectors: Vec<SubSelector<'a>>,
}
116
/// The relationship between a component and the component before it.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Combinator {
    /// No combinator; used by the first component of a selector.
    None,
    /// `a b`: the previous component must match some ancestor.
    Descendant,
    /// `a > b`: the previous component must match the parent.
    Child,
    /// `a + b`: the previous component must match the previous sibling element.
    AdjacentSibling,
}
124
/// One step of a selector: a simple selector and how it relates to the
/// component that comes before it.
#[derive(Clone, Debug)]
struct Component<'a> {
    /// The combinator that precedes the selector.
    combinator: Combinator,
    /// The simple selector an element must satisfy at this step.
    selector: SimpleSelector<'a>,
}
131
/// A selector.
///
/// Borrows from the text it was parsed from.
#[derive(Clone, Debug)]
pub struct Selector<'a> {
    /// The slice of the input text consumed while parsing this selector.
    source: &'a str,
    /// The parsed components, in source order (leftmost first).
    components: Vec<Component<'a>>,
}
138
139impl<'a> Selector<'a> {
140    /// Parses a selector from a string.
141    ///
142    /// Will log any errors as a warnings.
143    ///
144    /// Parsing will be stopped at EOF, `,` or `{`.
145    pub fn parse(text: &'a str) -> Option<Self> {
146        parse(text).0
147    }
148
149    /// Compute the selector's specificity.
150    ///
151    /// Cf. <https://www.w3.org/TR/selectors/#specificity>.
152    pub fn specificity(&self) -> [u8; 3] {
153        let mut spec = [0u8; 3];
154
155        for selector in self.components.iter().map(|c| &c.selector) {
156            if matches!(selector.kind, SimpleSelectorType::Type(_)) {
157                spec[2] = spec[2].saturating_add(1);
158            }
159
160            for sub in &selector.subselectors {
161                match sub {
162                    SubSelector::Attribute("id", _) => spec[0] = spec[0].saturating_add(1),
163                    _ => spec[1] = spec[1].saturating_add(1),
164                }
165            }
166        }
167
168        spec
169    }
170
171    pub fn source(&self) -> &'a str {
172        self.source
173    }
174
175    /// Checks that the provided element matches the current selector.
176    pub fn matches<E: Element>(&self, element: &E) -> bool {
177        assert!(!self.components.is_empty(), "selector must not be empty");
178        assert_eq!(
179            self.components[0].combinator,
180            Combinator::None,
181            "the first component must not have a combinator"
182        );
183
184        self.matches_impl(self.components.len() - 1, element)
185    }
186
187    fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool {
188        let component = &self.components[idx];
189
190        if !match_selector(&component.selector, element) {
191            return false;
192        }
193
194        match component.combinator {
195            Combinator::Descendant => {
196                let mut parent = element.parent_element();
197                while let Some(e) = parent {
198                    if self.matches_impl(idx - 1, &e) {
199                        return true;
200                    }
201
202                    parent = e.parent_element();
203                }
204
205                false
206            }
207            Combinator::Child => {
208                if let Some(parent) = element.parent_element() {
209                    if self.matches_impl(idx - 1, &parent) {
210                        return true;
211                    }
212                }
213
214                false
215            }
216            Combinator::AdjacentSibling => {
217                if let Some(prev) = element.prev_sibling_element() {
218                    if self.matches_impl(idx - 1, &prev) {
219                        return true;
220                    }
221                }
222
223                false
224            }
225            Combinator::None => true,
226        }
227    }
228}
229
230fn match_selector<E: Element>(selector: &SimpleSelector<'_>, element: &E) -> bool {
231    if let SimpleSelectorType::Type(ident) = selector.kind {
232        if !element.has_local_name(ident) {
233            return false;
234        }
235    }
236
237    for sub in &selector.subselectors {
238        match sub {
239            SubSelector::Attribute(name, operator) => {
240                if !element.attribute_matches(name, *operator) {
241                    return false;
242                }
243            }
244            SubSelector::PseudoClass(class) => {
245                if !element.pseudo_class_matches(*class) {
246                    return false;
247                }
248            }
249            SubSelector::Class(name) => {
250                if !element.has_class(name) {
251                    return false;
252                }
253            }
254            SubSelector::PseudoElement(name) => {
255                if !element.pseudo_element_matches(*name) {
256                    return false;
257                }
258            }
259        }
260    }
261
262    true
263}
264
265pub(crate) fn parse(text: &str) -> (Option<Selector<'_>>, usize) {
266    let mut components: Vec<Component<'_>> = Vec::new();
267    let mut combinator = Combinator::None;
268
269    let mut tokenizer = SelectorTokenizer::from(text);
270    for token in &mut tokenizer {
271        let mut add_sub = |sub| {
272            if combinator == Combinator::None && !components.is_empty() {
273                if let Some(ref mut component) = components.last_mut() {
274                    component.selector.subselectors.push(sub);
275                }
276            } else {
277                components.push(Component {
278                    selector: SimpleSelector {
279                        kind: SimpleSelectorType::Universal,
280                        subselectors: vec![sub],
281                    },
282                    combinator,
283                });
284
285                combinator = Combinator::None;
286            }
287        };
288
289        let token = match token {
290            Ok(t) => t,
291            Err(e) => {
292                warn!("Selector parsing failed cause {}.", e);
293                return (None, tokenizer.stream.pos());
294            }
295        };
296
297        match token {
298            SelectorToken::UniversalSelector => {
299                components.push(Component {
300                    selector: SimpleSelector {
301                        kind: SimpleSelectorType::Universal,
302                        subselectors: Vec::new(),
303                    },
304                    combinator,
305                });
306
307                combinator = Combinator::None;
308            }
309            SelectorToken::TypeSelector(ident) => {
310                components.push(Component {
311                    selector: SimpleSelector {
312                        kind: SimpleSelectorType::Type(ident),
313                        subselectors: Vec::new(),
314                    },
315                    combinator,
316                });
317
318                combinator = Combinator::None;
319            }
320            SelectorToken::ClassSelector(ident) => {
321                add_sub(SubSelector::Class(ident));
322            }
323            SelectorToken::IdSelector(id) => {
324                add_sub(SubSelector::Attribute("id", AttributeOperator::Matches(id)));
325            }
326            SelectorToken::AttributeSelector(name, op) => {
327                add_sub(SubSelector::Attribute(name, op));
328            }
329            SelectorToken::PseudoClass(ident) => {
330                let class = match ident {
331                    "first-child" => PseudoClass::FirstChild,
332                    "link" => PseudoClass::Link,
333                    "visited" => PseudoClass::Visited,
334                    "hover" => PseudoClass::Hover,
335                    "active" => PseudoClass::Active,
336                    "focus" => PseudoClass::Focus,
337                    _ => {
338                        warn!("':{}' is not supported. Selector skipped.", ident);
339                        return (None, tokenizer.stream.pos());
340                    }
341                };
342
343                // TODO: duplicates
344                // TODO: order
345
346                add_sub(SubSelector::PseudoClass(class));
347            }
348            SelectorToken::PseudoElement(ident) => {
349                add_sub(SubSelector::PseudoElement(ident))
350            }
351            SelectorToken::LangPseudoClass(lang) => {
352                add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang)));
353            }
354            SelectorToken::DescendantCombinator => {
355                combinator = Combinator::Descendant;
356            }
357            SelectorToken::ChildCombinator => {
358                combinator = Combinator::Child;
359            }
360            SelectorToken::AdjacentCombinator => {
361                combinator = Combinator::AdjacentSibling;
362            }
363        }
364    }
365
366    if components.is_empty() {
367        (None, tokenizer.stream.pos())
368    } else if components[0].combinator != Combinator::None {
369        debug_assert_eq!(
370            components[0].combinator,
371            Combinator::None,
372            "the first component must not have a combinator"
373        );
374
375        (None, tokenizer.stream.pos())
376    } else {
377        let source = &text[0..tokenizer.stream.pos()];
378        (Some(Selector { source, components }), tokenizer.stream.pos())
379    }
380}
381
382impl fmt::Display for Selector<'_> {
383    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384        for component in &self.components {
385            match component.combinator {
386                Combinator::Descendant => write!(f, " ")?,
387                Combinator::Child => write!(f, " > ")?,
388                Combinator::AdjacentSibling => write!(f, " + ")?,
389                Combinator::None => {}
390            }
391
392            match component.selector.kind {
393                SimpleSelectorType::Universal => write!(f, "*")?,
394                SimpleSelectorType::Type(ident) => write!(f, "{}", ident)?,
395            };
396
397            for sel in &component.selector.subselectors {
398                match sel {
399                    SubSelector::Attribute(name, operator) => {
400                        match operator {
401                            AttributeOperator::Exists => {
402                                write!(f, "[{}]", name)?;
403                            }
404                            AttributeOperator::Matches(value) => {
405                                write!(f, "[{}='{}']", name, value)?;
406                            }
407                            AttributeOperator::Contains(value) => {
408                                write!(f, "[{}~='{}']", name, value)?;
409                            }
410                            AttributeOperator::StartsWith(value) => {
411                                write!(f, "[{}|='{}']", name, value)?;
412                            }
413                        };
414                    }
415                    SubSelector::PseudoClass(class) => write!(f, ":{}", class)?,
416                    SubSelector::Class(class) => write!(f, ".{}", class)?,
417                    SubSelector::PseudoElement(pseudo_element) => write!(f, "::{}", pseudo_element)?,
418                }
419            }
420        }
421
422        Ok(())
423    }
424}
425
/// A selector token.
///
/// Produced by [`SelectorTokenizer`]. String payloads borrow from the
/// tokenized text and exclude the leading punctuation (`.`, `#`, `:`, `::`).
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum SelectorToken<'a> {
    /// `*`
    UniversalSelector,

    /// `div`
    TypeSelector(&'a str),

    /// `.class`
    ClassSelector(&'a str),

    /// `#id`
    IdSelector(&'a str),

    /// `[color=red]`
    ///
    /// Holds the attribute name and the operator to apply to it.
    AttributeSelector(&'a str, AttributeOperator<'a>),

    /// `:first-child`
    PseudoClass(&'a str),

    /// `::before`
    PseudoElement(&'a str),

    /// `:lang(en)`
    ///
    /// Holds the trimmed text between the parentheses.
    LangPseudoClass(&'a str),

    /// `a b`
    DescendantCombinator,

    /// `a > b`
    ChildCombinator,

    /// `a + b`
    AdjacentCombinator,
}
462
/// A selector tokenizer.
///
/// # Example
///
/// ```
/// use simplecss::{SelectorTokenizer, SelectorToken};
///
/// let mut t = SelectorTokenizer::from("div > p:first-child");
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("div"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::ChildCombinator);
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("p"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::PseudoClass("first-child"));
/// assert!(t.next().is_none());
/// ```
pub struct SelectorTokenizer<'a> {
    /// The underlying text stream.
    stream: Stream<'a>,
    /// `true` at the start of the input and right after a combinator token,
    /// i.e. while a selector is still expected.
    after_combinator: bool,
    /// Set once tokenization has stopped, either on an error or on a selector
    /// terminator such as `,` or `{`.
    finished: bool,
}
482
483impl<'a> From<&'a str> for SelectorTokenizer<'a> {
484    fn from(text: &'a str) -> Self {
485        SelectorTokenizer {
486            stream: Stream::from(text),
487            after_combinator: true,
488            finished: false,
489        }
490    }
491}
492
493impl<'a> Iterator for SelectorTokenizer<'a> {
494    type Item = Result<SelectorToken<'a>, Error>;
495
496    fn next(&mut self) -> Option<Self::Item> {
497        if self.finished || self.stream.at_end() {
498            if self.after_combinator {
499                self.after_combinator = false;
500                return Some(Err(Error::SelectorMissing));
501            }
502
503            return None;
504        }
505
506        macro_rules! try2 {
507            ($e:expr) => {
508                match $e {
509                    Ok(v) => v,
510                    Err(e) => {
511                        self.finished = true;
512                        return Some(Err(e));
513                    }
514                }
515            };
516        }
517
518        match self.stream.curr_byte_unchecked() {
519            b'*' => {
520                if !self.after_combinator {
521                    self.finished = true;
522                    return Some(Err(Error::UnexpectedSelector));
523                }
524
525                self.after_combinator = false;
526                self.stream.advance(1);
527                Some(Ok(SelectorToken::UniversalSelector))
528            }
529            b'#' => {
530                self.after_combinator = false;
531                self.stream.advance(1);
532                let ident = try2!(self.stream.consume_ident());
533                Some(Ok(SelectorToken::IdSelector(ident)))
534            }
535            b'.' => {
536                self.after_combinator = false;
537                self.stream.advance(1);
538                let ident = try2!(self.stream.consume_ident());
539                Some(Ok(SelectorToken::ClassSelector(ident)))
540            }
541            b'[' => {
542                self.after_combinator = false;
543                self.stream.advance(1);
544                let ident = try2!(self.stream.consume_ident());
545
546                let op = match try2!(self.stream.curr_byte()) {
547                    b']' => AttributeOperator::Exists,
548                    b'=' => {
549                        self.stream.advance(1);
550                        let value = try2!(self.stream.consume_string());
551                        AttributeOperator::Matches(value)
552                    }
553                    b'~' => {
554                        self.stream.advance(1);
555                        try2!(self.stream.consume_byte(b'='));
556                        let value = try2!(self.stream.consume_string());
557                        AttributeOperator::Contains(value)
558                    }
559                    b'|' => {
560                        self.stream.advance(1);
561                        try2!(self.stream.consume_byte(b'='));
562                        let value = try2!(self.stream.consume_string());
563                        AttributeOperator::StartsWith(value)
564                    }
565                    _ => {
566                        self.finished = true;
567                        return Some(Err(Error::InvalidAttributeSelector));
568                    }
569                };
570
571                try2!(self.stream.consume_byte(b']'));
572
573                Some(Ok(SelectorToken::AttributeSelector(ident, op)))
574            }
575            b':' => {
576                self.after_combinator = false;
577                self.stream.advance(1);
578                let is_pseudo_element = Ok(b':') == self.stream.curr_byte();
579                if is_pseudo_element {
580                    self.stream.advance(1);
581                }
582                let ident = try2!(self.stream.consume_ident());
583
584                if ident == "lang" && !is_pseudo_element {
585                    try2!(self.stream.consume_byte(b'('));
586                    let lang = self.stream.consume_bytes(|c| c != b')').trim();
587                    try2!(self.stream.consume_byte(b')'));
588
589                    if lang.is_empty() {
590                        self.finished = true;
591                        return Some(Err(Error::InvalidLanguagePseudoClass));
592                    }
593
594                    Some(Ok(SelectorToken::LangPseudoClass(lang)))
595                } else if is_pseudo_element {
596                    Some(Ok(SelectorToken::PseudoElement(ident)))
597                } else {
598                    Some(Ok(SelectorToken::PseudoClass(ident)))
599                }
600            }
601            b'>' => {
602                if self.after_combinator {
603                    self.after_combinator = false;
604                    self.finished = true;
605                    return Some(Err(Error::UnexpectedCombinator));
606                }
607
608                self.stream.advance(1);
609                self.after_combinator = true;
610                Some(Ok(SelectorToken::ChildCombinator))
611            }
612            b'+' => {
613                if self.after_combinator {
614                    self.after_combinator = false;
615                    self.finished = true;
616                    return Some(Err(Error::UnexpectedCombinator));
617                }
618
619                self.stream.advance(1);
620                self.after_combinator = true;
621                Some(Ok(SelectorToken::AdjacentCombinator))
622            }
623            b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
624                self.stream.skip_spaces();
625
626                if self.after_combinator {
627                    return self.next();
628                }
629
630                while self.stream.curr_byte() == Ok(b'/') {
631                    try2!(self.stream.skip_comment());
632                    self.stream.skip_spaces();
633                }
634
635                match self.stream.curr_byte() {
636                    Ok(b'>') | Ok(b'+') | Ok(b',') | Ok(b'{') | Err(_) => self.next(),
637                    _ => {
638                        if self.after_combinator {
639                            self.after_combinator = false;
640                            self.finished = true;
641                            return Some(Err(Error::UnexpectedSelector));
642                        }
643
644                        self.after_combinator = true;
645                        Some(Ok(SelectorToken::DescendantCombinator))
646                    }
647                }
648            }
649            b'/' => {
650                if self.stream.next_byte() == Ok(b'*') {
651                    try2!(self.stream.skip_comment());
652                } else {
653                    self.finished = true;
654                }
655
656                self.next()
657            }
658            b',' | b'{' => {
659                self.finished = true;
660                self.next()
661            }
662            _ => {
663                let ident = try2!(self.stream.consume_ident());
664
665                if !self.after_combinator {
666                    self.finished = true;
667                    return Some(Err(Error::UnexpectedSelector));
668                }
669
670                self.after_combinator = false;
671                Some(Ok(SelectorToken::TypeSelector(ident)))
672            }
673        }
674    }
675}