deft_simplecss/
selector.rs

1// Copyright 2019 the SimpleCSS Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use alloc::{vec, vec::Vec};
5use core::fmt;
6
7use log::warn;
8
9use crate::stream::Stream;
10use crate::Error;
11
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`
    Exists,
    /// `[attr=value]`
    Matches(&'a str),
    /// `[attr~=value]`
    Contains(&'a str),
    /// `[attr|=value]`
    StartsWith(&'a str),
}

impl AttributeOperator<'_> {
    /// Checks that `value` (the attribute value of an element) satisfies the operator.
    ///
    /// - `Exists` accepts any value.
    /// - `Matches(v)` requires `value` to be exactly `v`.
    /// - `Contains(v)` treats `value` as a whitespace-separated list of words and
    ///   requires one of them to be exactly `v`. An empty `v` never matches.
    /// - `StartsWith(v)` requires `value` to be exactly `v` or to begin with `v`
    ///   immediately followed by `-`.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            // Words may be separated by any CSS whitespace (space, tab, LF, FF, CR),
            // not just U+0020. `split_ascii_whitespace` never yields an empty word,
            // so an empty operand cannot match.
            AttributeOperator::Contains(v) => value.split_ascii_whitespace().any(|word| word == v),
            AttributeOperator::StartsWith(v) => match value.strip_prefix(v) {
                // Either nothing follows the prefix (exact match) or a `-` does.
                Some(rest) => rest.is_empty() || rest.starts_with('-'),
                None => false,
            },
        }
    }
}
45
/// A pseudo-class.
#[derive(Clone, Copy, PartialEq, Debug)]
#[allow(missing_docs)]
pub enum PseudoClass<'a> {
    FirstChild,
    Link,
    Visited,
    Hover,
    Active,
    Focus,
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class name without the leading `:`,
    /// e.g. `first-child` or `lang(en)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only `lang` carries an argument; every other variant is a fixed keyword.
        let keyword = match *self {
            PseudoClass::Lang(lang) => return write!(f, "lang({})", lang),
            PseudoClass::FirstChild => "first-child",
            PseudoClass::Link => "link",
            PseudoClass::Visited => "visited",
            PseudoClass::Hover => "hover",
            PseudoClass::Active => "active",
            PseudoClass::Focus => "focus",
        };

        f.write_str(keyword)
    }
}
72
/// A trait to query an element node metadata.
///
/// Selector matching is generic over this trait: it only ever inspects an
/// element through the methods below.
pub trait Element: Sized {
    /// Returns a parent element.
    ///
    /// Used to resolve the descendant (`a b`) and child (`a > b`) combinators.
    fn parent_element(&self) -> Option<Self>;

    /// Returns a previous sibling element.
    ///
    /// Used to resolve the adjacent sibling (`a + b`) combinator.
    fn prev_sibling_element(&self) -> Option<Self>;

    /// Checks that the element has a specified local name.
    fn has_local_name(&self, name: &str) -> bool;

    /// Checks that the element has a specified class.
    fn has_class(&self, name: &str) -> bool;

    /// Checks that the element has a specified attribute
    /// whose value satisfies `operator`.
    fn attribute_matches(&self, local_name: &str, operator: AttributeOperator<'_>) -> bool;

    /// Checks that the element matches a specified pseudo-class.
    fn pseudo_class_matches(&self, class: PseudoClass<'_>) -> bool;
}
93
/// The element-name part of a simple selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SimpleSelectorType<'a> {
    /// A type selector holding the element name to match, e.g. `div`.
    Type(&'a str),
    /// The universal selector `*`. Also used when a component starts
    /// directly with a subselector such as `.class`.
    Universal,
}
99
/// An additional condition attached to a simple selector.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SubSelector<'a> {
    /// An attribute name with the operator its value must satisfy.
    /// `#id` selectors are stored as `Attribute("id", Matches(..))`.
    Attribute(&'a str, AttributeOperator<'a>),
    /// A pseudo-class such as `:first-child`.
    PseudoClass(PseudoClass<'a>),
    /// A class name, as in `.class`.
    Class(&'a str),
}
106
/// A simple selector: an element-name test plus any number of subselectors.
#[derive(Clone, Debug)]
struct SimpleSelector<'a> {
    /// The type or universal selector.
    kind: SimpleSelectorType<'a>,
    /// Conditions that must all hold for an element to match.
    subselectors: Vec<SubSelector<'a>>,
}
112
/// The relation between a component and the component before it.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Combinator {
    /// No combinator; expected on the first component of a selector.
    None,
    /// `a b`
    Descendant,
    /// `a > b`
    Child,
    /// `a + b`
    AdjacentSibling,
}
120
/// One step of a selector: a simple selector and how it relates to the previous step.
#[derive(Clone, Debug)]
struct Component<'a> {
    /// A combinator that precedes the selector.
    combinator: Combinator,
    /// The simple selector the element itself must match.
    selector: SimpleSelector<'a>,
}
127
128/// A selector.
129#[derive(Clone, Debug)]
130pub struct Selector<'a> {
131    components: Vec<Component<'a>>,
132}
133
134impl<'a> Selector<'a> {
135    /// Parses a selector from a string.
136    ///
137    /// Will log any errors as a warnings.
138    ///
139    /// Parsing will be stopped at EOF, `,` or `{`.
140    pub fn parse(text: &'a str) -> Option<Self> {
141        parse(text).0
142    }
143
144    /// Compute the selector's specificity.
145    ///
146    /// Cf. <https://www.w3.org/TR/selectors/#specificity>.
147    pub fn specificity(&self) -> [u8; 3] {
148        let mut spec = [0u8; 3];
149
150        for selector in self.components.iter().map(|c| &c.selector) {
151            if matches!(selector.kind, SimpleSelectorType::Type(_)) {
152                spec[2] = spec[2].saturating_add(1);
153            }
154
155            for sub in &selector.subselectors {
156                match sub {
157                    SubSelector::Attribute("id", _) => spec[0] = spec[0].saturating_add(1),
158                    _ => spec[1] = spec[1].saturating_add(1),
159                }
160            }
161        }
162
163        spec
164    }
165
166    /// Checks that the provided element matches the current selector.
167    pub fn matches<E: Element>(&self, element: &E) -> bool {
168        assert!(!self.components.is_empty(), "selector must not be empty");
169        assert_eq!(
170            self.components[0].combinator,
171            Combinator::None,
172            "the first component must not have a combinator"
173        );
174
175        self.matches_impl(self.components.len() - 1, element)
176    }
177
178    fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool {
179        let component = &self.components[idx];
180
181        if !match_selector(&component.selector, element) {
182            return false;
183        }
184
185        match component.combinator {
186            Combinator::Descendant => {
187                let mut parent = element.parent_element();
188                while let Some(e) = parent {
189                    if self.matches_impl(idx - 1, &e) {
190                        return true;
191                    }
192
193                    parent = e.parent_element();
194                }
195
196                false
197            }
198            Combinator::Child => {
199                if let Some(parent) = element.parent_element() {
200                    if self.matches_impl(idx - 1, &parent) {
201                        return true;
202                    }
203                }
204
205                false
206            }
207            Combinator::AdjacentSibling => {
208                if let Some(prev) = element.prev_sibling_element() {
209                    if self.matches_impl(idx - 1, &prev) {
210                        return true;
211                    }
212                }
213
214                false
215            }
216            Combinator::None => true,
217        }
218    }
219}
220
221fn match_selector<E: Element>(selector: &SimpleSelector<'_>, element: &E) -> bool {
222    if let SimpleSelectorType::Type(ident) = selector.kind {
223        if !element.has_local_name(ident) {
224            return false;
225        }
226    }
227
228    for sub in &selector.subselectors {
229        match sub {
230            SubSelector::Attribute(name, operator) => {
231                if !element.attribute_matches(name, *operator) {
232                    return false;
233                }
234            }
235            SubSelector::PseudoClass(class) => {
236                if !element.pseudo_class_matches(*class) {
237                    return false;
238                }
239            }
240            SubSelector::Class(name) => {
241                if !element.has_class(name) {
242                    return false;
243                }
244            }
245        }
246    }
247
248    true
249}
250
251pub(crate) fn parse(text: &str) -> (Option<Selector<'_>>, usize) {
252    let mut components: Vec<Component<'_>> = Vec::new();
253    let mut combinator = Combinator::None;
254
255    let mut tokenizer = SelectorTokenizer::from(text);
256    for token in &mut tokenizer {
257        let mut add_sub = |sub| {
258            if combinator == Combinator::None && !components.is_empty() {
259                if let Some(ref mut component) = components.last_mut() {
260                    component.selector.subselectors.push(sub);
261                }
262            } else {
263                components.push(Component {
264                    selector: SimpleSelector {
265                        kind: SimpleSelectorType::Universal,
266                        subselectors: vec![sub],
267                    },
268                    combinator,
269                });
270
271                combinator = Combinator::None;
272            }
273        };
274
275        let token = match token {
276            Ok(t) => t,
277            Err(e) => {
278                warn!("Selector parsing failed cause {}.", e);
279                return (None, tokenizer.stream.pos());
280            }
281        };
282
283        match token {
284            SelectorToken::UniversalSelector => {
285                components.push(Component {
286                    selector: SimpleSelector {
287                        kind: SimpleSelectorType::Universal,
288                        subselectors: Vec::new(),
289                    },
290                    combinator,
291                });
292
293                combinator = Combinator::None;
294            }
295            SelectorToken::TypeSelector(ident) => {
296                components.push(Component {
297                    selector: SimpleSelector {
298                        kind: SimpleSelectorType::Type(ident),
299                        subselectors: Vec::new(),
300                    },
301                    combinator,
302                });
303
304                combinator = Combinator::None;
305            }
306            SelectorToken::ClassSelector(ident) => {
307                add_sub(SubSelector::Class(ident));
308            }
309            SelectorToken::IdSelector(id) => {
310                add_sub(SubSelector::Attribute("id", AttributeOperator::Matches(id)));
311            }
312            SelectorToken::AttributeSelector(name, op) => {
313                add_sub(SubSelector::Attribute(name, op));
314            }
315            SelectorToken::PseudoClass(ident) => {
316                let class = match ident {
317                    "first-child" => PseudoClass::FirstChild,
318                    "link" => PseudoClass::Link,
319                    "visited" => PseudoClass::Visited,
320                    "hover" => PseudoClass::Hover,
321                    "active" => PseudoClass::Active,
322                    "focus" => PseudoClass::Focus,
323                    _ => {
324                        warn!("':{}' is not supported. Selector skipped.", ident);
325                        return (None, tokenizer.stream.pos());
326                    }
327                };
328
329                // TODO: duplicates
330                // TODO: order
331
332                add_sub(SubSelector::PseudoClass(class));
333            }
334            SelectorToken::LangPseudoClass(lang) => {
335                add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang)));
336            }
337            SelectorToken::DescendantCombinator => {
338                combinator = Combinator::Descendant;
339            }
340            SelectorToken::ChildCombinator => {
341                combinator = Combinator::Child;
342            }
343            SelectorToken::AdjacentCombinator => {
344                combinator = Combinator::AdjacentSibling;
345            }
346        }
347    }
348
349    if components.is_empty() {
350        (None, tokenizer.stream.pos())
351    } else if components[0].combinator != Combinator::None {
352        debug_assert_eq!(
353            components[0].combinator,
354            Combinator::None,
355            "the first component must not have a combinator"
356        );
357
358        (None, tokenizer.stream.pos())
359    } else {
360        (Some(Selector { components }), tokenizer.stream.pos())
361    }
362}
363
364impl fmt::Display for Selector<'_> {
365    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
366        for component in &self.components {
367            match component.combinator {
368                Combinator::Descendant => write!(f, " ")?,
369                Combinator::Child => write!(f, " > ")?,
370                Combinator::AdjacentSibling => write!(f, " + ")?,
371                Combinator::None => {}
372            }
373
374            match component.selector.kind {
375                SimpleSelectorType::Universal => write!(f, "*")?,
376                SimpleSelectorType::Type(ident) => write!(f, "{}", ident)?,
377            };
378
379            for sel in &component.selector.subselectors {
380                match sel {
381                    SubSelector::Attribute(name, operator) => {
382                        match operator {
383                            AttributeOperator::Exists => {
384                                write!(f, "[{}]", name)?;
385                            }
386                            AttributeOperator::Matches(value) => {
387                                write!(f, "[{}='{}']", name, value)?;
388                            }
389                            AttributeOperator::Contains(value) => {
390                                write!(f, "[{}~='{}']", name, value)?;
391                            }
392                            AttributeOperator::StartsWith(value) => {
393                                write!(f, "[{}|='{}']", name, value)?;
394                            }
395                        };
396                    }
397                    SubSelector::PseudoClass(class) => write!(f, ":{}", class)?,
398                    SubSelector::Class(class) => write!(f, ".{}", class)?,
399                }
400            }
401        }
402
403        Ok(())
404    }
405}
406
/// A selector token.
///
/// Produced by [`SelectorTokenizer`]. String payloads borrow from the input text.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum SelectorToken<'a> {
    /// `*`
    UniversalSelector,

    /// `div`
    TypeSelector(&'a str),

    /// `.class`
    ClassSelector(&'a str),

    /// `#id`
    IdSelector(&'a str),

    /// `[color=red]`
    AttributeSelector(&'a str, AttributeOperator<'a>),

    /// `:first-child`
    ///
    /// Holds the name after the colon; the tokenizer does not check
    /// that the name is a known pseudo-class.
    PseudoClass(&'a str),

    /// `:lang(en)`
    LangPseudoClass(&'a str),

    /// `a b`
    DescendantCombinator,

    /// `a > b`
    ChildCombinator,

    /// `a + b`
    AdjacentCombinator,
}
440
/// A selector tokenizer.
///
/// # Example
///
/// ```
/// use simplecss::{SelectorTokenizer, SelectorToken};
///
/// let mut t = SelectorTokenizer::from("div > p:first-child");
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("div"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::ChildCombinator);
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::TypeSelector("p"));
/// assert_eq!(t.next().unwrap().unwrap(), SelectorToken::PseudoClass("first-child"));
/// assert!(t.next().is_none());
/// ```
pub struct SelectorTokenizer<'a> {
    // The input being consumed.
    stream: Stream<'a>,
    // `true` at the start of the input and right after a combinator token,
    // i.e. whenever a selector is expected next.
    after_combinator: bool,
    // Set once an error was reported or a `,` / `{` terminator was reached.
    finished: bool,
}
460
461impl<'a> From<&'a str> for SelectorTokenizer<'a> {
462    fn from(text: &'a str) -> Self {
463        SelectorTokenizer {
464            stream: Stream::from(text),
465            after_combinator: true,
466            finished: false,
467        }
468    }
469}
470
471impl<'a> Iterator for SelectorTokenizer<'a> {
472    type Item = Result<SelectorToken<'a>, Error>;
473
474    fn next(&mut self) -> Option<Self::Item> {
475        if self.finished || self.stream.at_end() {
476            if self.after_combinator {
477                self.after_combinator = false;
478                return Some(Err(Error::SelectorMissing));
479            }
480
481            return None;
482        }
483
484        macro_rules! try2 {
485            ($e:expr) => {
486                match $e {
487                    Ok(v) => v,
488                    Err(e) => {
489                        self.finished = true;
490                        return Some(Err(e));
491                    }
492                }
493            };
494        }
495
496        match self.stream.curr_byte_unchecked() {
497            b'*' => {
498                if !self.after_combinator {
499                    self.finished = true;
500                    return Some(Err(Error::UnexpectedSelector));
501                }
502
503                self.after_combinator = false;
504                self.stream.advance(1);
505                Some(Ok(SelectorToken::UniversalSelector))
506            }
507            b'#' => {
508                self.after_combinator = false;
509                self.stream.advance(1);
510                let ident = try2!(self.stream.consume_ident());
511                Some(Ok(SelectorToken::IdSelector(ident)))
512            }
513            b'.' => {
514                self.after_combinator = false;
515                self.stream.advance(1);
516                let ident = try2!(self.stream.consume_ident());
517                Some(Ok(SelectorToken::ClassSelector(ident)))
518            }
519            b'[' => {
520                self.after_combinator = false;
521                self.stream.advance(1);
522                let ident = try2!(self.stream.consume_ident());
523
524                let op = match try2!(self.stream.curr_byte()) {
525                    b']' => AttributeOperator::Exists,
526                    b'=' => {
527                        self.stream.advance(1);
528                        let value = try2!(self.stream.consume_string());
529                        AttributeOperator::Matches(value)
530                    }
531                    b'~' => {
532                        self.stream.advance(1);
533                        try2!(self.stream.consume_byte(b'='));
534                        let value = try2!(self.stream.consume_string());
535                        AttributeOperator::Contains(value)
536                    }
537                    b'|' => {
538                        self.stream.advance(1);
539                        try2!(self.stream.consume_byte(b'='));
540                        let value = try2!(self.stream.consume_string());
541                        AttributeOperator::StartsWith(value)
542                    }
543                    _ => {
544                        self.finished = true;
545                        return Some(Err(Error::InvalidAttributeSelector));
546                    }
547                };
548
549                try2!(self.stream.consume_byte(b']'));
550
551                Some(Ok(SelectorToken::AttributeSelector(ident, op)))
552            }
553            b':' => {
554                self.after_combinator = false;
555                self.stream.advance(1);
556                let ident = try2!(self.stream.consume_ident());
557
558                if ident == "lang" {
559                    try2!(self.stream.consume_byte(b'('));
560                    let lang = self.stream.consume_bytes(|c| c != b')').trim();
561                    try2!(self.stream.consume_byte(b')'));
562
563                    if lang.is_empty() {
564                        self.finished = true;
565                        return Some(Err(Error::InvalidLanguagePseudoClass));
566                    }
567
568                    Some(Ok(SelectorToken::LangPseudoClass(lang)))
569                } else {
570                    Some(Ok(SelectorToken::PseudoClass(ident)))
571                }
572            }
573            b'>' => {
574                if self.after_combinator {
575                    self.after_combinator = false;
576                    self.finished = true;
577                    return Some(Err(Error::UnexpectedCombinator));
578                }
579
580                self.stream.advance(1);
581                self.after_combinator = true;
582                Some(Ok(SelectorToken::ChildCombinator))
583            }
584            b'+' => {
585                if self.after_combinator {
586                    self.after_combinator = false;
587                    self.finished = true;
588                    return Some(Err(Error::UnexpectedCombinator));
589                }
590
591                self.stream.advance(1);
592                self.after_combinator = true;
593                Some(Ok(SelectorToken::AdjacentCombinator))
594            }
595            b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
596                self.stream.skip_spaces();
597
598                if self.after_combinator {
599                    return self.next();
600                }
601
602                while self.stream.curr_byte() == Ok(b'/') {
603                    try2!(self.stream.skip_comment());
604                    self.stream.skip_spaces();
605                }
606
607                match self.stream.curr_byte() {
608                    Ok(b'>') | Ok(b'+') | Ok(b',') | Ok(b'{') | Err(_) => self.next(),
609                    _ => {
610                        if self.after_combinator {
611                            self.after_combinator = false;
612                            self.finished = true;
613                            return Some(Err(Error::UnexpectedSelector));
614                        }
615
616                        self.after_combinator = true;
617                        Some(Ok(SelectorToken::DescendantCombinator))
618                    }
619                }
620            }
621            b'/' => {
622                if self.stream.next_byte() == Ok(b'*') {
623                    try2!(self.stream.skip_comment());
624                } else {
625                    self.finished = true;
626                }
627
628                self.next()
629            }
630            b',' | b'{' => {
631                self.finished = true;
632                self.next()
633            }
634            _ => {
635                let ident = try2!(self.stream.consume_ident());
636
637                if !self.after_combinator {
638                    self.finished = true;
639                    return Some(Err(Error::UnexpectedSelector));
640                }
641
642                self.after_combinator = false;
643                Some(Ok(SelectorToken::TypeSelector(ident)))
644            }
645        }
646    }
647}