1use alloc::{vec, vec::Vec};
5use core::fmt;
6
7use log::warn;
8
9use crate::stream::Stream;
10use crate::Error;
11
/// An attribute selector operator.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum AttributeOperator<'a> {
    /// `[attr]`: the attribute is present, whatever its value.
    Exists,
    /// `[attr=value]`: the attribute value is exactly `value`.
    Matches(&'a str),
    /// `[attr~=value]`: the attribute value is a whitespace-separated list
    /// of words, one of which is exactly `value`.
    Contains(&'a str),
    /// `[attr|=value]`: the attribute value is exactly `value` or starts
    /// with `value` immediately followed by `-`.
    StartsWith(&'a str),
}

impl AttributeOperator<'_> {
    /// Checks whether the attribute `value` satisfies this operator.
    ///
    /// For [`AttributeOperator::Contains`] the value is split on ASCII
    /// whitespace (space, tab, LF, FF, CR). An empty operand, or an operand
    /// that itself contains whitespace, therefore never matches.
    pub fn matches(&self, value: &str) -> bool {
        match *self {
            AttributeOperator::Exists => true,
            AttributeOperator::Matches(v) => value == v,
            // Splitting on whitespace runs never yields empty words, so an
            // empty operand cannot match an empty value or doubled spaces.
            AttributeOperator::Contains(v) => value.split_ascii_whitespace().any(|word| word == v),
            AttributeOperator::StartsWith(v) => {
                // Either nothing is left after the prefix, or what is left
                // begins with the `-` separator.
                value.starts_with(v)
                    && value
                        .get(v.len()..)
                        .map_or(false, |rest| rest.is_empty() || rest.starts_with('-'))
            }
        }
    }
}
45
/// A pseudo-class that can appear in a selector.
#[derive(Debug, Clone, Copy, PartialEq)]
// Most variants are named after the CSS keyword they stand for.
#[allow(missing_docs)]
pub enum PseudoClass<'a> {
    FirstChild,
    Link,
    Visited,
    Hover,
    Active,
    Focus,
    /// `:lang(...)`, carrying the text found between the parentheses.
    Lang(&'a str),
}

impl fmt::Display for PseudoClass<'_> {
    /// Writes the pseudo-class in its CSS spelling, without the leading `:`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match *self {
            // The only variant with a payload; it needs real formatting.
            PseudoClass::Lang(lang) => return write!(f, "lang({})", lang),
            PseudoClass::FirstChild => "first-child",
            PseudoClass::Link => "link",
            PseudoClass::Visited => "visited",
            PseudoClass::Hover => "hover",
            PseudoClass::Active => "active",
            PseudoClass::Focus => "focus",
        };

        f.write_str(keyword)
    }
}
72
73pub trait Element: Sized {
75 fn parent_element(&self) -> Option<Self>;
77
78 fn prev_sibling_element(&self) -> Option<Self>;
80
81 fn has_local_name(&self, name: &str) -> bool;
83
84 fn has_class(&self, name: &str) -> bool;
86
87 fn attribute_matches(&self, local_name: &str, operator: AttributeOperator<'_>) -> bool;
89
90 fn pseudo_class_matches(&self, class: PseudoClass<'_>) -> bool;
92
93 fn pseudo_element_matches(&self, local_name: &str) -> bool;
95}
96
/// The leading part of a compound selector: an element name or `*`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum SimpleSelectorType<'a> {
    /// A type selector holding the element's local name.
    Type(&'a str),
    /// The universal selector, which places no constraint on the name.
    Universal,
}
102
103#[derive(Clone, Copy, PartialEq, Debug)]
104enum SubSelector<'a> {
105 Attribute(&'a str, AttributeOperator<'a>),
106 PseudoClass(PseudoClass<'a>),
107 PseudoElement(&'a str),
108 Class(&'a str),
109}
110
111#[derive(Clone, Debug)]
112struct SimpleSelector<'a> {
113 kind: SimpleSelectorType<'a>,
114 subselectors: Vec<SubSelector<'a>>,
115}
116
/// How a component relates to the component written before it.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Combinator {
    /// No relation; used by the first component of a selector.
    None,
    /// Whitespace: the previous component matches some ancestor.
    Descendant,
    /// `>`: the previous component matches the parent.
    Child,
    /// `+`: the previous component matches the previous sibling.
    AdjacentSibling,
}
124
125#[derive(Clone, Debug)]
126struct Component<'a> {
127 combinator: Combinator,
129 selector: SimpleSelector<'a>,
130}
131
132#[derive(Clone, Debug)]
134pub struct Selector<'a> {
135 source: &'a str,
136 components: Vec<Component<'a>>,
137}
138
139impl<'a> Selector<'a> {
140 pub fn parse(text: &'a str) -> Option<Self> {
146 parse(text).0
147 }
148
149 pub fn specificity(&self) -> [u8; 3] {
153 let mut spec = [0u8; 3];
154
155 for selector in self.components.iter().map(|c| &c.selector) {
156 if matches!(selector.kind, SimpleSelectorType::Type(_)) {
157 spec[2] = spec[2].saturating_add(1);
158 }
159
160 for sub in &selector.subselectors {
161 match sub {
162 SubSelector::Attribute("id", _) => spec[0] = spec[0].saturating_add(1),
163 _ => spec[1] = spec[1].saturating_add(1),
164 }
165 }
166 }
167
168 spec
169 }
170
171 pub fn source(&self) -> &'a str {
172 self.source
173 }
174
175 pub fn matches<E: Element>(&self, element: &E) -> bool {
177 assert!(!self.components.is_empty(), "selector must not be empty");
178 assert_eq!(
179 self.components[0].combinator,
180 Combinator::None,
181 "the first component must not have a combinator"
182 );
183
184 self.matches_impl(self.components.len() - 1, element)
185 }
186
187 fn matches_impl<E: Element>(&self, idx: usize, element: &E) -> bool {
188 let component = &self.components[idx];
189
190 if !match_selector(&component.selector, element) {
191 return false;
192 }
193
194 match component.combinator {
195 Combinator::Descendant => {
196 let mut parent = element.parent_element();
197 while let Some(e) = parent {
198 if self.matches_impl(idx - 1, &e) {
199 return true;
200 }
201
202 parent = e.parent_element();
203 }
204
205 false
206 }
207 Combinator::Child => {
208 if let Some(parent) = element.parent_element() {
209 if self.matches_impl(idx - 1, &parent) {
210 return true;
211 }
212 }
213
214 false
215 }
216 Combinator::AdjacentSibling => {
217 if let Some(prev) = element.prev_sibling_element() {
218 if self.matches_impl(idx - 1, &prev) {
219 return true;
220 }
221 }
222
223 false
224 }
225 Combinator::None => true,
226 }
227 }
228}
229
230fn match_selector<E: Element>(selector: &SimpleSelector<'_>, element: &E) -> bool {
231 if let SimpleSelectorType::Type(ident) = selector.kind {
232 if !element.has_local_name(ident) {
233 return false;
234 }
235 }
236
237 for sub in &selector.subselectors {
238 match sub {
239 SubSelector::Attribute(name, operator) => {
240 if !element.attribute_matches(name, *operator) {
241 return false;
242 }
243 }
244 SubSelector::PseudoClass(class) => {
245 if !element.pseudo_class_matches(*class) {
246 return false;
247 }
248 }
249 SubSelector::Class(name) => {
250 if !element.has_class(name) {
251 return false;
252 }
253 }
254 SubSelector::PseudoElement(name) => {
255 if !element.pseudo_element_matches(*name) {
256 return false;
257 }
258 }
259 }
260 }
261
262 true
263}
264
265pub(crate) fn parse(text: &str) -> (Option<Selector<'_>>, usize) {
266 let mut components: Vec<Component<'_>> = Vec::new();
267 let mut combinator = Combinator::None;
268
269 let mut tokenizer = SelectorTokenizer::from(text);
270 for token in &mut tokenizer {
271 let mut add_sub = |sub| {
272 if combinator == Combinator::None && !components.is_empty() {
273 if let Some(ref mut component) = components.last_mut() {
274 component.selector.subselectors.push(sub);
275 }
276 } else {
277 components.push(Component {
278 selector: SimpleSelector {
279 kind: SimpleSelectorType::Universal,
280 subselectors: vec![sub],
281 },
282 combinator,
283 });
284
285 combinator = Combinator::None;
286 }
287 };
288
289 let token = match token {
290 Ok(t) => t,
291 Err(e) => {
292 warn!("Selector parsing failed cause {}.", e);
293 return (None, tokenizer.stream.pos());
294 }
295 };
296
297 match token {
298 SelectorToken::UniversalSelector => {
299 components.push(Component {
300 selector: SimpleSelector {
301 kind: SimpleSelectorType::Universal,
302 subselectors: Vec::new(),
303 },
304 combinator,
305 });
306
307 combinator = Combinator::None;
308 }
309 SelectorToken::TypeSelector(ident) => {
310 components.push(Component {
311 selector: SimpleSelector {
312 kind: SimpleSelectorType::Type(ident),
313 subselectors: Vec::new(),
314 },
315 combinator,
316 });
317
318 combinator = Combinator::None;
319 }
320 SelectorToken::ClassSelector(ident) => {
321 add_sub(SubSelector::Class(ident));
322 }
323 SelectorToken::IdSelector(id) => {
324 add_sub(SubSelector::Attribute("id", AttributeOperator::Matches(id)));
325 }
326 SelectorToken::AttributeSelector(name, op) => {
327 add_sub(SubSelector::Attribute(name, op));
328 }
329 SelectorToken::PseudoClass(ident) => {
330 let class = match ident {
331 "first-child" => PseudoClass::FirstChild,
332 "link" => PseudoClass::Link,
333 "visited" => PseudoClass::Visited,
334 "hover" => PseudoClass::Hover,
335 "active" => PseudoClass::Active,
336 "focus" => PseudoClass::Focus,
337 _ => {
338 warn!("':{}' is not supported. Selector skipped.", ident);
339 return (None, tokenizer.stream.pos());
340 }
341 };
342
343 add_sub(SubSelector::PseudoClass(class));
347 }
348 SelectorToken::PseudoElement(ident) => {
349 add_sub(SubSelector::PseudoElement(ident))
350 }
351 SelectorToken::LangPseudoClass(lang) => {
352 add_sub(SubSelector::PseudoClass(PseudoClass::Lang(lang)));
353 }
354 SelectorToken::DescendantCombinator => {
355 combinator = Combinator::Descendant;
356 }
357 SelectorToken::ChildCombinator => {
358 combinator = Combinator::Child;
359 }
360 SelectorToken::AdjacentCombinator => {
361 combinator = Combinator::AdjacentSibling;
362 }
363 }
364 }
365
366 if components.is_empty() {
367 (None, tokenizer.stream.pos())
368 } else if components[0].combinator != Combinator::None {
369 debug_assert_eq!(
370 components[0].combinator,
371 Combinator::None,
372 "the first component must not have a combinator"
373 );
374
375 (None, tokenizer.stream.pos())
376 } else {
377 let source = &text[0..tokenizer.stream.pos()];
378 (Some(Selector { source, components }), tokenizer.stream.pos())
379 }
380}
381
382impl fmt::Display for Selector<'_> {
383 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384 for component in &self.components {
385 match component.combinator {
386 Combinator::Descendant => write!(f, " ")?,
387 Combinator::Child => write!(f, " > ")?,
388 Combinator::AdjacentSibling => write!(f, " + ")?,
389 Combinator::None => {}
390 }
391
392 match component.selector.kind {
393 SimpleSelectorType::Universal => write!(f, "*")?,
394 SimpleSelectorType::Type(ident) => write!(f, "{}", ident)?,
395 };
396
397 for sel in &component.selector.subselectors {
398 match sel {
399 SubSelector::Attribute(name, operator) => {
400 match operator {
401 AttributeOperator::Exists => {
402 write!(f, "[{}]", name)?;
403 }
404 AttributeOperator::Matches(value) => {
405 write!(f, "[{}='{}']", name, value)?;
406 }
407 AttributeOperator::Contains(value) => {
408 write!(f, "[{}~='{}']", name, value)?;
409 }
410 AttributeOperator::StartsWith(value) => {
411 write!(f, "[{}|='{}']", name, value)?;
412 }
413 };
414 }
415 SubSelector::PseudoClass(class) => write!(f, ":{}", class)?,
416 SubSelector::Class(class) => write!(f, ".{}", class)?,
417 SubSelector::PseudoElement(pseudo_element) => write!(f, "::{}", pseudo_element)?,
418 }
419 }
420 }
421
422 Ok(())
423 }
424}
425
426#[derive(Clone, Copy, PartialEq, Debug)]
428pub enum SelectorToken<'a> {
429 UniversalSelector,
431
432 TypeSelector(&'a str),
434
435 ClassSelector(&'a str),
437
438 IdSelector(&'a str),
440
441 AttributeSelector(&'a str, AttributeOperator<'a>),
443
444 PseudoClass(&'a str),
446
447 PseudoElement(&'a str),
449
450 LangPseudoClass(&'a str),
452
453 DescendantCombinator,
455
456 ChildCombinator,
458
459 AdjacentCombinator,
461}
462
463pub struct SelectorTokenizer<'a> {
478 stream: Stream<'a>,
479 after_combinator: bool,
480 finished: bool,
481}
482
483impl<'a> From<&'a str> for SelectorTokenizer<'a> {
484 fn from(text: &'a str) -> Self {
485 SelectorTokenizer {
486 stream: Stream::from(text),
487 after_combinator: true,
488 finished: false,
489 }
490 }
491}
492
493impl<'a> Iterator for SelectorTokenizer<'a> {
494 type Item = Result<SelectorToken<'a>, Error>;
495
496 fn next(&mut self) -> Option<Self::Item> {
497 if self.finished || self.stream.at_end() {
498 if self.after_combinator {
499 self.after_combinator = false;
500 return Some(Err(Error::SelectorMissing));
501 }
502
503 return None;
504 }
505
506 macro_rules! try2 {
507 ($e:expr) => {
508 match $e {
509 Ok(v) => v,
510 Err(e) => {
511 self.finished = true;
512 return Some(Err(e));
513 }
514 }
515 };
516 }
517
518 match self.stream.curr_byte_unchecked() {
519 b'*' => {
520 if !self.after_combinator {
521 self.finished = true;
522 return Some(Err(Error::UnexpectedSelector));
523 }
524
525 self.after_combinator = false;
526 self.stream.advance(1);
527 Some(Ok(SelectorToken::UniversalSelector))
528 }
529 b'#' => {
530 self.after_combinator = false;
531 self.stream.advance(1);
532 let ident = try2!(self.stream.consume_ident());
533 Some(Ok(SelectorToken::IdSelector(ident)))
534 }
535 b'.' => {
536 self.after_combinator = false;
537 self.stream.advance(1);
538 let ident = try2!(self.stream.consume_ident());
539 Some(Ok(SelectorToken::ClassSelector(ident)))
540 }
541 b'[' => {
542 self.after_combinator = false;
543 self.stream.advance(1);
544 let ident = try2!(self.stream.consume_ident());
545
546 let op = match try2!(self.stream.curr_byte()) {
547 b']' => AttributeOperator::Exists,
548 b'=' => {
549 self.stream.advance(1);
550 let value = try2!(self.stream.consume_string());
551 AttributeOperator::Matches(value)
552 }
553 b'~' => {
554 self.stream.advance(1);
555 try2!(self.stream.consume_byte(b'='));
556 let value = try2!(self.stream.consume_string());
557 AttributeOperator::Contains(value)
558 }
559 b'|' => {
560 self.stream.advance(1);
561 try2!(self.stream.consume_byte(b'='));
562 let value = try2!(self.stream.consume_string());
563 AttributeOperator::StartsWith(value)
564 }
565 _ => {
566 self.finished = true;
567 return Some(Err(Error::InvalidAttributeSelector));
568 }
569 };
570
571 try2!(self.stream.consume_byte(b']'));
572
573 Some(Ok(SelectorToken::AttributeSelector(ident, op)))
574 }
575 b':' => {
576 self.after_combinator = false;
577 self.stream.advance(1);
578 let is_pseudo_element = Ok(b':') == self.stream.curr_byte();
579 if is_pseudo_element {
580 self.stream.advance(1);
581 }
582 let ident = try2!(self.stream.consume_ident());
583
584 if ident == "lang" && !is_pseudo_element {
585 try2!(self.stream.consume_byte(b'('));
586 let lang = self.stream.consume_bytes(|c| c != b')').trim();
587 try2!(self.stream.consume_byte(b')'));
588
589 if lang.is_empty() {
590 self.finished = true;
591 return Some(Err(Error::InvalidLanguagePseudoClass));
592 }
593
594 Some(Ok(SelectorToken::LangPseudoClass(lang)))
595 } else if is_pseudo_element {
596 Some(Ok(SelectorToken::PseudoElement(ident)))
597 } else {
598 Some(Ok(SelectorToken::PseudoClass(ident)))
599 }
600 }
601 b'>' => {
602 if self.after_combinator {
603 self.after_combinator = false;
604 self.finished = true;
605 return Some(Err(Error::UnexpectedCombinator));
606 }
607
608 self.stream.advance(1);
609 self.after_combinator = true;
610 Some(Ok(SelectorToken::ChildCombinator))
611 }
612 b'+' => {
613 if self.after_combinator {
614 self.after_combinator = false;
615 self.finished = true;
616 return Some(Err(Error::UnexpectedCombinator));
617 }
618
619 self.stream.advance(1);
620 self.after_combinator = true;
621 Some(Ok(SelectorToken::AdjacentCombinator))
622 }
623 b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
624 self.stream.skip_spaces();
625
626 if self.after_combinator {
627 return self.next();
628 }
629
630 while self.stream.curr_byte() == Ok(b'/') {
631 try2!(self.stream.skip_comment());
632 self.stream.skip_spaces();
633 }
634
635 match self.stream.curr_byte() {
636 Ok(b'>') | Ok(b'+') | Ok(b',') | Ok(b'{') | Err(_) => self.next(),
637 _ => {
638 if self.after_combinator {
639 self.after_combinator = false;
640 self.finished = true;
641 return Some(Err(Error::UnexpectedSelector));
642 }
643
644 self.after_combinator = true;
645 Some(Ok(SelectorToken::DescendantCombinator))
646 }
647 }
648 }
649 b'/' => {
650 if self.stream.next_byte() == Ok(b'*') {
651 try2!(self.stream.skip_comment());
652 } else {
653 self.finished = true;
654 }
655
656 self.next()
657 }
658 b',' | b'{' => {
659 self.finished = true;
660 self.next()
661 }
662 _ => {
663 let ident = try2!(self.stream.consume_ident());
664
665 if !self.after_combinator {
666 self.finished = true;
667 return Some(Err(Error::UnexpectedSelector));
668 }
669
670 self.after_combinator = false;
671 Some(Ok(SelectorToken::TypeSelector(ident)))
672 }
673 }
674 }
675}