1use std::{
10 borrow::Borrow,
11 fmt,
12 hash::{Hash, Hasher},
13};
14
15use cssparser::ToCss;
16use selectors::{
17 Element, OpaqueElement, SelectorList,
18 attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint},
19 context::{MatchingForInvalidation, NeedsSelectorFlags, QuirksMode, SelectorCaches},
20 matching::{ElementSelectorFlags, MatchingContext, MatchingMode},
21 parser::{ParseRelative, Parser, SelectorImpl, SelectorParseErrorKind},
22};
23
24use super::error::{QueryError, QueryResult};
25use crate::dom::{Document, NodeId};
26
/// Owned, case-preserving string used by the selector implementation.
///
/// The text is stored exactly as it was supplied; no normalisation happens
/// on construction.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct CssString(String);

impl CssString {
    /// Builds a `CssString` from anything convertible into a `String`.
    pub fn new(s: impl Into<String>) -> Self {
        let text: String = s.into();
        CssString(text)
    }

    /// Borrows the wrapped text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl From<&str> for CssString {
    /// Copies `s` into a new `CssString`.
    fn from(s: &str) -> Self {
        CssString(String::from(s))
    }
}

impl AsRef<str> for CssString {
    /// Borrows the wrapped text as a `str`.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
54
55impl ToCss for CssString {
56 fn to_css<W>(&self, dest: &mut W) -> fmt::Result
57 where
58 W: fmt::Write,
59 {
60 cssparser::serialize_identifier(&self.0, dest)
61 }
62}
63
64impl Borrow<str> for CssString {
65 fn borrow(&self) -> &str {
66 &self.0
67 }
68}
69
70impl precomputed_hash::PrecomputedHash for CssString {
71 #[allow(clippy::cast_possible_truncation)]
72 fn precomputed_hash(&self) -> u32 {
73 use std::collections::hash_map::DefaultHasher;
74
75 let mut hasher = DefaultHasher::new();
76 self.0.hash(&mut hasher);
77 hasher.finish() as u32
79 }
80}
81
/// Owned name that is ASCII-lowercased on construction.
///
/// Both [`CssLocalName::new`] and the `From<&str>` conversion fold ASCII
/// letters to lower case; non-ASCII characters are left untouched.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct CssLocalName(String);

impl CssLocalName {
    /// Builds a name from anything convertible into a `String`, folding
    /// ASCII letters to lower case.
    pub fn new(s: impl Into<String>) -> Self {
        let mut name: String = s.into();
        // Lower-case in place so an owned argument is reused rather than copied.
        name.make_ascii_lowercase();
        CssLocalName(name)
    }

    /// Borrows the (already lower-cased) name.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl From<&str> for CssLocalName {
    /// Copies `s` into a new name, folding ASCII letters to lower case.
    fn from(s: &str) -> Self {
        let mut name = String::from(s);
        name.make_ascii_lowercase();
        CssLocalName(name)
    }
}

impl AsRef<str> for CssLocalName {
    /// Borrows the (already lower-cased) name as a `str`.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
109
110impl ToCss for CssLocalName {
111 fn to_css<W>(&self, dest: &mut W) -> fmt::Result
112 where
113 W: fmt::Write,
114 {
115 dest.write_str(&self.0)
116 }
117}
118
119impl Borrow<str> for CssLocalName {
120 fn borrow(&self) -> &str {
121 &self.0
122 }
123}
124
125impl precomputed_hash::PrecomputedHash for CssLocalName {
126 #[allow(clippy::cast_possible_truncation)]
127 fn precomputed_hash(&self) -> u32 {
128 use std::collections::hash_map::DefaultHasher;
129
130 let mut hasher = DefaultHasher::new();
131 self.0.hash(&mut hasher);
132 hasher.finish() as u32
134 }
135}
136
/// Zero-sized marker type that carries this crate's `SelectorImpl`
/// configuration for the `selectors` engine.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ScrapeSelector;
143
/// Non-tree-structural pseudo-classes understood by the selector parser.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum NonTSPseudoClass {
    /// The `:link` pseudo-class.
    Link,
    /// The `:any-link` pseudo-class.
    AnyLink,
}
155
156impl selectors::parser::NonTSPseudoClass for NonTSPseudoClass {
157 type Impl = ScrapeSelector;
158
159 fn is_active_or_hover(&self) -> bool {
160 false
161 }
162
163 fn is_user_action_state(&self) -> bool {
164 false
165 }
166}
167
168impl ToCss for NonTSPseudoClass {
169 fn to_css<W>(&self, dest: &mut W) -> fmt::Result
170 where
171 W: fmt::Write,
172 {
173 match self {
174 Self::Link => dest.write_str(":link"),
175 Self::AnyLink => dest.write_str(":any-link"),
176 }
177 }
178}
179
/// Pseudo-element type required by the selector engine.
///
/// The enum has no variants, so no value of this type can ever exist.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PseudoElement {}
183
184impl selectors::parser::PseudoElement for PseudoElement {
185 type Impl = ScrapeSelector;
186}
187
188impl ToCss for PseudoElement {
189 fn to_css<W>(&self, _dest: &mut W) -> fmt::Result
190 where
191 W: fmt::Write,
192 {
193 unreachable!("PseudoElement has no variants")
195 }
196}
197
198impl SelectorImpl for ScrapeSelector {
199 type ExtraMatchingData<'a> = ();
200 type AttrValue = CssString;
201 type Identifier = CssLocalName;
202 type LocalName = CssLocalName;
203 type NamespaceUrl = CssString;
204 type NamespacePrefix = CssLocalName;
205 type BorrowedLocalName = CssLocalName;
206 type BorrowedNamespaceUrl = CssString;
207 type NonTSPseudoClass = NonTSPseudoClass;
208 type PseudoElement = PseudoElement;
209}
210
211struct SelectorParser;
213
214impl<'i> Parser<'i> for SelectorParser {
215 type Impl = ScrapeSelector;
216 type Error = SelectorParseErrorKind<'i>;
217
218 fn parse_non_ts_pseudo_class(
219 &self,
220 location: cssparser::SourceLocation,
221 name: cssparser::CowRcStr<'i>,
222 ) -> Result<NonTSPseudoClass, cssparser::ParseError<'i, Self::Error>> {
223 match name.as_ref() {
224 "link" => Ok(NonTSPseudoClass::Link),
225 "any-link" => Ok(NonTSPseudoClass::AnyLink),
226 _ => Err(cssparser::ParseError {
227 kind: cssparser::ParseErrorKind::Custom(
228 SelectorParseErrorKind::UnsupportedPseudoClassOrElement(name),
229 ),
230 location,
231 }),
232 }
233 }
234}
235
236pub fn parse_selector(selector: &str) -> QueryResult<SelectorList<ScrapeSelector>> {
250 let mut parser_input = cssparser::ParserInput::new(selector);
251 let mut parser = cssparser::Parser::new(&mut parser_input);
252
253 SelectorList::parse(&SelectorParser, &mut parser, ParseRelative::No).map_err(|e| {
254 QueryError::invalid_selector(format!(
257 "invalid selector at line {}, column {}",
258 e.location.line, e.location.column
259 ))
260 })
261}
262
263#[derive(Debug, Clone, Copy)]
268pub struct ElementWrapper<'a> {
269 doc: &'a Document,
270 id: NodeId,
271}
272
273impl<'a> ElementWrapper<'a> {
274 #[must_use]
276 pub fn new(doc: &'a Document, id: NodeId) -> Self {
277 Self { doc, id }
278 }
279
280 #[must_use]
282 pub fn node_id(&self) -> NodeId {
283 self.id
284 }
285
286 #[must_use]
288 pub fn document(&self) -> &'a Document {
289 self.doc
290 }
291}
292
293impl PartialEq for ElementWrapper<'_> {
294 fn eq(&self, other: &Self) -> bool {
295 std::ptr::eq(self.doc, other.doc) && self.id == other.id
300 }
301}
302
303impl Eq for ElementWrapper<'_> {}
304
305impl Element for ElementWrapper<'_> {
306 type Impl = ScrapeSelector;
307
308 fn opaque(&self) -> OpaqueElement {
309 OpaqueElement::new(self)
310 }
311
312 fn parent_element(&self) -> Option<Self> {
313 let parent_id = self.doc.parent(self.id)?;
314 let parent_node = self.doc.get(parent_id)?;
315 if parent_node.kind.is_element() { Some(Self::new(self.doc, parent_id)) } else { None }
316 }
317
318 fn parent_node_is_shadow_root(&self) -> bool {
319 false
320 }
321
322 fn containing_shadow_host(&self) -> Option<Self> {
323 None
324 }
325
326 fn is_pseudo_element(&self) -> bool {
327 false
328 }
329
330 fn prev_sibling_element(&self) -> Option<Self> {
331 let mut current = self.doc.prev_sibling(self.id);
332 while let Some(sibling_id) = current {
333 if let Some(node) = self.doc.get(sibling_id)
334 && node.kind.is_element()
335 {
336 return Some(Self::new(self.doc, sibling_id));
337 }
338 current = self.doc.prev_sibling(sibling_id);
339 }
340 None
341 }
342
343 fn next_sibling_element(&self) -> Option<Self> {
344 let mut current = self.doc.next_sibling(self.id);
345 while let Some(sibling_id) = current {
346 if let Some(node) = self.doc.get(sibling_id)
347 && node.kind.is_element()
348 {
349 return Some(Self::new(self.doc, sibling_id));
350 }
351 current = self.doc.next_sibling(sibling_id);
352 }
353 None
354 }
355
356 fn first_element_child(&self) -> Option<Self> {
357 for child_id in self.doc.children(self.id) {
358 if let Some(node) = self.doc.get(child_id)
359 && node.kind.is_element()
360 {
361 return Some(Self::new(self.doc, child_id));
362 }
363 }
364 None
365 }
366
367 fn is_html_element_in_html_document(&self) -> bool {
368 true
369 }
370
371 fn has_local_name(&self, local_name: &<Self::Impl as SelectorImpl>::BorrowedLocalName) -> bool {
372 self.doc
373 .get(self.id)
374 .and_then(|n| n.kind.tag_name())
375 .is_some_and(|name| name.eq_ignore_ascii_case(local_name.as_str()))
376 }
377
378 fn has_namespace(&self, _ns: &<Self::Impl as SelectorImpl>::BorrowedNamespaceUrl) -> bool {
379 true
381 }
382
383 fn is_same_type(&self, other: &Self) -> bool {
384 self.doc
385 .get(self.id)
386 .and_then(|n| n.kind.tag_name())
387 .zip(other.doc.get(other.id).and_then(|n| n.kind.tag_name()))
388 .is_some_and(|(a, b)| a.eq_ignore_ascii_case(b))
389 }
390
391 fn attr_matches(
392 &self,
393 ns: &NamespaceConstraint<&<Self::Impl as SelectorImpl>::NamespaceUrl>,
394 local_name: &<Self::Impl as SelectorImpl>::BorrowedLocalName,
395 operation: &AttrSelectorOperation<&<Self::Impl as SelectorImpl>::AttrValue>,
396 ) -> bool {
397 let _ = ns;
402
403 let Some(node) = self.doc.get(self.id) else { return false };
404 let Some(attrs) = node.kind.attributes() else { return false };
405
406 let attr_name = local_name.as_str();
408 let value = attrs.iter().find(|(k, _)| k.eq_ignore_ascii_case(attr_name)).map(|(_, v)| v);
409
410 let Some(value) = value else { return false };
411
412 operation.eval_str(value)
413 }
414
415 fn match_non_ts_pseudo_class(
416 &self,
417 pc: &NonTSPseudoClass,
418 _context: &mut MatchingContext<Self::Impl>,
419 ) -> bool {
420 match pc {
421 NonTSPseudoClass::Link | NonTSPseudoClass::AnyLink => {
422 let Some(node) = self.doc.get(self.id) else { return false };
424 let Some(tag_name) = node.kind.tag_name() else { return false };
425 let Some(attrs) = node.kind.attributes() else { return false };
426
427 matches!(tag_name, "a" | "area" | "link") && attrs.contains_key("href")
428 }
429 }
430 }
431
432 fn match_pseudo_element(
433 &self,
434 _pe: &PseudoElement,
435 _context: &mut MatchingContext<Self::Impl>,
436 ) -> bool {
437 false
439 }
440
441 fn is_link(&self) -> bool {
442 let Some(node) = self.doc.get(self.id) else { return false };
443 let Some(tag_name) = node.kind.tag_name() else { return false };
444 let Some(attrs) = node.kind.attributes() else { return false };
445
446 matches!(tag_name, "a" | "area" | "link") && attrs.contains_key("href")
447 }
448
449 fn is_html_slot_element(&self) -> bool {
450 false
451 }
452
453 fn has_id(
454 &self,
455 id: &<Self::Impl as SelectorImpl>::Identifier,
456 case_sensitivity: CaseSensitivity,
457 ) -> bool {
458 let Some(node) = self.doc.get(self.id) else { return false };
459 let Some(attrs) = node.kind.attributes() else { return false };
460 let Some(element_id) = attrs.get("id") else { return false };
461
462 case_sensitivity.eq(element_id.as_bytes(), id.as_str().as_bytes())
463 }
464
465 fn has_class(
466 &self,
467 name: &<Self::Impl as SelectorImpl>::Identifier,
468 case_sensitivity: CaseSensitivity,
469 ) -> bool {
470 let Some(node) = self.doc.get(self.id) else { return false };
471 let Some(attrs) = node.kind.attributes() else { return false };
472 let Some(class_attr) = attrs.get("class") else { return false };
473
474 class_attr
475 .split_whitespace()
476 .any(|class| case_sensitivity.eq(class.as_bytes(), name.as_str().as_bytes()))
477 }
478
479 fn imported_part(
480 &self,
481 _name: &<Self::Impl as SelectorImpl>::Identifier,
482 ) -> Option<<Self::Impl as SelectorImpl>::Identifier> {
483 None
484 }
485
486 fn is_part(&self, _name: &<Self::Impl as SelectorImpl>::Identifier) -> bool {
487 false
488 }
489
490 fn is_empty(&self) -> bool {
491 for child_id in self.doc.children(self.id) {
493 if let Some(node) = self.doc.get(child_id) {
494 match &node.kind {
495 crate::dom::NodeKind::Element { .. } => return false,
496 crate::dom::NodeKind::Text { content } => {
497 if !content.trim().is_empty() {
498 return false;
499 }
500 }
501 crate::dom::NodeKind::Comment { .. } => {}
502 }
503 }
504 }
505 true
506 }
507
508 fn is_root(&self) -> bool {
509 self.doc.root().is_some_and(|_root_id| {
510 self.doc
512 .get(self.id)
513 .is_some_and(|node| node.kind.tag_name().is_some_and(|name| name == "html"))
514 && self.parent_element().is_none()
515 })
516 }
517
518 fn apply_selector_flags(&self, _flags: ElementSelectorFlags) {
519 }
521
522 fn add_element_unique_hashes(&self, _filter: &mut selectors::bloom::BloomFilter) -> bool {
523 false
524 }
525
526 fn has_custom_state(&self, _name: &<Self::Impl as SelectorImpl>::Identifier) -> bool {
527 false
528 }
529}
530
531#[must_use]
557pub fn matches_selector(
558 doc: &Document,
559 id: NodeId,
560 selectors: &SelectorList<ScrapeSelector>,
561) -> bool {
562 let mut caches = SelectorCaches::default();
563 matches_selector_with_caches(doc, id, selectors, &mut caches)
564}
565
566#[must_use]
596pub fn matches_selector_with_caches(
597 doc: &Document,
598 id: NodeId,
599 selectors: &SelectorList<ScrapeSelector>,
600 caches: &mut SelectorCaches,
601) -> bool {
602 let element = ElementWrapper::new(doc, id);
603 let mut context = MatchingContext::new(
604 MatchingMode::Normal,
605 None,
606 caches,
607 QuirksMode::NoQuirks,
608 NeedsSelectorFlags::No,
609 MatchingForInvalidation::No,
610 );
611
612 selectors.slice().iter().any(|selector| {
613 selectors::matching::matches_selector(selector, 0, None, &element, &mut context)
614 })
615}
616
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Html5everParser, Parser};

    /// Parses `html` with the html5ever backend, panicking on failure.
    fn parse_doc(html: &str) -> Document {
        Html5everParser.parse(html).unwrap()
    }

    /// Returns the first node whose tag name is `tag`.
    fn find_element_by_tag(doc: &Document, tag: &str) -> Option<NodeId> {
        doc.nodes().find(|(_, n)| n.kind.tag_name() == Some(tag)).map(|(id, _)| id)
    }

    /// Returns every node whose tag name is `tag`, in `Document::nodes` order.
    fn elements_by_tag(doc: &Document, tag: &str) -> Vec<NodeId> {
        doc.nodes().filter(|(_, n)| n.kind.tag_name() == Some(tag)).map(|(id, _)| id).collect()
    }

    /// Parses `css` and returns how many selectors the list contains.
    fn selector_count(css: &str) -> usize {
        parse_selector(css).unwrap().slice().len()
    }

    /// Parses `css` and reports whether node `id` matches it.
    fn is_match(doc: &Document, id: NodeId, css: &str) -> bool {
        matches_selector(doc, id, &parse_selector(css).unwrap())
    }

    /// Parses `css` once and counts how many of `ids` match it.
    fn count_matches(doc: &Document, ids: &[NodeId], css: &str) -> usize {
        let selectors = parse_selector(css).unwrap();
        ids.iter().filter(|id| matches_selector(doc, **id, &selectors)).count()
    }

    // ---- parsing -------------------------------------------------------

    #[test]
    fn test_parse_simple_selector() {
        assert_eq!(selector_count("div"), 1);
    }

    #[test]
    fn test_parse_class_selector() {
        assert_eq!(selector_count(".foo"), 1);
    }

    #[test]
    fn test_parse_id_selector() {
        assert_eq!(selector_count("#bar"), 1);
    }

    #[test]
    fn test_parse_compound_selector() {
        assert_eq!(selector_count("div.foo#bar"), 1);
    }

    #[test]
    fn test_parse_descendant_combinator() {
        assert_eq!(selector_count("div span"), 1);
    }

    #[test]
    fn test_parse_child_combinator() {
        assert_eq!(selector_count("div > span"), 1);
    }

    #[test]
    fn test_parse_adjacent_sibling() {
        assert_eq!(selector_count("h1 + p"), 1);
    }

    #[test]
    fn test_parse_general_sibling() {
        assert_eq!(selector_count("h1 ~ p"), 1);
    }

    #[test]
    fn test_parse_attribute_exists() {
        assert_eq!(selector_count("[href]"), 1);
    }

    #[test]
    fn test_parse_attribute_equals() {
        assert_eq!(selector_count("[type=\"text\"]"), 1);
    }

    #[test]
    fn test_parse_multiple_selectors() {
        assert_eq!(selector_count("div, span, p"), 3);
    }

    #[test]
    fn test_parse_invalid_selector() {
        assert!(parse_selector("[").is_err());
    }

    // ---- simple matching -----------------------------------------------

    #[test]
    fn test_match_tag_selector() {
        let doc = parse_doc("<div><span>text</span></div>");
        let span_id = find_element_by_tag(&doc, "span").unwrap();
        assert!(is_match(&doc, span_id, "span"));
    }

    #[test]
    fn test_match_class_selector() {
        let doc = parse_doc("<div class=\"foo bar\">text</div>");
        let div_id = find_element_by_tag(&doc, "div").unwrap();

        assert!(is_match(&doc, div_id, ".foo"));
        assert!(is_match(&doc, div_id, ".bar"));
        assert!(!is_match(&doc, div_id, ".baz"));
    }

    #[test]
    fn test_match_id_selector() {
        let doc = parse_doc("<div id=\"main\">text</div>");
        let div_id = find_element_by_tag(&doc, "div").unwrap();

        assert!(is_match(&doc, div_id, "#main"));
        assert!(!is_match(&doc, div_id, "#other"));
    }

    #[test]
    fn test_match_compound_selector() {
        let doc = parse_doc("<div class=\"foo\" id=\"bar\">text</div>");
        let div_id = find_element_by_tag(&doc, "div").unwrap();

        assert!(is_match(&doc, div_id, "div.foo#bar"));
        assert!(!is_match(&doc, div_id, "div.foo#baz"));
    }

    #[test]
    fn test_match_attribute_exists() {
        let doc = parse_doc("<a href=\"/page\">link</a>");
        let a_id = find_element_by_tag(&doc, "a").unwrap();

        // Sanity-check the parsed document before exercising the selector.
        let attrs = doc.get(a_id).unwrap().kind.attributes().unwrap();
        assert!(attrs.contains_key("href"), "Element should have href attribute: {attrs:?}");

        let present = parse_selector("[href]").unwrap();
        assert_eq!(present.slice().len(), 1, "Should have one selector");
        assert!(matches_selector(&doc, a_id, &present), "Element with href should match [href]");

        assert!(!is_match(&doc, a_id, "[title]"));
    }

    #[test]
    fn test_match_attribute_equals() {
        let doc = parse_doc("<input type=\"text\">");
        let input_id = find_element_by_tag(&doc, "input").unwrap();

        assert!(is_match(&doc, input_id, "[type=\"text\"]"));
        assert!(!is_match(&doc, input_id, "[type=\"password\"]"));
    }

    #[test]
    fn test_element_is_empty() {
        let doc = parse_doc("<div></div><span>text</span>");
        let div_id = find_element_by_tag(&doc, "div").unwrap();
        let span_id = find_element_by_tag(&doc, "span").unwrap();

        let selectors = parse_selector(":empty").unwrap();
        assert!(matches_selector(&doc, div_id, &selectors));
        assert!(!matches_selector(&doc, span_id, &selectors));
    }

    #[test]
    fn test_element_first_child() {
        let doc = parse_doc("<ul><li>first</li><li>second</li></ul>");
        let first_li = find_element_by_tag(&doc, "li").unwrap();

        assert!(is_match(&doc, first_li, "li:first-child"));
    }

    #[test]
    fn test_match_not_selector() {
        let doc = parse_doc("<div class=\"foo\">a</div><div class=\"bar\">b</div>");
        let divs = elements_by_tag(&doc, "div");

        assert_eq!(count_matches(&doc, &divs, "div:not(.foo)"), 1);
    }

    // ---- attribute operators -------------------------------------------

    #[test]
    fn test_match_attribute_prefix() {
        let doc = parse_doc(
            r#"<a href="https://example.com">secure</a><a href="http://example.com">insecure</a>"#,
        );
        let links = elements_by_tag(&doc, "a");
        assert_eq!(links.len(), 2);

        let match_count = count_matches(&doc, &links, "[href^=\"https\"]");
        assert_eq!(match_count, 1, "[attr^=prefix] should match elements starting with prefix");
    }

    #[test]
    fn test_match_attribute_suffix() {
        let doc = parse_doc(r#"<a href="/page.html">html</a><a href="/page.pdf">pdf</a>"#);
        let links = elements_by_tag(&doc, "a");
        assert_eq!(links.len(), 2);

        let match_count = count_matches(&doc, &links, "[href$=\".html\"]");
        assert_eq!(match_count, 1, "[attr$=suffix] should match elements ending with suffix");
    }

    #[test]
    fn test_match_attribute_contains() {
        let doc = parse_doc(r#"<a href="/foo/bar/baz">yes</a><a href="/qux">no</a>"#);
        let links = elements_by_tag(&doc, "a");
        assert_eq!(links.len(), 2);

        let match_count = count_matches(&doc, &links, "[href*=\"bar\"]");
        assert_eq!(match_count, 1, "[attr*=substring] should match elements containing substring");
    }

    #[test]
    fn test_match_attribute_word() {
        let doc = parse_doc(r#"<div class="foo bar baz">yes</div><div class="foobar">no</div>"#);
        let divs = elements_by_tag(&doc, "div");
        assert_eq!(divs.len(), 2);

        let match_count = count_matches(&doc, &divs, "[class~=\"bar\"]");
        assert_eq!(
            match_count, 1,
            "[attr~=word] should match elements with word in space-separated list"
        );
    }

    #[test]
    fn test_match_attribute_lang() {
        let doc = parse_doc(
            r#"<div lang="en-US">US</div><div lang="en-GB">GB</div><div lang="fr">FR</div>"#,
        );
        let divs = elements_by_tag(&doc, "div");
        assert_eq!(divs.len(), 3);

        let match_count = count_matches(&doc, &divs, "[lang|=\"en\"]");
        assert_eq!(match_count, 2, "[attr|=lang] should match 'en' and 'en-*' values");
    }

    // ---- structural pseudo-classes -------------------------------------

    #[test]
    fn test_match_nth_child_even() {
        let doc = parse_doc("<ul><li>1</li><li>2</li><li>3</li><li>4</li></ul>");
        let lis = elements_by_tag(&doc, "li");
        assert_eq!(lis.len(), 4);

        let match_count = count_matches(&doc, &lis, "li:nth-child(even)");
        assert_eq!(match_count, 2, ":nth-child(even) should match 2nd and 4th elements");
    }

    #[test]
    fn test_match_nth_child_2n_plus_1() {
        let doc = parse_doc("<ul><li>1</li><li>2</li><li>3</li><li>4</li></ul>");
        let lis = elements_by_tag(&doc, "li");
        assert_eq!(lis.len(), 4);

        let match_count = count_matches(&doc, &lis, "li:nth-child(2n+1)");
        assert_eq!(match_count, 2, ":nth-child(2n+1) should match odd elements (1st and 3rd)");
    }

    #[test]
    fn test_match_last_child() {
        let doc = parse_doc("<ul><li id=\"first\">1</li><li id=\"last\">2</li></ul>");
        let lis = elements_by_tag(&doc, "li");
        assert_eq!(lis.len(), 2);

        let selectors = parse_selector("li:last-child").unwrap();
        let matched: Vec<NodeId> =
            lis.iter().copied().filter(|id| matches_selector(&doc, *id, &selectors)).collect();
        assert_eq!(matched.len(), 1, ":last-child should match exactly one element");

        // The single match must be the element carrying id="last".
        let attrs = doc.get(matched[0]).unwrap().kind.attributes().unwrap();
        assert_eq!(attrs.get("id"), Some(&"last".to_string()));
    }

    // ---- sibling combinators -------------------------------------------

    #[test]
    fn test_match_adjacent_sibling() {
        let doc = parse_doc("<h1>Title</h1><p>First paragraph</p><p>Second paragraph</p>");
        let ps = elements_by_tag(&doc, "p");
        assert_eq!(ps.len(), 2);

        let match_count = count_matches(&doc, &ps, "h1 + p");
        assert_eq!(match_count, 1, "h1 + p should match only the immediately adjacent paragraph");
    }

    #[test]
    fn test_match_general_sibling() {
        let doc = parse_doc("<h1>Title</h1><p>First</p><p>Second</p>");
        let ps = elements_by_tag(&doc, "p");
        assert_eq!(ps.len(), 2);

        let match_count = count_matches(&doc, &ps, "h1 ~ p");
        assert_eq!(match_count, 2, "h1 ~ p should match all following sibling paragraphs");
    }

    #[test]
    fn test_match_general_sibling_not_preceding() {
        let doc = parse_doc("<p>Before</p><h1>Title</h1><p>After</p>");
        let ps = elements_by_tag(&doc, "p");
        assert_eq!(ps.len(), 2);

        let match_count = count_matches(&doc, &ps, "h1 ~ p");
        assert_eq!(match_count, 1, "h1 ~ p should not match paragraphs preceding h1");
    }

    #[test]
    fn test_match_adjacent_sibling_requires_immediate() {
        let doc = parse_doc("<h1>Title</h1><div>Separator</div><p>Paragraph</p>");
        let p_id = find_element_by_tag(&doc, "p").unwrap();

        assert!(
            !is_match(&doc, p_id, "h1 + p"),
            "h1 + p should not match when div is between them"
        );
    }

    // ---- shared caches -------------------------------------------------

    #[test]
    fn test_matches_selector_with_caches() {
        let doc = parse_doc("<ul><li>A</li><li>B</li><li>C</li></ul>");
        let selectors = parse_selector("li").unwrap();

        // One cache instance is shared across every element checked.
        let mut caches = SelectorCaches::default();
        let mut count = 0;
        for (id, node) in doc.nodes() {
            if node.kind.is_element()
                && matches_selector_with_caches(&doc, id, &selectors, &mut caches)
            {
                count += 1;
            }
        }
        assert_eq!(count, 3);
    }
}