1use selectors::{context::SelectorCaches, parser::SelectorList};
11
12use super::{
13 CompiledSelector,
14 error::QueryResult,
15 selector::{ScrapeSelector, matches_selector_with_caches, parse_selector},
16};
17use crate::dom::{Document, NodeId};
18
19pub fn find(doc: &Document, selector: &str) -> QueryResult<Option<NodeId>> {
37 if let Some(id) = selector.strip_prefix('#')
39 && is_simple_selector(id)
40 && let Some(index) = doc.index()
41 {
42 return Ok(index.get_by_id(id));
43 }
44
45 if let Some(class) = selector.strip_prefix('.')
47 && is_simple_selector(class)
48 && let Some(index) = doc.index()
49 {
50 return Ok(index.get_by_class(class).first().copied());
51 }
52
53 let selectors = parse_selector(selector)?;
55 Ok(find_with_selector(doc, &selectors))
56}
57
58pub fn find_all(doc: &Document, selector: &str) -> QueryResult<Vec<NodeId>> {
76 if let Some(id) = selector.strip_prefix('#')
78 && is_simple_selector(id)
79 && let Some(index) = doc.index()
80 {
81 return Ok(index.get_by_id(id).into_iter().collect());
82 }
83
84 if let Some(class) = selector.strip_prefix('.')
86 && is_simple_selector(class)
87 && let Some(index) = doc.index()
88 {
89 return Ok(index.get_by_class(class).to_vec());
90 }
91
92 let selectors = parse_selector(selector)?;
94 Ok(find_all_with_selector(doc, &selectors))
95}
96
97pub fn find_within(doc: &Document, scope: NodeId, selector: &str) -> QueryResult<Option<NodeId>> {
126 let selectors = parse_selector(selector)?;
127 Ok(find_within_with_selector(doc, scope, &selectors))
128}
129
130pub fn find_all_within(doc: &Document, scope: NodeId, selector: &str) -> QueryResult<Vec<NodeId>> {
136 let selectors = parse_selector(selector)?;
137 Ok(find_all_within_with_selector(doc, scope, &selectors))
138}
139
140#[must_use]
144pub fn find_with_selector(
145 doc: &Document,
146 selectors: &SelectorList<ScrapeSelector>,
147) -> Option<NodeId> {
148 let root = doc.root()?;
149 let mut caches = SelectorCaches::default();
150
151 if matches_selector_with_caches(doc, root, selectors, &mut caches) {
153 return Some(root);
154 }
155
156 for id in doc.descendants(root) {
158 if let Some(node) = doc.get(id)
159 && node.kind.is_element()
160 && matches_selector_with_caches(doc, id, selectors, &mut caches)
161 {
162 return Some(id);
163 }
164 }
165
166 None
167}
168
169#[must_use]
171pub fn find_all_with_selector(
172 doc: &Document,
173 selectors: &SelectorList<ScrapeSelector>,
174) -> Vec<NodeId> {
175 let mut results = Vec::new();
176
177 let Some(root) = doc.root() else {
178 return results;
179 };
180
181 let mut caches = SelectorCaches::default();
182
183 if matches_selector_with_caches(doc, root, selectors, &mut caches) {
185 results.push(root);
186 }
187
188 for id in doc.descendants(root) {
190 if let Some(node) = doc.get(id)
191 && node.kind.is_element()
192 && matches_selector_with_caches(doc, id, selectors, &mut caches)
193 {
194 results.push(id);
195 }
196 }
197
198 results
199}
200
201#[must_use]
203pub fn find_within_with_selector(
204 doc: &Document,
205 scope: NodeId,
206 selectors: &SelectorList<ScrapeSelector>,
207) -> Option<NodeId> {
208 let mut caches = SelectorCaches::default();
209
210 for id in doc.descendants(scope) {
211 if let Some(node) = doc.get(id)
212 && node.kind.is_element()
213 && matches_selector_with_caches(doc, id, selectors, &mut caches)
214 {
215 return Some(id);
216 }
217 }
218 None
219}
220
221#[must_use]
223pub fn find_all_within_with_selector(
224 doc: &Document,
225 scope: NodeId,
226 selectors: &SelectorList<ScrapeSelector>,
227) -> Vec<NodeId> {
228 let mut results = Vec::new();
229 let mut caches = SelectorCaches::default();
230
231 for id in doc.descendants(scope) {
232 if let Some(node) = doc.get(id)
233 && node.kind.is_element()
234 && matches_selector_with_caches(doc, id, selectors, &mut caches)
235 {
236 results.push(id);
237 }
238 }
239
240 results
241}
242
243#[must_use]
261pub fn find_compiled(doc: &Document, selector: &CompiledSelector) -> Option<NodeId> {
262 find_with_selector(doc, selector.selector_list())
263}
264
265#[must_use]
283pub fn find_all_compiled(doc: &Document, selector: &CompiledSelector) -> Vec<NodeId> {
284 find_all_with_selector(doc, selector.selector_list())
285}
286
287#[must_use]
314pub fn find_within_compiled(
315 doc: &Document,
316 scope: NodeId,
317 selector: &CompiledSelector,
318) -> Option<NodeId> {
319 find_within_with_selector(doc, scope, selector.selector_list())
320}
321
322#[must_use]
324pub fn find_all_within_compiled(
325 doc: &Document,
326 scope: NodeId,
327 selector: &CompiledSelector,
328) -> Vec<NodeId> {
329 find_all_within_with_selector(doc, scope, selector.selector_list())
330}
331
/// Reports whether `s` is a plain, escape-free CSS identifier.
///
/// The index fast paths use `s` verbatim as an id or class name, which is only valid
/// when the full selector parser would read exactly the same name. The check is
/// therefore an allow-list rather than a deny-list:
///
/// * every character is an ASCII letter, ASCII digit, `-`, `_`, or non-ASCII;
/// * the text starts like an identifier: a letter, `_` or non-ASCII character, or a `-`
///   followed by one of those or by another `-`.
///
/// Everything else returns `false` so that the caller defers to the real parser. That
/// covers the empty string, combinators, any whitespace (including tab and newline),
/// other punctuation, backslash escapes such as `foo\62`, and names starting with a
/// digit.
#[inline]
fn is_simple_selector(s: &str) -> bool {
    /// Characters that may begin an identifier.
    fn is_ident_start(c: char) -> bool {
        c.is_ascii_alphabetic() || c == '_' || !c.is_ascii()
    }

    /// Characters that may appear anywhere in an identifier.
    fn is_ident_char(c: char) -> bool {
        is_ident_start(c) || c.is_ascii_digit() || c == '-'
    }

    let mut chars = s.chars();
    let starts_like_ident = match chars.next() {
        // A leading hyphen needs a second character that keeps it an identifier;
        // a lone "-" or "-1" is not one.
        Some('-') => chars.clone().next().is_some_and(|c| c == '-' || is_ident_start(c)),
        Some(first) => is_ident_start(first),
        None => false,
    };

    starts_like_ident && chars.all(is_ident_char)
}
341
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Html5everParser, Parser};

    /// Parses `html` with `Html5everParser`, panicking if parsing fails.
    fn parse_doc(html: &str) -> Document {
        Html5everParser.parse(html).unwrap()
    }

    /// Returns the first node from `doc.nodes()` whose `id` attribute equals `wanted`,
    /// panicking if there is none.
    fn node_with_id_attr(doc: &Document, wanted: &str) -> NodeId {
        doc.nodes()
            .find(|(_, node)| {
                node.kind
                    .attributes()
                    .and_then(|attrs| attrs.get("id"))
                    .is_some_and(|value| value == wanted)
            })
            .map(|(node_id, _)| node_id)
            .unwrap()
    }

    // --- basic lookups -----------------------------------------------------------

    #[test]
    fn test_find_by_tag() {
        let doc = parse_doc("<div><span>text</span></div>");
        let found = find(&doc, "span").unwrap();
        assert!(found.is_some());

        let span_node = doc.get(found.unwrap()).unwrap();
        assert_eq!(span_node.kind.tag_name(), Some("span"));
    }

    #[test]
    fn test_find_by_class() {
        let doc = parse_doc("<div class=\"container\"><span class=\"item\">text</span></div>");
        let found = find(&doc, ".item").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_find_by_id() {
        let doc = parse_doc("<div id=\"main\">text</div>");
        let found = find(&doc, "#main").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_find_returns_none_when_not_found() {
        let doc = parse_doc("<div>text</div>");
        let found = find(&doc, "span").unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn test_find_invalid_selector() {
        let doc = parse_doc("<div>text</div>");
        let outcome = find(&doc, "[");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_find_all_by_tag() {
        let doc = parse_doc("<ul><li>A</li><li>B</li><li>C</li></ul>");
        let matches = find_all(&doc, "li").unwrap();
        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn test_find_all_returns_empty_when_not_found() {
        let doc = parse_doc("<div>text</div>");
        let matches = find_all(&doc, "span").unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn test_find_all_by_class() {
        let doc =
            parse_doc("<div class=\"a\">1</div><div class=\"b\">2</div><div class=\"a\">3</div>");
        let matches = find_all(&doc, ".a").unwrap();
        assert_eq!(matches.len(), 2);
    }

    // --- compound selectors and combinators ---------------------------------------

    #[test]
    fn test_find_with_compound_selector() {
        let doc =
            parse_doc("<div class=\"foo\" id=\"bar\">match</div><div class=\"foo\">no id</div>");
        let found = find(&doc, "div.foo#bar").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_find_with_descendant_combinator() {
        let doc = parse_doc("<div><ul><li>item</li></ul></div>");
        let found = find(&doc, "div li").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_find_with_child_combinator() {
        let doc =
            parse_doc("<div><span>direct</span></div><div><ul><span>nested</span></ul></div>");
        let matches = find_all(&doc, "div > span").unwrap();
        assert_eq!(matches.len(), 1);
    }

    // --- scoped queries -------------------------------------------------------------

    #[test]
    fn test_find_within_scope() {
        let doc = parse_doc("<div id=\"a\"><span>A</span></div><div id=\"b\"><span>B</span></div>");
        let scope = node_with_id_attr(&doc, "b");

        let found = find_within(&doc, scope, "span").unwrap();
        assert!(found.is_some());

        // The hit must be the span nested in the scoped div, not the one in `#a`.
        let span_id = found.unwrap();
        assert_eq!(doc.parent(span_id).unwrap(), scope);
    }

    #[test]
    fn test_find_all_within_scope() {
        let doc = parse_doc("<ul id=\"list\"><li>1</li><li>2</li></ul><li>outside</li>");
        let scope = node_with_id_attr(&doc, "list");

        let matches = find_all_within(&doc, scope, "li").unwrap();
        assert_eq!(matches.len(), 2);
    }

    // --- ordering -------------------------------------------------------------------

    #[test]
    fn test_find_returns_first_match() {
        let doc = parse_doc(
            "<div class=\"item\" id=\"first\">1</div><div class=\"item\" id=\"second\">2</div>",
        );
        let found = find(&doc, ".item").unwrap();
        assert!(found.is_some());

        let node = doc.get(found.unwrap()).unwrap();
        let attrs = node.kind.attributes().unwrap();
        assert_eq!(attrs.get("id"), Some(&"first".to_string()));
    }

    #[test]
    fn test_find_all_preserves_order() {
        let doc = parse_doc("<ul><li id=\"a\">A</li><li id=\"b\">B</li><li id=\"c\">C</li></ul>");
        let matches = find_all(&doc, "li").unwrap();

        let ids: Vec<_> = matches
            .iter()
            .map(|node_id| {
                doc.get(*node_id)
                    .and_then(|node| node.kind.attributes())
                    .and_then(|attrs| attrs.get("id").cloned())
            })
            .collect();

        assert_eq!(ids, vec![Some("a".into()), Some("b".into()), Some("c".into())]);
    }

    // --- empty documents --------------------------------------------------------------

    #[test]
    fn test_find_empty_document() {
        let doc = Document::new();
        let found = find(&doc, "div").unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn test_find_all_empty_document() {
        let doc = Document::new();
        let matches = find_all(&doc, "div").unwrap();
        assert!(matches.is_empty());
    }

    // --- other selector kinds -----------------------------------------------------------

    #[test]
    fn test_find_with_attribute_selector() {
        let doc = parse_doc("<input type=\"text\"><input type=\"password\">");
        let found = find(&doc, "input[type=\"text\"]").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_find_all_multiple_selectors() {
        let doc = parse_doc("<div>a</div><span>b</span><p>c</p>");
        let matches = find_all(&doc, "div, span").unwrap();
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_find_universal_selector() {
        let doc = parse_doc("<div><span>text</span></div>");
        let matches = find_all(&doc, "*").unwrap();
        assert!(matches.len() >= 2);
    }

    // --- fast-path eligibility --------------------------------------------------------

    #[test]
    fn test_is_simple_selector() {
        for accepted in ["main", "my-id", "my_class", "id123"] {
            assert!(is_simple_selector(accepted));
        }

        for rejected in [
            "",
            "foo bar",
            "foo.bar",
            "foo#bar",
            "foo[attr]",
            "foo:hover",
            "foo>bar",
            "foo+bar",
            "foo~bar",
            "foo,bar",
            "*",
            "foo(bar)",
        ] {
            assert!(!is_simple_selector(rejected));
        }
    }

    // --- index fast paths ---------------------------------------------------------------

    #[test]
    fn test_fast_path_id_selector() {
        let doc = parse_doc("<div id='main'><span id='inner'>text</span></div>");

        let outer = find(&doc, "#main").unwrap();
        assert!(outer.is_some());
        let outer_node = doc.get(outer.unwrap()).unwrap();
        assert_eq!(outer_node.kind.tag_name(), Some("div"));

        let inner = find(&doc, "#inner").unwrap();
        assert!(inner.is_some());
    }

    #[test]
    fn test_fast_path_class_selector_find() {
        let doc = parse_doc("<div class='item'>A</div><div class='item'>B</div>");

        let found = find(&doc, ".item").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_fast_path_class_selector_find_all() {
        let doc = parse_doc(
            "<div class='item'>A</div><div class='item'>B</div><div class='item'>C</div>",
        );

        let matches = find_all(&doc, ".item").unwrap();
        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn test_fast_path_id_not_found() {
        let doc = parse_doc("<div id='main'>text</div>");

        let found = find(&doc, "#notfound").unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn test_fast_path_class_not_found() {
        let doc = parse_doc("<div class='foo'>text</div>");

        let matches = find_all(&doc, ".notfound").unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn test_complex_selector_fallback() {
        let doc = parse_doc("<div id='main' class='container'>text</div>");

        let id_and_class = find(&doc, "#main.container").unwrap();
        assert!(id_and_class.is_some());

        let tag_and_id = find(&doc, "div#main").unwrap();
        assert!(tag_and_id.is_some());

        let child_of_div = find(&doc, "div > #main").unwrap();
        assert!(child_of_div.is_none());
    }

    #[test]
    fn test_fast_path_duplicate_ids() {
        let doc = parse_doc("<div id='dup'>First</div><div id='dup'>Second</div>");

        let found = find(&doc, "#dup").unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_fast_path_multiple_classes() {
        let doc = parse_doc("<div class='foo bar'>A</div><div class='bar baz'>B</div>");

        let matches = find_all(&doc, ".bar").unwrap();
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_fast_path_with_no_index() {
        // Document assembled by hand instead of through the parser.
        let mut doc = Document::new();
        #[allow(clippy::default_trait_access)]
        let html_id = doc.create_element("html".to_string(), Default::default());
        doc.set_root(html_id);
        #[allow(clippy::default_trait_access)]
        let div_id = doc.create_element("div".to_string(), Default::default());
        doc.append_child(html_id, div_id);

        let by_id = find(&doc, "#test").unwrap();
        assert!(by_id.is_none());

        let by_class = find_all(&doc, ".test").unwrap();
        assert!(by_class.is_empty());
    }

    #[test]
    fn test_fast_path_unicode_selectors() {
        let doc = parse_doc("<div id='日本語'>Japanese</div><div class='中文'>Chinese</div>");

        let by_id = find(&doc, "#日本語").unwrap();
        assert!(by_id.is_some());

        let by_class = find_all(&doc, ".中文").unwrap();
        assert_eq!(by_class.len(), 1);
    }

    #[test]
    fn test_fast_path_very_long_selector() {
        let long_id = "a".repeat(1000);
        let html = format!("<div id='{long_id}'>text</div>");
        let doc = parse_doc(&html);

        let selector = format!("#{long_id}");
        let found = find(&doc, &selector).unwrap();
        assert!(found.is_some());
    }

    #[test]
    fn test_fast_path_empty_class_attribute() {
        let doc = parse_doc("<div class=''>Empty</div><div>No class</div>");

        let matches = find_all(&doc, ".foo").unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn test_fast_path_special_chars_in_selector() {
        let doc = parse_doc("<div id='test:id'>Colon</div><div class='foo.bar'>Dot</div>");

        let by_id = find(&doc, "#test\\:id").unwrap();
        assert!(by_id.is_some());

        let by_class = find_all(&doc, ".foo\\.bar").unwrap();
        assert_eq!(by_class.len(), 1);
    }

    #[test]
    fn test_fast_path_vs_fallback_consistency() {
        let doc =
            parse_doc("<div id='main' class='container'>A</div><div class='container'>B</div>");

        // `#main` versus the equivalent attribute selector.
        let id_short = find(&doc, "#main").unwrap();
        let id_attr = find(&doc, "[id='main']").unwrap();
        assert_eq!(id_short, id_attr);

        // `.container` versus the equivalent attribute selector.
        let class_short = find_all(&doc, ".container").unwrap();
        let class_attr = find_all(&doc, "[class~='container']").unwrap();
        assert_eq!(class_short.len(), class_attr.len());
    }
}