1use selectors::{context::SelectorCaches, parser::SelectorList};
11
12use super::{
13 error::QueryResult,
14 selector::{ScrapeSelector, matches_selector_with_caches, parse_selector},
15};
16use crate::dom::{Document, NodeId};
17
18pub fn find(doc: &Document, selector: &str) -> QueryResult<Option<NodeId>> {
36 let selectors = parse_selector(selector)?;
37 Ok(find_with_selector(doc, &selectors))
38}
39
40pub fn find_all(doc: &Document, selector: &str) -> QueryResult<Vec<NodeId>> {
58 let selectors = parse_selector(selector)?;
59 Ok(find_all_with_selector(doc, &selectors))
60}
61
62pub fn find_within(doc: &Document, scope: NodeId, selector: &str) -> QueryResult<Option<NodeId>> {
91 let selectors = parse_selector(selector)?;
92 Ok(find_within_with_selector(doc, scope, &selectors))
93}
94
95pub fn find_all_within(doc: &Document, scope: NodeId, selector: &str) -> QueryResult<Vec<NodeId>> {
101 let selectors = parse_selector(selector)?;
102 Ok(find_all_within_with_selector(doc, scope, &selectors))
103}
104
105#[must_use]
109pub fn find_with_selector(
110 doc: &Document,
111 selectors: &SelectorList<ScrapeSelector>,
112) -> Option<NodeId> {
113 let root = doc.root()?;
114 let mut caches = SelectorCaches::default();
115
116 if matches_selector_with_caches(doc, root, selectors, &mut caches) {
118 return Some(root);
119 }
120
121 for id in doc.descendants(root) {
123 if let Some(node) = doc.get(id)
124 && node.kind.is_element()
125 && matches_selector_with_caches(doc, id, selectors, &mut caches)
126 {
127 return Some(id);
128 }
129 }
130
131 None
132}
133
134#[must_use]
136pub fn find_all_with_selector(
137 doc: &Document,
138 selectors: &SelectorList<ScrapeSelector>,
139) -> Vec<NodeId> {
140 let mut results = Vec::new();
141
142 let Some(root) = doc.root() else {
143 return results;
144 };
145
146 let mut caches = SelectorCaches::default();
147
148 if matches_selector_with_caches(doc, root, selectors, &mut caches) {
150 results.push(root);
151 }
152
153 for id in doc.descendants(root) {
155 if let Some(node) = doc.get(id)
156 && node.kind.is_element()
157 && matches_selector_with_caches(doc, id, selectors, &mut caches)
158 {
159 results.push(id);
160 }
161 }
162
163 results
164}
165
166#[must_use]
168pub fn find_within_with_selector(
169 doc: &Document,
170 scope: NodeId,
171 selectors: &SelectorList<ScrapeSelector>,
172) -> Option<NodeId> {
173 let mut caches = SelectorCaches::default();
174
175 for id in doc.descendants(scope) {
176 if let Some(node) = doc.get(id)
177 && node.kind.is_element()
178 && matches_selector_with_caches(doc, id, selectors, &mut caches)
179 {
180 return Some(id);
181 }
182 }
183 None
184}
185
186#[must_use]
188pub fn find_all_within_with_selector(
189 doc: &Document,
190 scope: NodeId,
191 selectors: &SelectorList<ScrapeSelector>,
192) -> Vec<NodeId> {
193 let mut results = Vec::new();
194 let mut caches = SelectorCaches::default();
195
196 for id in doc.descendants(scope) {
197 if let Some(node) = doc.get(id)
198 && node.kind.is_element()
199 && matches_selector_with_caches(doc, id, selectors, &mut caches)
200 {
201 results.push(id);
202 }
203 }
204
205 results
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Html5everParser, Parser};

    /// Parses `html` into a `Document`, panicking if the parser reports an error.
    fn parse_doc(html: &str) -> Document {
        Html5everParser.parse(html).unwrap()
    }

    /// Returns the first node whose `id` attribute equals `wanted`.
    ///
    /// Panics when no such node exists, which fails the calling test.
    fn node_with_id(doc: &Document, wanted: &str) -> NodeId {
        doc.nodes()
            .find(|(_, n)| {
                n.kind.attributes().and_then(|a| a.get("id")).is_some_and(|id| id == wanted)
            })
            .map(|(id, _)| id)
            .unwrap()
    }

    #[test]
    fn test_find_by_tag() {
        let doc = parse_doc("<div><span>text</span></div>");

        let span_id = find(&doc, "span").unwrap().expect("a <span> should be found");

        assert_eq!(doc.get(span_id).unwrap().kind.tag_name(), Some("span"));
    }

    #[test]
    fn test_find_by_class() {
        let doc = parse_doc("<div class=\"container\"><span class=\"item\">text</span></div>");
        assert!(find(&doc, ".item").unwrap().is_some());
    }

    #[test]
    fn test_find_by_id() {
        let doc = parse_doc("<div id=\"main\">text</div>");
        assert!(find(&doc, "#main").unwrap().is_some());
    }

    #[test]
    fn test_find_returns_none_when_not_found() {
        let doc = parse_doc("<div>text</div>");
        assert!(find(&doc, "span").unwrap().is_none());
    }

    #[test]
    fn test_find_invalid_selector() {
        let doc = parse_doc("<div>text</div>");
        assert!(find(&doc, "[").is_err());
    }

    #[test]
    fn test_find_all_by_tag() {
        let doc = parse_doc("<ul><li>A</li><li>B</li><li>C</li></ul>");
        let items = find_all(&doc, "li").unwrap();
        assert_eq!(items.len(), 3);
    }

    #[test]
    fn test_find_all_returns_empty_when_not_found() {
        let doc = parse_doc("<div>text</div>");
        assert!(find_all(&doc, "span").unwrap().is_empty());
    }

    #[test]
    fn test_find_all_by_class() {
        let doc =
            parse_doc("<div class=\"a\">1</div><div class=\"b\">2</div><div class=\"a\">3</div>");
        let hits = find_all(&doc, ".a").unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn test_find_with_compound_selector() {
        let doc =
            parse_doc("<div class=\"foo\" id=\"bar\">match</div><div class=\"foo\">no id</div>");
        assert!(find(&doc, "div.foo#bar").unwrap().is_some());
    }

    #[test]
    fn test_find_with_descendant_combinator() {
        let doc = parse_doc("<div><ul><li>item</li></ul></div>");
        assert!(find(&doc, "div li").unwrap().is_some());
    }

    #[test]
    fn test_find_with_child_combinator() {
        let doc =
            parse_doc("<div><span>direct</span></div><div><ul><span>nested</span></ul></div>");
        // Only the first <span> is a direct child of a <div>.
        let hits = find_all(&doc, "div > span").unwrap();
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn test_find_within_scope() {
        let doc = parse_doc("<div id=\"a\"><span>A</span></div><div id=\"b\"><span>B</span></div>");
        let scope = node_with_id(&doc, "b");

        let span_id = find_within(&doc, scope, "span")
            .unwrap()
            .expect("a <span> should be found inside the scope");

        // The match must be the span whose parent is the scope node.
        assert_eq!(doc.parent(span_id).unwrap(), scope);
    }

    #[test]
    fn test_find_all_within_scope() {
        let doc = parse_doc("<ul id=\"list\"><li>1</li><li>2</li></ul><li>outside</li>");
        let scope = node_with_id(&doc, "list");

        // The trailing <li> sits outside the list and must not be counted.
        let items = find_all_within(&doc, scope, "li").unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_find_returns_first_match() {
        let doc = parse_doc(
            "<div class=\"item\" id=\"first\">1</div><div class=\"item\" id=\"second\">2</div>",
        );

        let hit = find(&doc, ".item").unwrap().expect("an .item element should be found");

        let attrs = doc.get(hit).unwrap().kind.attributes().unwrap();
        assert_eq!(attrs.get("id"), Some(&String::from("first")));
    }

    #[test]
    fn test_find_all_preserves_order() {
        let doc = parse_doc("<ul><li id=\"a\">A</li><li id=\"b\">B</li><li id=\"c\">C</li></ul>");

        let ids: Vec<Option<String>> = find_all(&doc, "li")
            .unwrap()
            .iter()
            .map(|id| {
                doc.get(*id).and_then(|n| n.kind.attributes()).and_then(|a| a.get("id").cloned())
            })
            .collect();

        let expected =
            vec![Some(String::from("a")), Some(String::from("b")), Some(String::from("c"))];
        assert_eq!(ids, expected);
    }

    #[test]
    fn test_find_empty_document() {
        let doc = Document::new();
        assert!(find(&doc, "div").unwrap().is_none());
    }

    #[test]
    fn test_find_all_empty_document() {
        let doc = Document::new();
        assert!(find_all(&doc, "div").unwrap().is_empty());
    }

    #[test]
    fn test_find_with_attribute_selector() {
        let doc = parse_doc("<input type=\"text\"><input type=\"password\">");
        assert!(find(&doc, "input[type=\"text\"]").unwrap().is_some());
    }

    #[test]
    fn test_find_all_multiple_selectors() {
        let doc = parse_doc("<div>a</div><span>b</span><p>c</p>");
        let hits = find_all(&doc, "div, span").unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn test_find_universal_selector() {
        let doc = parse_doc("<div><span>text</span></div>");
        // At minimum the <div> and the <span> must match; the exact count
        // depends on what else the parser places in the tree.
        let hits = find_all(&doc, "*").unwrap();
        assert!(hits.len() >= 2);
    }
}