1use std::collections::{HashSet};
2
3use crate::dom::element::HTMLElement;
4
5#[derive(Debug, Clone, Copy, PartialEq, Eq)]
6pub enum Combinator {
7 Descendant,
8 Child,
9 Adjacent,
10 Sibling,
11}
12
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub enum AttrOp {
15 Exists,
16 Eq,
17 Prefix,
18 Suffix,
19 Substr,
20 Includes,
21 Dash,
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25pub enum CaseMode {
26 Sensitive,
27 Insensitive,
28}
29
30#[derive(Debug, Clone)]
31pub struct AttrMatcher {
32 pub name: String,
33 pub op: AttrOp,
34 pub value: String,
35 pub case: CaseMode,
36}
37
38#[derive(Debug, Clone)]
39pub enum NthExpr {
40 Number(i32),
41 Odd,
42 Even,
43 Pattern { a: i32, b: i32 },
44}
45
46#[derive(Debug, Clone)]
47pub enum Pseudo {
48 FirstChild,
49 LastChild,
50 OnlyChild,
51 FirstOfType,
52 LastOfType,
53 OnlyOfType,
54 NthChild(NthExpr),
55 NthLastChild(NthExpr),
56 NthOfType(NthExpr),
57 NthLastOfType(NthExpr),
58 Not(Vec<Selector>),
59 Scope,
60 Is(Vec<Selector>),
61 Where(Vec<Selector>),
62 Has(Vec<Selector>),
63 Empty,
64 Root,
65}
66
67#[derive(Debug, Clone, Default)]
68pub struct CompoundSelector {
69 pub tag: Option<String>,
70 pub id: Option<String>,
71 pub classes: Vec<String>,
72 pub attrs: Vec<AttrMatcher>,
73 pub pseudos: Vec<Pseudo>,
74}
75
76#[derive(Debug, Clone)]
77pub struct Selector(pub Vec<(Option<Combinator>, CompoundSelector)>);
78
79pub fn query_selector_all<'a>(root: &'a HTMLElement, selector_str: &str) -> Vec<&'a HTMLElement> {
80 let selectors = parse_selector_list(selector_str);
81 if selectors.is_empty() {
82 return vec![];
83 }
84 let mut ptr_set: HashSet<*const HTMLElement> = HashSet::new();
85 for sel in selectors {
86 for el in apply_selector(root, &sel) {
87 ptr_set.insert(el as *const HTMLElement);
88 }
89 }
90 let mut ordered = Vec::new();
91 collect_in_order(root, &ptr_set, &mut ordered);
92 ordered
93}
94
95fn collect_in_order<'a>(
96 el: &'a HTMLElement,
97 set: &HashSet<*const HTMLElement>,
98 out: &mut Vec<&'a HTMLElement>,
99) {
100 if !el.is_root() {
101 let p = el as *const HTMLElement;
102 if set.contains(&p) {
103 out.push(el);
104 }
105 }
106 for c in el.iter_elements() {
107 collect_in_order(c, set, out);
108 }
109}
110
111fn apply_selector<'a>(root: &'a HTMLElement, selector: &Selector) -> Vec<&'a HTMLElement> {
112 let mut current: Vec<&HTMLElement> = vec![root];
113 for (idx, (comb, comp)) in selector.0.iter().enumerate() {
114 let mut next_vec: Vec<&HTMLElement> = Vec::new();
115 for base in ¤t {
116 let effective = comb.unwrap_or(Combinator::Descendant);
117 match effective {
118 Combinator::Descendant => {
119 if idx == 0 {
120 if match_compound(root, base, comp) {
121 next_vec.push(base);
122 }
123 }
124 collect_descendants(root, base, comp, &mut next_vec);
125 }
126 Combinator::Child => {
127 for child in base.iter_elements() {
128 if match_compound(root, child, comp) {
129 next_vec.push(child);
130 }
131 }
132 }
133 Combinator::Adjacent => {
134 if let Some(parent) = find_parent(root, base) {
135 let sibs: Vec<&HTMLElement> = parent.iter_elements().collect();
136 if let Some(pos) = sibs.iter().position(|e| std::ptr::eq(*e, *base)) {
137 if pos + 1 < sibs.len() {
138 let n = sibs[pos + 1];
139 if match_compound(root, n, comp) {
140 next_vec.push(n);
141 }
142 }
143 }
144 }
145 }
146 Combinator::Sibling => {
147 if let Some(parent) = find_parent(root, base) {
148 let sibs: Vec<&HTMLElement> = parent.iter_elements().collect();
149 if let Some(pos) = sibs.iter().position(|e| std::ptr::eq(*e, *base)) {
150 for n in sibs.iter().skip(pos + 1) {
151 if match_compound(root, *n, comp) {
152 next_vec.push(*n);
153 }
154 }
155 }
156 }
157 }
158 }
159 }
160 let mut seen: HashSet<*const HTMLElement> = HashSet::new();
161 let mut dedup = Vec::new();
162 for e in next_vec {
163 let p = e as *const HTMLElement;
164 if seen.insert(p) {
165 dedup.push(e);
166 }
167 }
168 current = dedup;
169 }
170 current.into_iter().filter(|e| !e.is_root()).collect()
171}
172
173fn collect_descendants<'a>(
174 root: &'a HTMLElement,
175 el: &'a HTMLElement,
176 comp: &CompoundSelector,
177 out: &mut Vec<&'a HTMLElement>,
178) {
179 for child in el.iter_elements() {
180 if match_compound(root, child, comp) {
181 out.push(child);
182 }
183 collect_descendants(root, child, comp, out);
184 }
185}
186
187fn match_compound<'a>(root: &'a HTMLElement, el: &'a HTMLElement, comp: &CompoundSelector) -> bool {
188 if let Some(tag) = &comp.tag {
189 if !el.name().eq_ignore_ascii_case(tag) {
190 return false;
191 }
192 }
193 if let Some(id) = &comp.id {
194 match el.get_attr("id") {
195 Some(v) if v == id => {}
196 _ => return false,
197 }
198 }
199 for class in &comp.classes {
200 if !el.class_list_view().iter().any(|c| c == class) {
201 return false;
202 }
203 }
204 if !comp.attrs.is_empty() {
205 for matcher in &comp.attrs {
206 let raw_opt = el.get_attr(&matcher.name); match matcher.op {
208 AttrOp::Exists => {
209 if raw_opt.is_none() {
210 return false;
211 }
212 }
213 _ => {
214 if raw_opt.is_none() {
215 return false;
216 }
217 let val = raw_opt.unwrap();
218 let (left, right) = match matcher.case {
219 CaseMode::Insensitive => {
220 (val.to_ascii_lowercase(), matcher.value.to_ascii_lowercase())
221 }
222 CaseMode::Sensitive => (val.to_string(), matcher.value.clone()),
223 };
224 let ok = match matcher.op {
225 AttrOp::Exists => true,
226 AttrOp::Eq => left == right,
227 AttrOp::Prefix => left.starts_with(&right),
228 AttrOp::Suffix => left.ends_with(&right),
229 AttrOp::Substr => left.contains(&right),
230 AttrOp::Includes => left.split_whitespace().any(|t| t == right),
231 AttrOp::Dash => left == right || left.starts_with(&(right + "-")),
232 };
233 if !ok {
234 return false;
235 }
236 }
237 }
238 }
239 }
240 if !comp.pseudos.is_empty() {
241 for p in &comp.pseudos {
242 if !match_pseudo(root, el, p) {
243 return false;
244 }
245 }
246 }
247 true
248}
249
250fn match_pseudo<'a>(root: &'a HTMLElement, el: &'a HTMLElement, pseudo: &Pseudo) -> bool {
251 match pseudo {
252 Pseudo::FirstChild => position_in_parent(root, el)
253 .map(|(i, _, _, _)| i == 0)
254 .unwrap_or(false),
255 Pseudo::LastChild => position_in_parent(root, el)
256 .map(|(i, len, _, _)| i + 1 == len)
257 .unwrap_or(false),
258 Pseudo::OnlyChild => position_in_parent(root, el)
259 .map(|(_, len, _, _)| len == 1)
260 .unwrap_or(false),
261 Pseudo::FirstOfType => position_in_parent(root, el)
262 .map(|(_, _, ti, _)| ti == 0)
263 .unwrap_or(false),
264 Pseudo::LastOfType => position_in_parent(root, el)
265 .map(|(_, _, ti, tlen)| ti + 1 == tlen)
266 .unwrap_or(false),
267 Pseudo::OnlyOfType => position_in_parent(root, el)
268 .map(|(_, _, _, tlen)| tlen == 1)
269 .unwrap_or(false),
270 Pseudo::NthChild(expr) => position_in_parent(root, el)
271 .map(|(i, _, _, _)| match_nth(expr, i as i32 + 1))
272 .unwrap_or(false),
273 Pseudo::NthLastChild(expr) => position_in_parent(root, el)
274 .map(|(i, len, _, _)| {
275 let rev = (len - i - 1) as i32 + 1;
276 match_nth(expr, rev)
277 })
278 .unwrap_or(false),
279 Pseudo::NthOfType(expr) => position_in_parent(root, el)
280 .map(|(_, _, ti, _)| match_nth(expr, ti as i32 + 1))
281 .unwrap_or(false),
282 Pseudo::NthLastOfType(expr) => position_in_parent(root, el)
283 .map(|(_, _, ti, tlen)| {
284 let rev = (tlen - ti - 1) as i32 + 1;
285 match_nth(expr, rev)
286 })
287 .unwrap_or(false),
288 Pseudo::Not(list) => !list.iter().any(|sel| apply_selector_from_el(root, el, sel)),
289 Pseudo::Empty => {
290 el.iter_elements().next().is_none()
291 && el
292 .children
293 .iter()
294 .all(|n| n.as_element().is_some() || n.text().trim().is_empty())
295 }
296 Pseudo::Root => find_parent(root, el).map(|p| p.is_root()).unwrap_or(false),
297 Pseudo::Is(list) => list.iter().any(|sel| apply_selector_from_el(root, el, sel)),
298 Pseudo::Where(list) => list.iter().any(|sel| apply_selector_from_el(root, el, sel)),
299 Pseudo::Has(list) => {
300 fn any_desc<'a>(
302 cur: &'a HTMLElement,
303 root: &'a HTMLElement,
304 list: &Vec<Selector>,
305 ) -> bool {
306 for child in cur.iter_elements() {
307 for sel in list {
308 if apply_selector_from_el(root, child, sel) {
309 return true;
310 }
311 }
312 if any_desc(child, root, list) {
313 return true;
314 }
315 }
316 false
317 }
318 any_desc(el, root, list)
319 }
320 Pseudo::Scope => true,
321 }
322}
323
324fn match_nth(expr: &NthExpr, index_one: i32) -> bool {
325 match expr {
326 NthExpr::Number(n) => index_one == *n,
327 NthExpr::Odd => (index_one % 2) == 1,
328 NthExpr::Even => (index_one % 2) == 0,
329 NthExpr::Pattern { a, b } => {
330 let a = *a;
331 let b = *b;
332 if a == 0 {
333 return index_one == b;
334 }
335 if a > 0 {
336 if index_one < b {
337 return false;
338 }
339 (index_one - b) % a == 0
340 } else {
341 let mut k = 0;
342 loop {
343 let val = a * k + b;
344 if val == index_one {
345 return true;
346 }
347 if val < 1 {
348 return false;
349 }
350 if val < index_one {
351 k += 1;
352 continue;
353 }
354 return false;
355 }
356 }
357 }
358 }
359}
360
361fn position_in_parent<'a>(
362 root: &'a HTMLElement,
363 el: &'a HTMLElement,
364) -> Option<(usize, usize, usize, usize)> {
365 let parent = find_parent(root, el)?;
366 let list: Vec<&HTMLElement> = parent.iter_elements().collect();
367 if list.is_empty() {
368 return None;
369 }
370 let same: Vec<&HTMLElement> = list
371 .iter()
372 .copied()
373 .filter(|c| c.name() == el.name())
374 .collect();
375 let idx = list.iter().position(|e| std::ptr::eq(*e, el))?;
376 let tidx = same.iter().position(|e| std::ptr::eq(*e, el))?;
377 Some((idx, list.len(), tidx, same.len()))
378}
379
380fn find_parent<'a>(root: &'a HTMLElement, target: &'a HTMLElement) -> Option<&'a HTMLElement> {
381 if std::ptr::eq(root, target) {
382 return None;
383 }
384 fn dfs<'a>(cur: &'a HTMLElement, target: &'a HTMLElement) -> Option<&'a HTMLElement> {
385 for child in cur.iter_elements() {
386 if std::ptr::eq(child, target) {
387 return Some(cur);
388 }
389 if let Some(p) = dfs(child, target) {
390 return Some(p);
391 }
392 }
393 None
394 }
395 dfs(root, target)
396}
397
398fn parse_selector_list(input: &str) -> Vec<Selector> {
399 let mut parts = Vec::new();
400 let mut buf = String::new();
401 let mut depth = 0; for c in input.chars() {
403 match c {
404 '(' => {
405 depth += 1;
406 buf.push(c);
407 }
408 ')' => {
409 if depth > 0 {
410 depth -= 1;
411 }
412 buf.push(c);
413 }
414 ',' if depth == 0 => {
415 let t = buf.trim();
416 if !t.is_empty() {
417 parts.push(t.to_string());
418 }
419 buf.clear();
420 }
421 _ => buf.push(c),
422 }
423 }
424 let t = buf.trim();
425 if !t.is_empty() {
426 parts.push(t.to_string());
427 }
428 parts
429 .into_iter()
430 .filter_map(|p| {
431 let tt = p.trim();
432 if tt.is_empty() {
433 None
434 } else {
435 Some(parse_single_selector(tt))
436 }
437 })
438 .collect()
439}
440
441fn parse_single_selector(input: &str) -> Selector {
442 let mut chars = input.chars().peekable();
443 let mut parts: Vec<(Option<Combinator>, CompoundSelector)> = Vec::new();
444 let mut current = CompoundSelector::default();
445 let mut pending: Option<Combinator> = None;
446 while let Some(&ch) = chars.peek() {
447 match ch {
448 ' ' | '\t' | '\n' | '\r' => {
449 chars.next();
450 if !current_is_empty(¤t) {
451 parts.push((pending.take(), current));
452 current = CompoundSelector::default();
453 }
454 pending.get_or_insert(Combinator::Descendant);
455 while let Some(&c2) = chars.peek() {
456 if c2.is_whitespace() {
457 chars.next();
458 } else {
459 break;
460 }
461 }
462 }
463 '>' => {
464 chars.next();
465 if !current_is_empty(¤t) {
466 parts.push((pending.take(), current));
467 current = CompoundSelector::default();
468 }
469 pending = Some(Combinator::Child);
470 skip_ws(&mut chars);
471 }
472 '+' => {
473 chars.next();
474 if !current_is_empty(¤t) {
475 parts.push((pending.take(), current));
476 current = CompoundSelector::default();
477 }
478 pending = Some(Combinator::Adjacent);
479 skip_ws(&mut chars);
480 }
481 '~' => {
482 chars.next();
483 if !current_is_empty(¤t) {
484 parts.push((pending.take(), current));
485 current = CompoundSelector::default();
486 }
487 pending = Some(Combinator::Sibling);
488 skip_ws(&mut chars);
489 }
490 '#' => {
491 chars.next();
492 let ident = read_ident(&mut chars);
493 current.id = Some(ident);
494 }
495 '.' => {
496 chars.next();
497 let ident = read_ident(&mut chars);
498 current.classes.push(ident);
499 }
500 '[' => {
501 chars.next();
502 let name = read_ident(&mut chars);
503 skip_ws(&mut chars);
504 let mut op = AttrOp::Exists;
505 let mut value = String::new();
506 let mut case = CaseMode::Sensitive;
507 if let Some(&c2) = chars.peek() {
508 match c2 {
509 '=' => {
510 op = AttrOp::Eq;
511 chars.next();
512 skip_ws(&mut chars);
513 value = read_attr_value(&mut chars);
514 }
515 '^' | '$' | '*' | '~' | '|' => {
516 let first = c2;
517 chars.next();
518 if matches!(chars.peek(), Some('=')) {
519 chars.next();
520 op = match first {
521 '^' => AttrOp::Prefix,
522 '$' => AttrOp::Suffix,
523 '*' => AttrOp::Substr,
524 '~' => AttrOp::Includes,
525 '|' => AttrOp::Dash,
526 _ => AttrOp::Eq,
527 };
528 skip_ws(&mut chars);
529 value = read_attr_value(&mut chars);
530 }
531 }
532 _ => {}
533 }
534 }
535 skip_ws(&mut chars);
536 if let Some(&flag) = chars.peek() {
537 if flag == 'i' {
538 case = CaseMode::Insensitive;
539 chars.next();
540 } else if flag == 's' {
541 case = CaseMode::Sensitive;
542 chars.next();
543 }
544 }
545 while let Some(c) = chars.next() {
546 if c == ']' {
547 break;
548 }
549 }
550 current.attrs.push(AttrMatcher {
551 name: name.to_lowercase(),
552 op,
553 value,
554 case,
555 });
556 }
557 '*' => {
558 chars.next();
559 }
560 ':' => {
561 chars.next();
562 let pseudo_name = read_ident(&mut chars).to_ascii_lowercase();
563 let pseudo = if matches!(chars.peek(), Some('(')) {
564 chars.next();
565 let arg = read_until_paren(&mut chars);
566 parse_pseudo_with_arg(&pseudo_name, &arg)
567 } else {
568 parse_pseudo_no_arg(&pseudo_name)
569 };
570 if let Some(p) = pseudo {
571 current.pseudos.push(p);
572 }
573 }
574 _ => {
575 if current.tag.is_none() {
576 let ident = read_ident_starting(ch, &mut chars);
577 current.tag = Some(ident.to_lowercase());
578 } else {
579 chars.next();
580 }
581 }
582 }
583 }
584 if !current_is_empty(¤t) || parts.is_empty() {
585 parts.push((pending.take(), current));
586 }
587 Selector(parts)
588}
589
590fn current_is_empty(c: &CompoundSelector) -> bool {
591 c.tag.is_none()
592 && c.id.is_none()
593 && c.classes.is_empty()
594 && c.attrs.is_empty()
595 && c.pseudos.is_empty()
596}
597fn skip_ws<I: Iterator<Item = char>>(it: &mut std::iter::Peekable<I>) {
598 while matches!(it.peek(), Some(c) if c.is_whitespace()) {
599 it.next();
600 }
601}
602fn read_ident<I: Iterator<Item = char>>(it: &mut std::iter::Peekable<I>) -> String {
603 let mut s = String::new();
604 while let Some(&c) = it.peek() {
605 if c.is_alphanumeric() || c == '-' || c == '_' {
606 s.push(c);
607 it.next();
608 } else {
609 break;
610 }
611 }
612 s
613}
614fn read_ident_starting(first: char, it: &mut std::iter::Peekable<std::str::Chars<'_>>) -> String {
615 let mut s = String::new();
616 s.push(first);
617 it.next();
618 while let Some(&c) = it.peek() {
619 if c.is_alphanumeric() || c == '-' || c == '_' {
620 s.push(c);
621 it.next();
622 } else {
623 break;
624 }
625 }
626 s
627}
628fn read_attr_value<I: Iterator<Item = char>>(it: &mut std::iter::Peekable<I>) -> String {
629 if let Some(&q) = it.peek() {
630 if q == '"' || q == '\'' {
631 it.next();
632 let mut val = String::new();
633 while let Some(&c) = it.peek() {
634 it.next();
635 if c == q {
636 break;
637 }
638 val.push(c);
639 }
640 return val;
641 }
642 }
643 read_ident(it)
644}
645fn read_until_paren<I: Iterator<Item = char>>(it: &mut std::iter::Peekable<I>) -> String {
646 let mut depth = 1;
647 let mut s = String::new();
648 while let Some(&c) = it.peek() {
649 it.next();
650 if c == '(' {
651 depth += 1;
652 } else if c == ')' {
653 depth -= 1;
654 if depth == 0 {
655 break;
656 }
657 }
658 s.push(c);
659 }
660 s.trim().to_string()
661}
662fn parse_pseudo_no_arg(name: &str) -> Option<Pseudo> {
663 Some(match name {
664 "first-child" => Pseudo::FirstChild,
665 "last-child" => Pseudo::LastChild,
666 "only-child" => Pseudo::OnlyChild,
667 "first-of-type" => Pseudo::FirstOfType,
668 "last-of-type" => Pseudo::LastOfType,
669 "only-of-type" => Pseudo::OnlyOfType,
670 "empty" => Pseudo::Empty,
671 "root" => Pseudo::Root,
672 "scope" => Pseudo::Scope,
673 _ => return None,
674 })
675}
676fn parse_pseudo_with_arg(name: &str, arg: &str) -> Option<Pseudo> {
677 match name {
678 "nth-child" => Some(Pseudo::NthChild(parse_nth_expr(arg)?)),
679 "nth-last-child" => Some(Pseudo::NthLastChild(parse_nth_expr(arg)?)),
680 "nth-of-type" => Some(Pseudo::NthOfType(parse_nth_expr(arg)?)),
681 "nth-last-of-type" => Some(Pseudo::NthLastOfType(parse_nth_expr(arg)?)),
682 "not" => {
683 let sels = parse_selector_list(arg);
684 if sels.is_empty() {
685 None
686 } else {
687 Some(Pseudo::Not(sels))
688 }
689 }
690 "is" => {
691 let sels = parse_selector_list(arg);
692 if sels.is_empty() {
693 None
694 } else {
695 Some(Pseudo::Is(sels))
696 }
697 }
698 "where" => {
699 let sels = parse_selector_list(arg);
700 if sels.is_empty() {
701 None
702 } else {
703 Some(Pseudo::Where(sels))
704 }
705 }
706 "has" => {
707 let sels = parse_selector_list(arg);
708 if sels.is_empty() {
709 None
710 } else {
711 Some(Pseudo::Has(sels))
712 }
713 }
714 _ => None,
715 }
716}
717fn parse_nth_expr(s: &str) -> Option<NthExpr> {
718 let t = s.trim().to_ascii_lowercase();
719 if t == "odd" {
720 return Some(NthExpr::Odd);
721 }
722 if t == "even" {
723 return Some(NthExpr::Even);
724 }
725 if let Ok(n) = t.parse::<i32>() {
726 if n > 0 {
727 return Some(NthExpr::Number(n));
728 }
729 }
730 if let Some(pos) = t.find('n') {
731 let (a_part, rest) = t.split_at(pos);
732 let rest = &rest[1..];
733 let a = if a_part.is_empty() || a_part == "+" {
734 1
735 } else if a_part == "-" {
736 -1
737 } else {
738 a_part.parse().ok()?
739 };
740 let b = if rest.is_empty() {
741 0
742 } else {
743 let r = rest.trim();
744 if r.starts_with('+') {
745 r[1..].parse().ok()?
746 } else {
747 r.parse().ok()?
748 }
749 };
750 return Some(NthExpr::Pattern { a, b });
751 }
752 None
753}
754
755pub fn apply_selector_from_el<'a>(
756 root: &'a HTMLElement,
757 el: &'a HTMLElement,
758 selector: &Selector,
759) -> bool {
760 let matches = apply_selector(root, selector);
761 matches.into_iter().any(|e| std::ptr::eq(e, el))
762}
763pub fn parse_selector_list_public(input: &str) -> Vec<Selector> {
764 parse_selector_list(input)
765}
766
767pub fn selector_parts(sel: &Selector) -> &Vec<(Option<Combinator>, CompoundSelector)> {
769 &sel.0
770}