1use fhp_core::error::SelectorError;
19use fhp_core::hash::{class_bloom_bit, selector_hash};
20use fhp_core::tag::Tag;
21
22use crate::ast::{
23 AttrOp, AttrSelector, Combinator, CompoundSelector, Selector, SelectorList, SimpleSelector,
24};
25
26pub fn parse_selector(input: &str) -> Result<SelectorList, SelectorError> {
34 let trimmed = input.trim();
35 if trimmed.is_empty() {
36 return Err(SelectorError::Invalid {
37 reason: "empty selector".to_string(),
38 });
39 }
40 let mut parser = Parser::new(trimmed);
41 let list = parser.parse_selector_list()?;
42 parser.skip_whitespace();
43 if !parser.is_eof() {
44 return Err(SelectorError::Invalid {
45 reason: format!(
46 "unexpected character '{}' at position {}",
47 parser.peek().unwrap() as char,
48 parser.pos
49 ),
50 });
51 }
52 Ok(list)
53}
54
55pub fn parse_single_selector(input: &str) -> Result<Selector, SelectorError> {
57 let trimmed = input.trim();
58 if trimmed.is_empty() {
59 return Err(SelectorError::Invalid {
60 reason: "empty selector".to_string(),
61 });
62 }
63 let mut parser = Parser::new(trimmed);
64 let sel = parser.parse_complex_selector()?;
65 parser.skip_whitespace();
66 if !parser.is_eof() {
67 return Err(SelectorError::Invalid {
68 reason: format!(
69 "unexpected character '{}' at position {}",
70 parser.peek().unwrap() as char,
71 parser.pos
72 ),
73 });
74 }
75 Ok(sel)
76}
77
/// Byte-oriented cursor over the selector text being parsed.
struct Parser<'src> {
    /// Raw bytes of the selector text.
    input: &'src [u8],
    /// Index into `input` of the next byte to read.
    pos: usize,
}
83
84impl<'a> Parser<'a> {
85 fn new(input: &'a str) -> Self {
86 Self {
87 input: input.as_bytes(),
88 pos: 0,
89 }
90 }
91
92 fn is_eof(&self) -> bool {
93 self.pos >= self.input.len()
94 }
95
96 fn peek(&self) -> Option<u8> {
97 self.input.get(self.pos).copied()
98 }
99
100 fn advance(&mut self) -> Option<u8> {
101 let b = self.input.get(self.pos).copied()?;
102 self.pos += 1;
103 Some(b)
104 }
105
106 fn skip_whitespace(&mut self) {
107 while self.pos < self.input.len() && self.input[self.pos].is_ascii_whitespace() {
108 self.pos += 1;
109 }
110 }
111
112 fn skip_ws_check(&mut self) -> bool {
114 let before = self.pos;
115 self.skip_whitespace();
116 self.pos > before
117 }
118
119 fn expect(&mut self, expected: u8) -> Result<(), SelectorError> {
120 match self.advance() {
121 Some(b) if b == expected => Ok(()),
122 Some(b) => Err(SelectorError::Invalid {
123 reason: format!(
124 "expected '{}', found '{}' at position {}",
125 expected as char,
126 b as char,
127 self.pos - 1
128 ),
129 }),
130 None => Err(SelectorError::Invalid {
131 reason: format!("expected '{}', found end of input", expected as char),
132 }),
133 }
134 }
135
136 fn read_ident(&mut self) -> Result<String, SelectorError> {
138 let start = self.pos;
139 while self.pos < self.input.len() {
141 let b = self.input[self.pos];
142 if b.is_ascii_alphanumeric() || b == b'-' || b == b'_' {
143 self.pos += 1;
144 } else {
145 break;
146 }
147 }
148 if self.pos == start {
149 return Err(SelectorError::Invalid {
150 reason: format!("expected identifier at position {}", self.pos),
151 });
152 }
153 Ok(String::from_utf8_lossy(&self.input[start..self.pos]).into_owned())
155 }
156
157 fn read_value(&mut self) -> Result<String, SelectorError> {
163 self.skip_whitespace();
164 match self.peek() {
165 Some(b'"') | Some(b'\'') => {
166 let quote = self.advance().unwrap();
167 let start = self.pos;
168 while self.pos < self.input.len() && self.input[self.pos] != quote {
169 self.pos += 1;
170 }
171 if self.is_eof() {
172 return Err(SelectorError::Invalid {
173 reason: "unclosed quote in attribute value".to_string(),
174 });
175 }
176 let value = String::from_utf8_lossy(&self.input[start..self.pos]).into_owned();
177 self.pos += 1; Ok(value)
179 }
180 _ => {
181 let start = self.pos;
183 while self.pos < self.input.len() {
184 let b = self.input[self.pos];
185 if b.is_ascii_whitespace() || b == b']' {
186 break;
187 }
188 self.pos += 1;
189 }
190 if self.pos == start {
191 return Err(SelectorError::Invalid {
192 reason: format!("expected value at position {}", self.pos),
193 });
194 }
195 Ok(String::from_utf8_lossy(&self.input[start..self.pos]).into_owned())
196 }
197 }
198 }
199
200 fn parse_selector_list(&mut self) -> Result<SelectorList, SelectorError> {
202 let mut selectors = Vec::new();
203 selectors.push(self.parse_complex_selector()?);
204 loop {
205 self.skip_whitespace();
206 if self.peek() == Some(b',') {
207 self.advance();
208 self.skip_whitespace();
209 selectors.push(self.parse_complex_selector()?);
210 } else {
211 break;
212 }
213 }
214 Ok(SelectorList { selectors })
215 }
216
217 fn parse_complex_selector(&mut self) -> Result<Selector, SelectorError> {
219 self.skip_whitespace();
220 let first = self.parse_compound_selector()?;
221
222 let mut compounds = vec![first];
223 let mut combinators = Vec::new();
224
225 loop {
226 let had_whitespace = self.skip_ws_check();
227
228 if self.is_eof() {
229 break;
230 }
231
232 match self.peek() {
233 Some(b'>') => {
234 self.advance();
235 self.skip_whitespace();
236 combinators.push(Combinator::Child);
237 compounds.push(self.parse_compound_selector()?);
238 }
239 Some(b'+') => {
240 self.advance();
241 self.skip_whitespace();
242 combinators.push(Combinator::AdjacentSibling);
243 compounds.push(self.parse_compound_selector()?);
244 }
245 Some(b'~') => {
246 self.advance();
247 self.skip_whitespace();
248 combinators.push(Combinator::GeneralSibling);
249 compounds.push(self.parse_compound_selector()?);
250 }
251 Some(b',') | Some(b')') => break,
252 _ if had_whitespace && self.is_compound_start() => {
253 combinators.push(Combinator::Descendant);
254 compounds.push(self.parse_compound_selector()?);
255 }
256 _ => break,
257 }
258 }
259
260 let subject = compounds.pop().unwrap();
264 let mut chain = Vec::new();
265 for (compound, combinator) in compounds.into_iter().zip(combinators.into_iter()).rev() {
266 chain.push((combinator, compound));
267 }
268
269 Ok(Selector { subject, chain })
270 }
271
272 fn is_compound_start(&self) -> bool {
274 matches!(
275 self.peek(),
276 Some(b'#' | b'.' | b'[' | b':' | b'*')
277 | Some(b'a'..=b'z')
278 | Some(b'A'..=b'Z')
279 | Some(b'_')
280 )
281 }
282
283 fn parse_compound_selector(&mut self) -> Result<CompoundSelector, SelectorError> {
285 let mut parts = Vec::new();
286
287 loop {
288 if self.is_eof() {
289 break;
290 }
291
292 match self.peek() {
293 Some(b'#') => {
294 self.advance();
295 let id = self.read_ident()?;
296 let hash = selector_hash(id.as_bytes());
297 parts.push(SimpleSelector::Id(id, hash));
298 }
299 Some(b'.') => {
300 self.advance();
301 let class = self.read_ident()?;
302 let bloom = class_bloom_bit(class.as_bytes());
303 parts.push(SimpleSelector::Class(class, bloom));
304 }
305 Some(b'[') => {
306 parts.push(self.parse_attr_selector()?);
307 }
308 Some(b':') => {
309 parts.push(self.parse_pseudo()?);
310 }
311 Some(b'*') => {
312 self.advance();
313 parts.push(SimpleSelector::Universal);
314 }
315 Some(b) if b.is_ascii_alphabetic() || b == b'_' => {
316 let name = self.read_ident()?;
317 let tag = Tag::from_bytes(name.as_bytes());
318 if tag == Tag::Unknown {
319 parts.push(SimpleSelector::UnknownTag(name));
320 } else {
321 parts.push(SimpleSelector::Tag(tag));
322 }
323 }
324 _ => break,
325 }
326 }
327
328 if parts.is_empty() {
329 return Err(SelectorError::Invalid {
330 reason: format!("expected selector at position {}", self.pos),
331 });
332 }
333
334 Ok(CompoundSelector { parts })
335 }
336
337 fn parse_attr_selector(&mut self) -> Result<SimpleSelector, SelectorError> {
339 self.expect(b'[')?;
340 self.skip_whitespace();
341 let name = self.read_ident()?;
342 self.skip_whitespace();
343
344 if self.peek() == Some(b']') {
345 self.advance();
346 return Ok(SimpleSelector::Attr(AttrSelector {
347 name,
348 op: AttrOp::Exists,
349 value: None,
350 }));
351 }
352
353 let op = match self.peek() {
354 Some(b'=') => {
355 self.advance();
356 AttrOp::Equals
357 }
358 Some(b'~') => {
359 self.advance();
360 self.expect(b'=')?;
361 AttrOp::Includes
362 }
363 Some(b'^') => {
364 self.advance();
365 self.expect(b'=')?;
366 AttrOp::StartsWith
367 }
368 Some(b'$') => {
369 self.advance();
370 self.expect(b'=')?;
371 AttrOp::EndsWith
372 }
373 Some(b'*') => {
374 self.advance();
375 self.expect(b'=')?;
376 AttrOp::Substring
377 }
378 _ => {
379 return Err(SelectorError::Invalid {
380 reason: format!("expected attribute operator at position {}", self.pos),
381 });
382 }
383 };
384
385 let value = self.read_value()?;
386 self.skip_whitespace();
387 self.expect(b']')?;
388
389 Ok(SimpleSelector::Attr(AttrSelector {
390 name,
391 op,
392 value: Some(value),
393 }))
394 }
395
396 fn parse_pseudo(&mut self) -> Result<SimpleSelector, SelectorError> {
398 self.expect(b':')?;
399 let name = self.read_ident()?;
400
401 match name.as_str() {
402 "first-child" => Ok(SimpleSelector::PseudoFirstChild),
403 "last-child" => Ok(SimpleSelector::PseudoLastChild),
404 "nth-child" => {
405 self.expect(b'(')?;
406 let (a, b) = self.parse_nth()?;
407 self.skip_whitespace();
408 self.expect(b')')?;
409 Ok(SimpleSelector::PseudoNthChild { a, b })
410 }
411 "not" => {
412 self.expect(b'(')?;
413 self.skip_whitespace();
414 let inner = self.parse_compound_selector()?;
415 self.skip_whitespace();
416 self.expect(b')')?;
417 Ok(SimpleSelector::PseudoNot(Box::new(inner)))
418 }
419 _ => Err(SelectorError::Invalid {
420 reason: format!("unknown pseudo-class ':{name}'"),
421 }),
422 }
423 }
424
425 fn parse_nth(&mut self) -> Result<(i32, i32), SelectorError> {
427 self.skip_whitespace();
428
429 if self.peek_keyword("odd") {
431 self.pos += 3;
432 return Ok((2, 1));
433 }
434 if self.peek_keyword("even") {
435 self.pos += 4;
436 return Ok((2, 0));
437 }
438
439 let mut sign: i32 = 1;
441 if self.peek() == Some(b'-') {
442 sign = -1;
443 self.advance();
444 } else if self.peek() == Some(b'+') {
445 self.advance();
446 }
447
448 let num_start = self.pos;
449 while self.pos < self.input.len() && self.input[self.pos].is_ascii_digit() {
450 self.pos += 1;
451 }
452
453 let has_number = self.pos > num_start;
454 let number = if has_number {
455 let s = std::str::from_utf8(&self.input[num_start..self.pos]).unwrap();
456 sign * s.parse::<i32>().unwrap_or(0)
457 } else {
458 sign };
460
461 if self.peek() == Some(b'n') || self.peek() == Some(b'N') {
462 self.advance();
463 let a = number;
464 self.skip_whitespace();
465
466 let b = match self.peek() {
467 Some(b'+') => {
468 self.advance();
469 self.skip_whitespace();
470 self.read_int()?
471 }
472 Some(b'-') => {
473 self.advance();
474 self.skip_whitespace();
475 -self.read_int()?
476 }
477 _ => 0,
478 };
479
480 Ok((a, b))
481 } else if has_number {
482 Ok((0, number))
484 } else {
485 Err(SelectorError::Invalid {
486 reason: "invalid :nth-child expression".to_string(),
487 })
488 }
489 }
490
491 fn read_int(&mut self) -> Result<i32, SelectorError> {
493 let start = self.pos;
494 while self.pos < self.input.len() && self.input[self.pos].is_ascii_digit() {
495 self.pos += 1;
496 }
497 if self.pos == start {
498 return Err(SelectorError::Invalid {
499 reason: "expected number".to_string(),
500 });
501 }
502 let s = std::str::from_utf8(&self.input[start..self.pos]).unwrap();
503 Ok(s.parse::<i32>().unwrap_or(0))
504 }
505
506 fn peek_keyword(&self, keyword: &str) -> bool {
508 let bytes = keyword.as_bytes();
509 if self.pos + bytes.len() > self.input.len() {
510 return false;
511 }
512 for (i, &b) in bytes.iter().enumerate() {
513 if !self.input[self.pos + i].eq_ignore_ascii_case(&b) {
514 return false;
515 }
516 }
517 let next_pos = self.pos + bytes.len();
519 if next_pos < self.input.len() && self.input[next_pos].is_ascii_alphanumeric() {
520 return false;
521 }
522 true
523 }
524}
525
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as a selector list and returns its first selector.
    fn first_selector(input: &str) -> Selector {
        let mut list = parse_selector(input).unwrap();
        list.selectors.remove(0)
    }

    /// Parses `input` and returns the first simple selector of the subject,
    /// which must be an attribute selector.
    fn attr_selector(input: &str) -> AttrSelector {
        let mut selector = first_selector(input);
        match selector.subject.parts.remove(0) {
            SimpleSelector::Attr(attr) => attr,
            _ => panic!("expected attr selector"),
        }
    }

    /// Parses `input` and returns the `(a, b)` pair of its leading `:nth-child`.
    fn nth_child(input: &str) -> (i32, i32) {
        let selector = first_selector(input);
        match selector.subject.parts[0] {
            SimpleSelector::PseudoNthChild { a, b } => (a, b),
            _ => panic!("expected nth-child"),
        }
    }

    #[test]
    fn parse_tag() {
        let list = parse_selector("div").unwrap();
        assert_eq!(list.selectors.len(), 1);

        let selector = &list.selectors[0];
        assert!(selector.chain.is_empty());
        assert_eq!(selector.subject.parts.len(), 1);
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::Div)
        ));
    }

    #[test]
    fn parse_class() {
        let selector = first_selector(".foo");
        assert!(
            matches!(&selector.subject.parts[0], SimpleSelector::Class(c, _) if c == "foo"),
            "expected class selector"
        );
    }

    #[test]
    fn parse_id() {
        let selector = first_selector("#bar");
        assert!(
            matches!(&selector.subject.parts[0], SimpleSelector::Id(id, _) if id == "bar"),
            "expected id selector"
        );
    }

    #[test]
    fn parse_universal() {
        let selector = first_selector("*");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Universal
        ));
    }

    #[test]
    fn parse_compound() {
        let selector = first_selector("div.active#main");
        let parts = &selector.subject.parts;
        assert_eq!(parts.len(), 3);
        assert!(matches!(parts[0], SimpleSelector::Tag(Tag::Div)));
        assert!(matches!(&parts[1], SimpleSelector::Class(c, _) if c == "active"));
        assert!(matches!(&parts[2], SimpleSelector::Id(id, _) if id == "main"));
    }

    #[test]
    fn parse_descendant() {
        let selector = first_selector("div p");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::P)
        ));
        assert_eq!(selector.chain.len(), 1);

        let link = &selector.chain[0];
        assert_eq!(link.0, Combinator::Descendant);
        assert!(matches!(link.1.parts[0], SimpleSelector::Tag(Tag::Div)));
    }

    #[test]
    fn parse_child() {
        let selector = first_selector("div > p");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::P)
        ));
        assert_eq!(selector.chain[0].0, Combinator::Child);
    }

    #[test]
    fn parse_adjacent_sibling() {
        let selector = first_selector("h1 + p");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::P)
        ));
        assert_eq!(selector.chain[0].0, Combinator::AdjacentSibling);
    }

    #[test]
    fn parse_general_sibling() {
        let selector = first_selector("h1 ~ p");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::P)
        ));
        assert_eq!(selector.chain[0].0, Combinator::GeneralSibling);
    }

    #[test]
    fn parse_attr_exists() {
        let attr = attr_selector("[data-x]");
        assert_eq!(attr.name, "data-x");
        assert_eq!(attr.op, AttrOp::Exists);
        assert!(attr.value.is_none());
    }

    #[test]
    fn parse_attr_equals() {
        let attr = attr_selector("[href=\"url\"]");
        assert_eq!(attr.name, "href");
        assert_eq!(attr.op, AttrOp::Equals);
        assert_eq!(attr.value.as_deref(), Some("url"));
    }

    #[test]
    fn parse_attr_includes() {
        let attr = attr_selector("[class~=active]");
        assert_eq!(attr.op, AttrOp::Includes);
        assert_eq!(attr.value.as_deref(), Some("active"));
    }

    #[test]
    fn parse_attr_starts_with() {
        let attr = attr_selector("[href^=https]");
        assert_eq!(attr.op, AttrOp::StartsWith);
    }

    #[test]
    fn parse_attr_ends_with() {
        let attr = attr_selector("[href$=.html]");
        assert_eq!(attr.op, AttrOp::EndsWith);
    }

    #[test]
    fn parse_attr_substring() {
        let attr = attr_selector("[href*=example]");
        assert_eq!(attr.op, AttrOp::Substring);
    }

    #[test]
    fn parse_first_child() {
        let selector = first_selector(":first-child");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::PseudoFirstChild
        ));
    }

    #[test]
    fn parse_last_child() {
        let selector = first_selector(":last-child");
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::PseudoLastChild
        ));
    }

    #[test]
    fn parse_nth_child_number() {
        assert_eq!(nth_child(":nth-child(3)"), (0, 3));
    }

    #[test]
    fn parse_nth_child_odd() {
        assert_eq!(nth_child(":nth-child(odd)"), (2, 1));
    }

    #[test]
    fn parse_nth_child_even() {
        assert_eq!(nth_child(":nth-child(even)"), (2, 0));
    }

    #[test]
    fn parse_nth_child_formula() {
        assert_eq!(nth_child(":nth-child(2n+1)"), (2, 1));
    }

    #[test]
    fn parse_nth_child_negative() {
        assert_eq!(nth_child(":nth-child(-n+3)"), (-1, 3));
    }

    #[test]
    fn parse_not() {
        let selector = first_selector(":not(.hidden)");
        let inner = match &selector.subject.parts[0] {
            SimpleSelector::PseudoNot(inner) => inner,
            _ => panic!("expected :not"),
        };
        assert!(matches!(&inner.parts[0], SimpleSelector::Class(c, _) if c == "hidden"));
    }

    #[test]
    fn parse_comma_list() {
        let list = parse_selector("div, span, p").unwrap();
        assert_eq!(list.selectors.len(), 3);
    }

    #[test]
    fn parse_complex_chain() {
        let selector = first_selector("div > ul li > a.link");

        // Subject is the right-most compound: `a.link`.
        assert_eq!(selector.subject.parts.len(), 2);
        assert!(matches!(
            selector.subject.parts[0],
            SimpleSelector::Tag(Tag::A)
        ));

        // The chain runs from the subject's nearest neighbour to the left-most compound.
        assert_eq!(selector.chain.len(), 3);
        assert_eq!(selector.chain[0].0, Combinator::Child);
        assert_eq!(selector.chain[1].0, Combinator::Descendant);
        assert_eq!(selector.chain[2].0, Combinator::Child);
    }

    #[test]
    fn parse_empty_error() {
        assert!(parse_selector("").is_err());
        assert!(parse_selector(" ").is_err());
    }

    #[test]
    fn parse_attr_quoted_value() {
        let attr = attr_selector("[data-value='hello world']");
        assert_eq!(attr.value.as_deref(), Some("hello world"));
    }
}