1#[cfg(feature = "wasm")]
11mod index;
12
13use serde::Serialize;
14use std::borrow::Cow;
15
/// A lexical token produced by [`tokenize`] / [`Tokenizer::tokenize`].
#[derive(Debug, Clone, PartialEq, Serialize)]
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[non_exhaustive]
pub enum Token {
    /// Identifier / attribute name (original casing preserved).
    Ident(String),
    /// Quoted string literal (quotes and backslash escapes removed).
    Str(String),
    /// Numeric literal, e.g. `42.5` or `-2`.
    Number(f64),
    /// `=`
    Eq,
    /// `!=`
    NotEq,
    /// `~=` (whitespace-separated word match)
    WordMatch,
    /// `|=` (exact or hyphen-prefix match)
    PrefixMatch,
    /// `^=`
    StartsWith,
    /// `$=`
    EndsWith,
    /// `*=`
    Contains,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Lte,
    /// `>=`
    Gte,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `*`
    Star,
    /// `#`
    Hash,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `+`
    Plus,
    /// `~`
    Tilde,
    /// `!`
    Bang,
    /// `;`
    Semicolon,
    /// Keyword `null` (matched case-insensitively).
    Null,
    /// Keyword `true` (matched case-insensitively).
    True,
    /// Keyword `false` (matched case-insensitively).
    False,
    /// Keyword `not` (matched case-insensitively).
    Not,
    /// Keyword `is` (matched case-insensitively).
    Is,
    /// Keyword `and` (matched case-insensitively).
    And,
    /// Keyword `or` (matched case-insensitively).
    Or,
    /// Token emitted for a user-registered operator or keyword
    /// (see [`Tokenizer::op`] / [`Tokenizer::keyword`]).
    Custom(String),
}
101
/// A user-registered operator: a raw byte pattern and the token it lexes to.
struct CustomOp {
    /// Operator spelling as bytes; matched by prefix, longest pattern first.
    pattern: Vec<u8>,
    /// Token emitted when `pattern` matches.
    token: Token,
}
111
/// A configurable lexer for the predicate/selector mini-language.
///
/// Configure with the chained builder methods ([`op`](Tokenizer::op),
/// [`keyword`](Tokenizer::keyword), [`transform`](Tokenizer::transform)),
/// then lex with [`tokenize`](Tokenizer::tokenize).
pub struct Tokenizer {
    /// User operators, kept sorted by descending pattern length so the
    /// longest match wins.
    custom_ops: Vec<CustomOp>,
    /// User keywords as (lowercased word, token) pairs.
    custom_keywords: Vec<(String, Token)>,
    /// Post-lex passes applied to the whole token stream, in order.
    transforms: Vec<fn(Vec<Token>) -> Vec<Token>>,
}
129
impl Tokenizer {
    /// Creates a tokenizer with no custom operators, keywords, or transforms.
    pub fn new() -> Self {
        Self {
            custom_ops: Vec::new(),
            custom_keywords: Vec::new(),
            transforms: Vec::new(),
        }
    }

    /// Registers a custom operator `pattern` that lexes to `token`.
    ///
    /// Custom operators are tried before the built-in ones, so a custom
    /// pattern can override a built-in spelling (e.g. `>=`). The list is
    /// re-sorted by descending pattern length so the longest match wins.
    pub fn op(mut self, pattern: &str, token: Token) -> Self {
        self.custom_ops.push(CustomOp {
            pattern: pattern.as_bytes().to_vec(),
            token,
        });
        self.custom_ops
            .sort_by(|a, b| b.pattern.len().cmp(&a.pattern.len()));
        self
    }

    /// Registers a custom keyword `word` (matched case-insensitively) that
    /// lexes to `token`. Custom keywords take priority over the built-in
    /// ones (`null`, `true`, `false`, `not`, `is`, `and`, `or`).
    pub fn keyword(mut self, word: &str, token: Token) -> Self {
        self.custom_keywords
            .push((word.to_ascii_lowercase(), token));
        self
    }

    /// Appends a post-processing pass run over the full token stream after
    /// lexing. Passes run in registration order.
    pub fn transform(mut self, f: fn(Vec<Token>) -> Vec<Token>) -> Self {
        self.transforms.push(f);
        self
    }

    /// Lexes `input` into a token stream, then applies any registered
    /// transforms.
    ///
    /// Match priority at each position: whitespace (skipped), quoted
    /// strings, custom operators, built-in two-char operators, built-in
    /// single-char tokens, numbers, then identifiers/keywords.
    ///
    /// # Errors
    /// Returns an error for an unexpected character or an unterminated
    /// string. Scanning is byte-based: non-ASCII bytes outside quoted
    /// strings fall through to the "unexpected character" error.
    #[must_use = "tokenizing is useless without inspecting the result"]
    pub fn tokenize(&self, input: &str) -> Result<Vec<Token>, String> {
        let mut tokens = Vec::new();
        let bytes = input.as_bytes();
        let mut pos = 0;

        while pos < bytes.len() {
            if bytes[pos].is_ascii_whitespace() {
                pos += 1;
                continue;
            }

            // Quoted strings (either quote style) are lexed before operator
            // matching so quote characters never reach the operator tables.
            if bytes[pos] == b'"' || bytes[pos] == b'\'' {
                let (s, new_pos) = lex_quoted_string(bytes, pos)?;
                tokens.push(Token::Str(s));
                pos = new_pos;
                continue;
            }

            // Custom operators first (longest-first), so they can shadow
            // built-ins.
            if let Some((len, tok)) = self.match_custom_op(&bytes[pos..]) {
                tokens.push(tok);
                pos += len;
                continue;
            }

            // Two-char built-ins before single-char so `<=` beats `<`.
            if pos + 1 < bytes.len() {
                let two = &bytes[pos..pos + 2];
                let op = match_builtin_two_char(two);
                if let Some(tok) = op {
                    tokens.push(tok);
                    pos += 2;
                    continue;
                }
            }

            if let Some(tok) = match_builtin_single(bytes[pos]) {
                tokens.push(tok);
                pos += 1;
                continue;
            }

            // A `-` starts a number only when a digit follows; a bare `-`
            // falls through to the error below.
            if bytes[pos].is_ascii_digit()
                || (bytes[pos] == b'-' && pos + 1 < bytes.len() && bytes[pos + 1].is_ascii_digit())
            {
                let (tok, new_pos) = lex_number(input, bytes, pos)?;
                tokens.push(tok);
                pos = new_pos;
                continue;
            }

            // Identifier / keyword word.
            if is_ident_start(bytes[pos]) {
                let start = pos;
                while pos < bytes.len() && is_ident_char(bytes[pos]) {
                    pos += 1;
                }
                let word = &input[start..pos];
                let tok = self.resolve_keyword(word);
                tokens.push(tok);
                continue;
            }

            return Err(format!(
                "Unexpected character '{}' at position {}",
                bytes[pos] as char, pos
            ));
        }

        // Middleware passes over the completed stream, in registration order.
        for transform in &self.transforms {
            tokens = transform(tokens);
        }

        Ok(tokens)
    }

    /// Returns the first custom operator matching the head of `remaining`,
    /// with its byte length. The list is sorted longest-first by `op()`.
    fn match_custom_op(&self, remaining: &[u8]) -> Option<(usize, Token)> {
        for custom in &self.custom_ops {
            if remaining.starts_with(&custom.pattern) {
                return Some((custom.pattern.len(), custom.token.clone()));
            }
        }
        None
    }

    /// Resolves a word to a keyword token — custom keywords first, then the
    /// built-ins, both case-insensitive — or to `Token::Ident` with the
    /// original casing preserved.
    fn resolve_keyword(&self, word: &str) -> Token {
        let lower = word.to_ascii_lowercase();
        for (kw, tok) in &self.custom_keywords {
            if lower == *kw {
                return tok.clone();
            }
        }
        match lower.as_str() {
            "null" => Token::Null,
            "true" => Token::True,
            "false" => Token::False,
            "not" => Token::Not,
            "is" => Token::Is,
            "and" => Token::And,
            "or" => Token::Or,
            _ => Token::Ident(word.to_owned()),
        }
    }
}
286
287impl Default for Tokenizer {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293fn match_builtin_two_char(two: &[u8]) -> Option<Token> {
298 match two {
299 b"~=" => Some(Token::WordMatch),
300 b"|=" => Some(Token::PrefixMatch),
301 b"^=" => Some(Token::StartsWith),
302 b"$=" => Some(Token::EndsWith),
303 b"*=" => Some(Token::Contains),
304 b"!=" => Some(Token::NotEq),
305 b"<=" => Some(Token::Lte),
306 b">=" => Some(Token::Gte),
307 b"::" => Some(Token::DoubleColon),
308 _ => None,
309 }
310}
311
312fn match_builtin_single(ch: u8) -> Option<Token> {
313 match ch {
314 b'=' => Some(Token::Eq),
315 b'<' => Some(Token::Lt),
316 b'>' => Some(Token::Gt),
317 b'[' => Some(Token::LBracket),
318 b']' => Some(Token::RBracket),
319 b'(' => Some(Token::LParen),
320 b')' => Some(Token::RParen),
321 b',' => Some(Token::Comma),
322 b'.' => Some(Token::Dot),
323 b'*' => Some(Token::Star),
324 b'#' => Some(Token::Hash),
325 b':' => Some(Token::Colon),
326 b'+' => Some(Token::Plus),
327 b'~' => Some(Token::Tilde),
328 b'!' => Some(Token::Bang),
329 b';' => Some(Token::Semicolon),
330 _ => None,
331 }
332}
333
334fn lex_number(input: &str, bytes: &[u8], start: usize) -> Result<(Token, usize), String> {
335 let mut pos = start;
336 if bytes[pos] == b'-' {
337 pos += 1;
338 }
339 while pos < bytes.len() && bytes[pos].is_ascii_digit() {
340 pos += 1;
341 }
342 if pos < bytes.len() && bytes[pos] == b'.' {
343 pos += 1;
344 while pos < bytes.len() && bytes[pos].is_ascii_digit() {
345 pos += 1;
346 }
347 }
348 let num_str = &input[start..pos];
349 let n = num_str
350 .parse::<f64>()
351 .map_err(|e| format!("Invalid number '{num_str}': {e}"))?;
352 Ok((Token::Number(n), pos))
353}
354
355#[must_use = "tokenizing is useless without inspecting the result"]
363pub fn tokenize(input: &str) -> Result<Vec<Token>, String> {
364 Tokenizer::new().tokenize(input)
367}
368
/// Lexes a quoted string; `start` must point at the opening `'` or `"`.
/// A backslash escapes the byte that follows it. Returns the unescaped
/// contents and the byte offset just past the closing quote.
///
/// Bytes are collected raw and converted to a `String` once at the end so
/// multi-byte UTF-8 sequences survive intact. (The previous version pushed
/// each byte as a `char`, which re-interpreted UTF-8 continuation bytes as
/// Latin-1 and mangled non-ASCII content such as `"héllo"`.)
///
/// # Errors
/// Returns an error when the closing quote is missing.
fn lex_quoted_string(bytes: &[u8], start: usize) -> Result<(String, usize), String> {
    let quote = bytes[start];
    let mut pos = start + 1;
    let mut raw: Vec<u8> = Vec::new();
    while pos < bytes.len() && bytes[pos] != quote {
        if bytes[pos] == b'\\' {
            // Drop the backslash; copy the escaped byte verbatim if present.
            pos += 1;
            if pos < bytes.len() {
                raw.push(bytes[pos]);
                pos += 1;
            }
        } else {
            raw.push(bytes[pos]);
            pos += 1;
        }
    }
    if pos >= bytes.len() {
        return Err(format!(
            "Unterminated string starting at position {}",
            start
        ));
    }
    pos += 1; // consume the closing quote
    // `bytes` comes from a valid &str and only ASCII bytes (quote/backslash)
    // were dropped, so the collected bytes remain valid UTF-8; the map_err
    // is defensive.
    let s = String::from_utf8(raw)
        .map_err(|e| format!("Invalid UTF-8 in string starting at position {start}: {e}"))?;
    Ok((s, pos))
}
394
/// A typed right-hand value of a predicate, produced from a single token.
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, PartialEq, Serialize)]
#[non_exhaustive]
pub enum PredicateValue {
    /// Quoted string or bare identifier value.
    String(String),
    /// Numeric literal.
    Number(f64),
    /// `true` / `false` keyword.
    Bool(bool),
    /// `null` keyword.
    Null,
}
416
417impl PredicateValue {
418 pub fn as_str_repr(&self) -> Cow<'_, str> {
420 match self {
421 PredicateValue::String(s) => Cow::Borrowed(s.as_str()),
422 PredicateValue::Number(n) => Cow::Owned(n.to_string()),
423 PredicateValue::Bool(b) => Cow::Owned(b.to_string()),
424 PredicateValue::Null => Cow::Borrowed(""),
425 }
426 }
427
428 pub fn as_f64(&self) -> Option<f64> {
430 match self {
431 PredicateValue::Number(n) => Some(*n),
432 PredicateValue::String(s) => s.parse().ok(),
433 _ => None,
434 }
435 }
436}
437
/// A parsed predicate: a `name`, optionally followed by an operator and a
/// value. A bare `name` (both `op` and `value` are `None`) is a presence
/// check.
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Predicate {
    /// Attribute name being tested.
    pub name: String,
    /// Comparison operator; `None` for presence checks.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub op: Option<PredicateOp>,
    /// Right-hand value; `None` for presence checks.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<PredicateValue>,
}
454
/// A comparison operator usable in a predicate.
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub enum PredicateOp {
    /// `=` — exact string equality.
    Eq,
    /// `~=` — whitespace-separated word match.
    WordMatch,
    /// `|=` — exact match or `value-` hyphen prefix.
    PrefixMatch,
    /// `^=` — starts-with.
    StartsWith,
    /// `$=` — ends-with.
    EndsWith,
    /// `*=` — substring containment.
    Contains,
    /// `!=` — string inequality.
    NotEq,
    /// `<` — numeric less-than.
    Lt,
    /// `>` — numeric greater-than.
    Gt,
    /// `<=` — numeric less-than-or-equal.
    Lte,
    /// `>=` — numeric greater-than-or-equal.
    Gte,
}
486
/// Alias of [`Predicate`] under its attribute-oriented name.
pub type AttrPredicate = Predicate;
/// Alias of [`PredicateOp`] under its attribute-oriented name.
pub type AttrOp = PredicateOp;
492
493#[must_use = "parsing a predicate is useless without inspecting the result"]
502pub fn parse_predicate(input: &str) -> Result<Predicate, String> {
503 let tokens = tokenize(input.trim())?;
504 let mut pos = 0;
505 let pred = parse_one_predicate(&tokens, &mut pos)?;
506 if pos < tokens.len() {
507 return Err(format!(
508 "Unexpected token after predicate: {:?}",
509 tokens[pos]
510 ));
511 }
512 Ok(pred)
513}
514
515#[must_use = "parsing predicates is useless without inspecting the result"]
519pub fn parse_predicate_list(input: &str) -> Result<Vec<Predicate>, String> {
520 let tokens = tokenize(input.trim())?;
521 if tokens.is_empty() {
522 return Ok(Vec::new());
523 }
524
525 let mut preds = Vec::new();
526 let mut pos = 0;
527 loop {
528 if pos >= tokens.len() {
529 break;
530 }
531 preds.push(parse_one_predicate(&tokens, &mut pos)?);
532 if pos < tokens.len() && tokens[pos] == Token::Comma {
533 pos += 1;
534 } else {
535 break;
536 }
537 }
538
539 if pos < tokens.len() {
540 return Err(format!(
541 "Unexpected token at position {}: {:?}",
542 pos, tokens[pos]
543 ));
544 }
545
546 Ok(preds)
547}
548
549#[must_use = "evaluating a predicate is useless without inspecting the result"]
553pub fn eval_predicate(op: &str, node_val: &str, pred_val: &str) -> Result<bool, String> {
554 let pred = parse_predicate(&format!("x{op}{pred_val}"))?;
555 let resolved_op = pred.op.ok_or_else(|| "missing operator".to_string())?;
556 Ok(eval_op(resolved_op, node_val, pred_val))
557}
558
559#[must_use = "evaluating an operator is useless without inspecting the result"]
564pub fn eval_op(op: PredicateOp, node_val: &str, pred_val: &str) -> bool {
565 match op {
566 PredicateOp::Eq => node_val == pred_val,
567 PredicateOp::NotEq => node_val != pred_val,
568 PredicateOp::WordMatch => node_val.split_whitespace().any(|w| w == pred_val),
569 PredicateOp::StartsWith => node_val.starts_with(pred_val),
570 PredicateOp::EndsWith => node_val.ends_with(pred_val),
571 PredicateOp::Contains => node_val.contains(pred_val),
572 PredicateOp::PrefixMatch => {
573 node_val == pred_val
574 || (node_val.starts_with(pred_val)
575 && node_val.as_bytes().get(pred_val.len()) == Some(&b'-'))
576 }
577 PredicateOp::Lt | PredicateOp::Gt | PredicateOp::Lte | PredicateOp::Gte => {
578 match (node_val.parse::<f64>(), pred_val.parse::<f64>()) {
579 (Ok(a), Ok(b)) => match op {
580 PredicateOp::Lt => a < b,
581 PredicateOp::Gt => a > b,
582 PredicateOp::Lte => a <= b,
583 PredicateOp::Gte => a >= b,
584 _ => unreachable!(),
585 },
586 _ => false,
587 }
588 }
589 }
590}
591
592#[must_use = "evaluating an operator is useless without inspecting the result"]
597pub fn eval_op_typed(op: PredicateOp, node_val: &str, pred_val: &PredicateValue) -> bool {
598 match op {
599 PredicateOp::Lt | PredicateOp::Gt | PredicateOp::Lte | PredicateOp::Gte => {
600 let node_num = node_val.parse::<f64>().ok();
601 let pred_num = pred_val.as_f64();
602 match (node_num, pred_num) {
603 (Some(a), Some(b)) => match op {
604 PredicateOp::Lt => a < b,
605 PredicateOp::Gt => a > b,
606 PredicateOp::Lte => a <= b,
607 PredicateOp::Gte => a >= b,
608 _ => unreachable!(),
609 },
610 _ => false,
611 }
612 }
613 _ => {
614 let pred_str = pred_val.as_str_repr();
615 eval_op(op, node_val, &pred_str)
616 }
617 }
618}
619
620fn token_to_op(tok: &Token) -> Option<PredicateOp> {
621 match tok {
622 Token::Eq => Some(PredicateOp::Eq),
623 Token::NotEq => Some(PredicateOp::NotEq),
624 Token::WordMatch => Some(PredicateOp::WordMatch),
625 Token::PrefixMatch => Some(PredicateOp::PrefixMatch),
626 Token::StartsWith => Some(PredicateOp::StartsWith),
627 Token::EndsWith => Some(PredicateOp::EndsWith),
628 Token::Contains => Some(PredicateOp::Contains),
629 Token::Lt => Some(PredicateOp::Lt),
630 Token::Gt => Some(PredicateOp::Gt),
631 Token::Lte => Some(PredicateOp::Lte),
632 Token::Gte => Some(PredicateOp::Gte),
633 _ => None,
634 }
635}
636
637fn token_to_value(tok: &Token) -> Option<PredicateValue> {
638 match tok {
639 Token::Str(s) => Some(PredicateValue::String(s.clone())),
640 Token::Number(n) => Some(PredicateValue::Number(*n)),
641 Token::True => Some(PredicateValue::Bool(true)),
642 Token::False => Some(PredicateValue::Bool(false)),
643 Token::Null => Some(PredicateValue::Null),
644 Token::Ident(s) => Some(PredicateValue::String(s.clone())),
645 _ => None,
646 }
647}
648
649fn token_as_name(tok: &Token) -> Option<String> {
650 match tok {
651 Token::Null => Some("null".to_string()),
652 Token::True => Some("true".to_string()),
653 Token::False => Some("false".to_string()),
654 Token::Not => Some("not".to_string()),
655 Token::Is => Some("is".to_string()),
656 Token::And => Some("and".to_string()),
657 Token::Or => Some("or".to_string()),
658 _ => None,
659 }
660}
661
/// Parses one predicate (`name [op value]`) from `tokens`, advancing `pos`
/// past everything it consumes. A name not followed by an operator is
/// returned as a presence check, leaving the next token for the caller
/// (e.g. the comma in a predicate list).
///
/// # Errors
/// Fails at end of input, on a first token that is not usable as a name,
/// or on an operator with no following value token.
fn parse_one_predicate(tokens: &[Token], pos: &mut usize) -> Result<Predicate, String> {
    if *pos >= tokens.len() {
        return Err("Expected predicate, got end of input".to_string());
    }

    // Keyword tokens (`null`, `true`, ...) double as attribute names here.
    let name = match &tokens[*pos] {
        Token::Ident(s) => s.clone(),
        tok => match token_as_name(tok) {
            Some(s) => s,
            None => return Err(format!("Expected attribute name, got {:?}", tok)),
        },
    };
    *pos += 1;

    // Bare name at end of input: presence check.
    if *pos >= tokens.len() {
        return Ok(Predicate {
            name,
            op: None,
            value: None,
        });
    }

    // Next token is not an operator: presence check, token left unconsumed.
    let op = match token_to_op(&tokens[*pos]) {
        Some(op) => op,
        None => {
            return Ok(Predicate {
                name,
                op: None,
                value: None,
            });
        }
    };
    *pos += 1;

    if *pos >= tokens.len() {
        return Err(format!("Expected value after operator for '{name}'"));
    }

    let value = token_to_value(&tokens[*pos])
        .ok_or_else(|| format!("Expected value, got {:?}", tokens[*pos]))?;
    *pos += 1;

    Ok(Predicate {
        name,
        op: Some(op),
        value: Some(value),
    })
}
712
/// True when `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
716
/// True when `ch` may continue an identifier: ASCII alphanumeric, `_`,
/// or `-`.
fn is_ident_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-')
}
720
// Unit tests covering: tokenizer basics, predicate parsing, operator
// evaluation, and the Tokenizer extension hooks (custom operators,
// custom keywords, transforms).
#[cfg(test)]
mod tests {
    use super::*;

    // --- Tokenizer basics ---

    #[test]
    fn tokenizes_basic_predicate() {
        let tokens = tokenize(r#"method="GET""#).unwrap();

        assert_eq!(tokens.len(), 3, "should produce 3 tokens");
        assert_eq!(tokens[0], Token::Ident("method".to_string()), "ident");
        assert_eq!(tokens[1], Token::Eq, "eq");
        assert_eq!(tokens[2], Token::Str("GET".to_string()), "string value");
    }

    #[test]
    fn tokenizes_numeric_value() {
        let tokens = tokenize("count>=42.5").unwrap();

        assert_eq!(tokens.len(), 3, "should produce 3 tokens");
        assert_eq!(tokens[0], Token::Ident("count".to_string()), "ident");
        assert_eq!(tokens[1], Token::Gte, "gte op");
        assert_eq!(tokens[2], Token::Number(42.5), "number value");
    }

    #[test]
    fn tokenizes_keywords_case_insensitive() {
        let lower = tokenize("null true false not is and or").unwrap();
        let upper = tokenize("NULL TRUE FALSE NOT IS AND OR").unwrap();
        let mixed = tokenize("Null True False Not Is And Or").unwrap();

        let expected = vec![
            Token::Null,
            Token::True,
            Token::False,
            Token::Not,
            Token::Is,
            Token::And,
            Token::Or,
        ];
        assert_eq!(lower, expected, "lowercase keywords");
        assert_eq!(upper, expected, "uppercase keywords");
        assert_eq!(mixed, expected, "mixed-case keywords");
    }

    #[test]
    fn tokenizes_keywords_in_expression() {
        let tokens = tokenize("status is not null and active = true or count < 5").unwrap();

        let expected = vec![
            Token::Ident("status".to_string()),
            Token::Is,
            Token::Not,
            Token::Null,
            Token::And,
            Token::Ident("active".to_string()),
            Token::Eq,
            Token::True,
            Token::Or,
            Token::Ident("count".to_string()),
            Token::Lt,
            Token::Number(5.0),
        ];
        assert_eq!(tokens, expected, "keyword tokenization");
    }

    #[test]
    fn tokenizes_structural_tokens() {
        let tokens = tokenize("[a.b, c(d)]").unwrap();

        assert_eq!(tokens[0], Token::LBracket, "lbracket");
        assert_eq!(tokens[1], Token::Ident("a".to_string()), "a");
        assert_eq!(tokens[2], Token::Dot, "dot");
        assert_eq!(tokens[3], Token::Ident("b".to_string()), "b");
        assert_eq!(tokens[4], Token::Comma, "comma");
        assert_eq!(tokens[5], Token::Ident("c".to_string()), "c");
        assert_eq!(tokens[6], Token::LParen, "lparen");
        assert_eq!(tokens[7], Token::Ident("d".to_string()), "d");
        assert_eq!(tokens[8], Token::RParen, "rparen");
        assert_eq!(tokens[9], Token::RBracket, "rbracket");
    }

    #[test]
    fn tokenizes_selector_tokens() {
        let tokens = tokenize("div > .class + #id ~ span::before :hover !important;").unwrap();

        let expected = vec![
            Token::Ident("div".to_string()),
            Token::Gt,
            Token::Dot,
            Token::Ident("class".to_string()),
            Token::Plus,
            Token::Hash,
            Token::Ident("id".to_string()),
            Token::Tilde,
            Token::Ident("span".to_string()),
            Token::DoubleColon,
            Token::Ident("before".to_string()),
            Token::Colon,
            Token::Ident("hover".to_string()),
            Token::Bang,
            Token::Ident("important".to_string()),
            Token::Semicolon,
        ];
        assert_eq!(tokens, expected, "selector-style tokens");
    }

    #[test]
    fn tokenizes_all_operators() {
        let tokens = tokenize("~= |= ^= $= *= != <= >= < > =").unwrap();

        let expected = vec![
            Token::WordMatch,
            Token::PrefixMatch,
            Token::StartsWith,
            Token::EndsWith,
            Token::Contains,
            Token::NotEq,
            Token::Lte,
            Token::Gte,
            Token::Lt,
            Token::Gt,
            Token::Eq,
        ];
        assert_eq!(tokens, expected, "all operators");
    }

    #[test]
    fn tokenizes_negative_number() {
        let tokens = tokenize("val>-2.5").unwrap();

        assert_eq!(tokens.len(), 3, "should produce 3 tokens");
        assert_eq!(tokens[2], Token::Number(-2.5), "negative float");
    }

    #[test]
    fn tokenizes_standalone_star() {
        let tokens = tokenize("* a").unwrap();

        assert_eq!(tokens[0], Token::Star, "standalone star");
        assert_eq!(tokens[1], Token::Ident("a".to_string()), "ident after star");
    }

    // --- Predicate parsing ---

    #[test]
    fn parses_presence_only() {
        let pred = parse_predicate("async").unwrap();

        assert_eq!(pred.name, "async", "name should be parsed");
        assert_eq!(pred.op, None, "no operator for presence check");
        assert_eq!(pred.value, None, "no value for presence check");
    }

    #[test]
    fn parses_eq_string() {
        let pred = parse_predicate(r#"method="GET""#).unwrap();

        assert_eq!(pred.name, "method", "name");
        assert_eq!(pred.op, Some(PredicateOp::Eq), "operator");
        assert_eq!(
            pred.value,
            Some(PredicateValue::String("GET".to_string())),
            "typed string value"
        );
    }

    #[test]
    fn parses_numeric_value() {
        let pred = parse_predicate("count>=5").unwrap();

        assert_eq!(pred.name, "count", "name");
        assert_eq!(pred.op, Some(PredicateOp::Gte), "operator");
        assert_eq!(
            pred.value,
            Some(PredicateValue::Number(5.0)),
            "typed numeric value"
        );
    }

    #[test]
    fn parses_bool_value() {
        let pred = parse_predicate("active=true").unwrap();

        assert_eq!(
            pred.value,
            Some(PredicateValue::Bool(true)),
            "typed bool value"
        );
    }

    #[test]
    fn parses_null_value() {
        let pred = parse_predicate("status=null").unwrap();

        assert_eq!(pred.value, Some(PredicateValue::Null), "typed null value");
    }

    #[test]
    fn parses_css_string_operators() {
        let starts = parse_predicate(r#"name^="handle""#).unwrap();
        let ends = parse_predicate(r#"name$="Controller""#).unwrap();
        let contains = parse_predicate(r#"name*="user""#).unwrap();
        let word = parse_predicate(r#"class~="active""#).unwrap();
        let prefix = parse_predicate(r#"lang|="en""#).unwrap();

        assert_eq!(
            starts.op,
            Some(PredicateOp::StartsWith),
            "^= should be StartsWith"
        );
        assert_eq!(
            ends.op,
            Some(PredicateOp::EndsWith),
            "$= should be EndsWith"
        );
        assert_eq!(
            contains.op,
            Some(PredicateOp::Contains),
            "*= should be Contains"
        );
        assert_eq!(
            word.op,
            Some(PredicateOp::WordMatch),
            "~= should be WordMatch"
        );
        assert_eq!(
            prefix.op,
            Some(PredicateOp::PrefixMatch),
            "|= should be PrefixMatch"
        );
    }

    #[test]
    fn parses_numeric_operators() {
        let lt = parse_predicate("count<5").unwrap();
        let gt = parse_predicate("count>5").unwrap();
        let lte = parse_predicate("count<=5").unwrap();
        let gte = parse_predicate("count>=5").unwrap();
        let neq = parse_predicate("status!=200").unwrap();

        assert_eq!(lt.op, Some(PredicateOp::Lt), "< should be Lt");
        assert_eq!(gt.op, Some(PredicateOp::Gt), "> should be Gt");
        assert_eq!(lte.op, Some(PredicateOp::Lte), "<= should be Lte");
        assert_eq!(gte.op, Some(PredicateOp::Gte), ">= should be Gte");
        assert_eq!(neq.op, Some(PredicateOp::NotEq), "!= should be NotEq");
        assert_eq!(lt.value, Some(PredicateValue::Number(5.0)), "numeric value");
    }

    #[test]
    fn parses_predicate_list() {
        let preds = parse_predicate_list(r#"method="POST", async, count>=1"#).unwrap();

        assert_eq!(preds.len(), 3, "should parse 3 predicates");
        assert_eq!(preds[0].name, "method", "first predicate name");
        assert_eq!(preds[1].name, "async", "second predicate name");
        assert_eq!(preds[2].name, "count", "third predicate name");
        assert_eq!(preds[2].op, Some(PredicateOp::Gte), "third predicate op");
    }

    #[test]
    fn parses_escape_sequences() {
        let pred = parse_predicate(r#"name="foo\"bar""#).unwrap();

        assert_eq!(
            pred.value,
            Some(PredicateValue::String(r#"foo"bar"#.to_string())),
            "should handle escaped quotes"
        );
    }

    #[test]
    fn parses_single_quoted_values() {
        let pred = parse_predicate("method='POST'").unwrap();

        assert_eq!(
            pred.value,
            Some(PredicateValue::String("POST".to_string())),
            "single quotes should work"
        );
    }

    // Keywords must remain usable as plain attribute names.
    #[test]
    fn keywords_as_attribute_names() {
        let null_presence = parse_predicate("null").unwrap();
        let true_eq = parse_predicate(r#"true="yes""#).unwrap();
        let not_presence = parse_predicate("not").unwrap();
        let or_eq = parse_predicate("or=1").unwrap();
        let list = parse_predicate_list("null, true, false, not, is, and, or").unwrap();

        assert_eq!(null_presence.name, "null", "null as attr name");
        assert_eq!(null_presence.op, None, "presence check");

        assert_eq!(true_eq.name, "true", "true as attr name");
        assert_eq!(true_eq.op, Some(PredicateOp::Eq), "eq operator");
        assert_eq!(
            true_eq.value,
            Some(PredicateValue::String("yes".to_string())),
            "string value"
        );

        assert_eq!(not_presence.name, "not", "not as attr name");
        assert_eq!(or_eq.name, "or", "or as attr name");

        assert_eq!(list.len(), 7, "all keywords as names");
        let names: Vec<&str> = list.iter().map(|p| p.name.as_str()).collect();
        assert_eq!(
            names,
            vec!["null", "true", "false", "not", "is", "and", "or"],
            "keyword names in list"
        );
    }

    // --- Operator evaluation ---

    #[test]
    fn eval_string_operators() {
        assert!(eval_op(PredicateOp::Eq, "GET", "GET"), "exact match");
        assert!(!eval_op(PredicateOp::Eq, "GET", "POST"), "exact mismatch");
        assert!(eval_op(PredicateOp::NotEq, "GET", "POST"), "not equal");
        assert!(
            eval_op(PredicateOp::StartsWith, "handleClick", "handle"),
            "starts with"
        );
        assert!(
            eval_op(PredicateOp::EndsWith, "UserController", "Controller"),
            "ends with"
        );
        assert!(
            eval_op(PredicateOp::Contains, "createUser", "User"),
            "contains"
        );
        assert!(
            eval_op(PredicateOp::WordMatch, "foo bar baz", "bar"),
            "word match"
        );
        assert!(
            eval_op(PredicateOp::PrefixMatch, "en-US", "en"),
            "prefix match with hyphen"
        );
        assert!(
            eval_op(PredicateOp::PrefixMatch, "en", "en"),
            "prefix match exact"
        );
        assert!(
            !eval_op(PredicateOp::PrefixMatch, "energy", "en"),
            "prefix match no hyphen"
        );
    }

    #[test]
    fn eval_numeric_operators() {
        assert!(eval_op(PredicateOp::Lt, "3", "5"), "3 < 5");
        assert!(!eval_op(PredicateOp::Lt, "5", "3"), "5 not < 3");
        assert!(eval_op(PredicateOp::Gt, "5", "3"), "5 > 3");
        assert!(eval_op(PredicateOp::Lte, "5", "5"), "5 <= 5");
        assert!(eval_op(PredicateOp::Gte, "5", "5"), "5 >= 5");
        assert!(
            !eval_op(PredicateOp::Lt, "abc", "5"),
            "non-numeric returns false"
        );
    }

    #[test]
    fn eval_typed_numeric() {
        assert!(
            eval_op_typed(PredicateOp::Gt, "10", &PredicateValue::Number(5.0)),
            "typed numeric comparison"
        );
        assert!(
            eval_op_typed(
                PredicateOp::Eq,
                "hello",
                &PredicateValue::String("hello".to_string())
            ),
            "typed string comparison"
        );
        assert!(
            !eval_op_typed(PredicateOp::Lt, "abc", &PredicateValue::Number(5.0)),
            "non-numeric node returns false"
        );
    }

    // --- Tokenizer extension hooks ---

    #[test]
    fn custom_operator() {
        let tok = Tokenizer::new().op("=~", Token::Custom("RegexMatch".into()));

        let tokens = tok.tokenize("name =~ 'foo.*'").unwrap();

        assert_eq!(tokens.len(), 3, "should produce 3 tokens");
        assert_eq!(tokens[0], Token::Ident("name".to_string()), "ident");
        assert_eq!(tokens[1], Token::Custom("RegexMatch".into()), "custom op");
        assert_eq!(tokens[2], Token::Str("foo.*".to_string()), "pattern");
    }

    #[test]
    fn custom_operator_overrides_builtin() {
        let tok = Tokenizer::new().op(">=", Token::Custom("GreaterOrEqual".into()));

        let tokens = tok.tokenize("count >= 5").unwrap();

        assert_eq!(
            tokens[1],
            Token::Custom("GreaterOrEqual".into()),
            "custom overrides builtin"
        );
    }

    #[test]
    fn custom_keyword() {
        let tok = Tokenizer::new()
            .keyword("where", Token::Custom("Where".into()))
            .keyword("select", Token::Custom("Select".into()));

        let tokens = tok.tokenize("SELECT name WHERE count > 5").unwrap();

        assert_eq!(
            tokens[0],
            Token::Custom("Select".into()),
            "custom keyword select"
        );
        assert_eq!(tokens[1], Token::Ident("name".to_string()), "ident");
        assert_eq!(
            tokens[2],
            Token::Custom("Where".into()),
            "custom keyword where"
        );
    }

    #[test]
    fn transform_middleware() {
        fn strip_commas(tokens: Vec<Token>) -> Vec<Token> {
            tokens.into_iter().filter(|t| *t != Token::Comma).collect()
        }

        let tok = Tokenizer::new().transform(strip_commas);

        let tokens = tok.tokenize("a, b, c").unwrap();

        assert_eq!(tokens.len(), 3, "commas removed");
        assert_eq!(tokens[0], Token::Ident("a".to_string()), "a");
        assert_eq!(tokens[1], Token::Ident("b".to_string()), "b");
        assert_eq!(tokens[2], Token::Ident("c".to_string()), "c");
    }

    #[test]
    fn chained_transforms() {
        fn strip_commas(tokens: Vec<Token>) -> Vec<Token> {
            tokens.into_iter().filter(|t| *t != Token::Comma).collect()
        }
        fn uppercase_idents(tokens: Vec<Token>) -> Vec<Token> {
            tokens
                .into_iter()
                .map(|tok| match tok {
                    Token::Ident(s) => Token::Ident(s.to_uppercase()),
                    other => other,
                })
                .collect()
        }

        let tok = Tokenizer::new()
            .transform(strip_commas)
            .transform(uppercase_idents);

        let tokens = tok.tokenize("foo, bar").unwrap();

        assert_eq!(tokens.len(), 2, "commas stripped");
        assert_eq!(tokens[0], Token::Ident("FOO".to_string()), "uppercased");
        assert_eq!(tokens[1], Token::Ident("BAR".to_string()), "uppercased");
    }

    #[test]
    fn multi_char_custom_op_priority() {
        let tok = Tokenizer::new().op("<=>", Token::Custom("Spaceship".into()));

        let tokens = tok.tokenize("a <=> b").unwrap();

        assert_eq!(tokens.len(), 3, "should produce 3 tokens");
        assert_eq!(
            tokens[1],
            Token::Custom("Spaceship".into()),
            "3-char custom op"
        );
    }
}
1272