use super::regex;
/// `cat|dog` must match text containing either alternative and reject text
/// containing neither.
#[test]
fn test_alternation() {
    let pets = regex("cat|dog");
    let cases = [
        ("I have a cat", true),
        ("I have a dog", true),
        ("I have a bird", false),
    ];
    for (haystack, should_match) in cases {
        assert_eq!(pets.is_match(haystack), should_match);
    }
}
/// `a.*b` must match when an `a` is followed by a `b` with any number of
/// characters in between (including none), and must reject a lone `a` or `b`.
#[test]
fn test_dot_star() {
    let pattern = regex("a.*b");

    for accepted in ["ab", "axb", "axxxb"] {
        assert!(pattern.is_match(accepted));
    }
    for rejected in ["a", "b"] {
        assert!(!pattern.is_match(rejected));
    }
}
/// `[0-9]+` must locate the digit run `123` inside `abc123def`.
#[test]
fn test_character_class() {
    let digits = regex("[0-9]+");
    let found = digits.find("abc123def").unwrap();
    assert_eq!(found.as_str(), "123");
}
/// A `^` that is not the first character of a class must be treated as a
/// literal caret, not as negation.
#[test]
fn test_caret_in_character_class() {
    // Caret in the middle of the class.
    let middle = regex("[a^b]");
    for member in ["a", "^", "b"] {
        assert!(middle.is_match(member));
    }
    assert!(!middle.is_match("c"));

    // Caret as the last member of the class.
    let trailing = regex("[ab^]");
    assert!(trailing.is_match("^"));

    // Caret inside an operator-character class.
    let operators = regex(r"[+\-*/=<>!&|^%]+");
    for op in ["^", "+", "^%", "!="] {
        assert!(operators.is_match(op));
    }
}
/// A tokenizer-style alternation (identifiers, numbers, operators,
/// punctuation, quoted strings) must match one sample of each token kind.
#[test]
fn test_tokenization_pattern() {
    let tokenizer = regex(
        r#"[a-zA-Z_][a-zA-Z0-9_]*|[0-9]+(?:\.[0-9]+)?|[+\-*/=<>!&|^%]+|[(){}\[\];,.]|"[^"]*"|'[^']*'"#,
    );
    let samples = [
        "foo",
        "_bar",
        "123",
        "3.14",
        "+",
        "^",
        "!=",
        "(",
        ";",
        r#""hello""#,
        "'world'",
    ];
    for token in samples {
        assert!(tokenizer.is_match(token));
    }
}
/// The `\d+` shorthand must find the digit run `123` inside `abc123def`.
#[test]
fn test_shorthand_digit() {
    let digits = regex("\\d+");
    let hit = digits.find("abc123def").unwrap();
    assert_eq!(hit.as_str(), "123");
}
/// The `\w+` shorthand must return `hello` as its match in `hello world`.
#[test]
fn test_shorthand_word() {
    let word = regex("\\w+");
    let hit = word.find("hello world").unwrap();
    assert_eq!(hit.as_str(), "hello");
}
/// The `\s+` shorthand must find the single space separating `hello world`.
#[test]
fn test_shorthand_whitespace() {
    let blanks = regex("\\s+");
    let hit = blanks.find("hello world").unwrap();
    assert_eq!(hit.as_str(), " ");
}
/// `a+` must match one or several `a`s and reject input without any `a`.
#[test]
fn test_plus() {
    let one_or_more = regex("a+");
    for (input, should_match) in [("a", true), ("aaa", true), ("b", false)] {
        assert_eq!(one_or_more.is_match(input), should_match);
    }
}
/// `a?` must match both a single `a` and the empty string.
#[test]
fn test_optional() {
    let maybe_a = regex("a?");
    for input in ["a", ""] {
        assert!(maybe_a.is_match(input));
    }
}
/// `a*` must match both the empty string and a run of `a`s.
#[test]
fn test_star() {
    let zero_or_more = regex("a*");
    for input in ["", "aaa"] {
        assert!(zero_or_more.is_match(input));
    }
}
/// `^hello` must match only when `hello` sits at the very start of the input.
#[test]
fn test_start_anchor() {
    let anchored = regex("^hello");

    assert!(anchored.is_match("hello world"));
    for not_at_start in ["say hello", " hello"] {
        assert!(!anchored.is_match(not_at_start));
    }
}
/// `world$` must match only when `world` sits at the very end of the input.
#[test]
fn test_end_anchor() {
    let anchored = regex("world$");

    assert!(anchored.is_match("hello world"));
    for not_at_end in ["world is big", "world "] {
        assert!(!anchored.is_match(not_at_end));
    }
}
/// `^hello$` must match the exact string `hello` and nothing with extra
/// text before or after it.
#[test]
fn test_both_anchors() {
    let exact = regex("^hello$");
    let cases = [
        ("hello", true),
        ("hello world", false),
        ("say hello", false),
        (" hello ", false),
    ];
    for (input, should_match) in cases {
        assert_eq!(exact.is_match(input), should_match);
    }
}
/// `^[a-z]+$` must accept inputs made only of lowercase ASCII letters and
/// reject inputs containing a space, an uppercase letter, or digits.
#[test]
fn test_anchored_pattern() {
    let lowercase_only = regex("^[a-z]+$");

    for accepted in ["hello", "world"] {
        assert!(lowercase_only.is_match(accepted));
    }
    for rejected in ["hello world", "Hello", "123"] {
        assert!(!lowercase_only.is_match(rejected));
    }
}
/// With the `(?m)` flag, `^hello` must match at the start of the input and
/// directly after a newline, but not in the middle of a line.
#[test]
fn test_multiline_start_anchor() {
    let line_start = regex("(?m)^hello");
    let cases = [
        ("hello world", true),
        ("first\nhello", true),
        ("line1\nline2\nhello", true),
        ("say hello", false),
    ];
    for (input, should_match) in cases {
        assert_eq!(line_start.is_match(input), should_match);
    }
}
/// With the `(?m)` flag, `world$` must match at the end of the input and
/// directly before a newline, but not in the middle of a line.
#[test]
fn test_multiline_end_anchor() {
    let line_end = regex("(?m)world$");
    let cases = [
        ("hello world", true),
        ("world\nnext", true),
        ("world hello", false),
    ];
    for (input, should_match) in cases {
        assert_eq!(line_end.is_match(input), should_match);
    }
}
/// `^$` must match the empty string and reject a non-empty one.
#[test]
fn test_empty_with_anchors() {
    let empty_only = regex("^$");
    for (input, should_match) in [("", true), ("x", false)] {
        assert_eq!(empty_only.is_match(input), should_match);
    }
}
/// `a{3}` must need three consecutive `a`s and must report a match of
/// exactly three characters even when more `a`s are available.
#[test]
fn test_exact_repeat() {
    let triple = regex("a{3}");

    for accepted in ["aaa", "xaaax"] {
        assert!(triple.is_match(accepted));
    }
    for too_short in ["a", "aa"] {
        assert!(!triple.is_match(too_short));
    }

    // Four `a`s are available, but only three may be consumed.
    let hit = triple.find("aaaa").unwrap();
    assert_eq!(hit.as_str(), "aaa");
    assert_eq!(hit.len(), 3);
}
/// `[a-z]{3}` must produce a match of length three in `abcd` and reject
/// inputs shorter than three letters.
#[test]
fn test_exact_repeat_class() {
    let three_letters = regex("[a-z]{3}");

    let hit = three_letters.find("abcd").unwrap();
    assert_eq!(hit.len(), 3);

    for too_short in ["ab", "a"] {
        assert!(!three_letters.is_match(too_short));
    }
}
/// `[A-Za-z]{8,13}` must accept letter runs of 8 and 13 characters, reject
/// shorter runs, and report a match length within 8..=13 on a longer input.
#[test]
fn test_bounded_repeat_range() {
    let bounded = regex("[A-Za-z]{8,13}");

    for accepted in ["abcdefgh", "abcdefghijklm"] {
        assert!(bounded.is_match(accepted));
    }
    for too_short in ["hello", "testing", "abc"] {
        assert!(!bounded.is_match(too_short));
    }

    // The whole alphabet offers more than 13 letters; the match must stay in range.
    let hit = bounded.find("abcdefghijklmnopqrstuvwxyz").unwrap();
    let len = hit.len();
    assert!(8 <= len && len <= 13, "expected 8-13, got {}", len);
}
/// `a{3,}` must accept three or more `a`s and reject fewer than three.
#[test]
fn test_bounded_repeat_min_only() {
    let at_least_three = regex("a{3,}");
    let cases = [
        ("aaa", true),
        ("aaaa", true),
        ("aaaaaaaa", true),
        ("a", false),
        ("aa", false),
    ];
    for (input, should_match) in cases {
        assert_eq!(at_least_three.is_match(input), should_match);
    }
}
/// Every `[A-Za-z]{8,13}` match yielded by `find_iter` must have a length
/// within 8..=13, and the iterator must actually yield the qualifying words.
///
/// Only "abcdefghij" (10 letters) and "worldtesting" (12 letters) are long
/// enough to match. Neither leaves 8 or more letters over after a match of at
/// least 8, so exactly two matches are expected no matter how many letters
/// each match consumes.
#[test]
fn test_bounded_repeat_in_find_iter() {
    let re = regex("[A-Za-z]{8,13}");
    let text = "ab hello testing abcdefghij worldtesting xy";
    let matches: Vec<_> = re.find_iter(text).collect();

    // Without this check the loop below passes vacuously when nothing is found.
    assert_eq!(
        matches.len(),
        2,
        "expected 2 matches, got {}",
        matches.len()
    );

    for m in &matches {
        assert!(
            m.len() >= 8 && m.len() <= 13,
            "match {:?} has invalid length {}",
            m.as_str(),
            m.len()
        );
    }

    // Each match must begin at the start of its word; only the first 8 letters
    // are pinned so the check does not depend on how much the repeat consumes.
    assert!(matches[0].as_str().starts_with("abcdefgh"));
    assert!(matches[1].as_str().starts_with("worldtes"));
}
/// An alternation of two multi-word literals must yield both names, in text
/// order, from `find_iter`.
#[test]
fn test_multiword_alternation() {
    let names = regex("Sherlock Holmes|John Watson");
    let found: Vec<_> = names
        .find_iter("Sherlock Holmes met John Watson")
        .collect();

    let expected = ["Sherlock Holmes", "John Watson"];
    assert_eq!(found.len(), 2);
    for (hit, want) in found.iter().zip(expected) {
        assert_eq!(hit.as_str(), want);
    }
}
/// A multi-word alternative must not match when only its first word is
/// present, and must match (returning the full name) when both words are.
#[test]
fn test_multiword_alternation_partial() {
    let names = regex("Sherlock Holmes|John Watson|Irene Adler");

    // Only the first word of an alternative: no match.
    let first_word_only = "Sherlock is here";
    assert!(!names.is_match(first_word_only));

    // The complete alternative: match, and `find` returns the whole name.
    let full_name = "Sherlock Holmes is here";
    assert!(names.is_match(full_name));
    let hit = names.find(full_name).unwrap();
    assert_eq!(hit.as_str(), "Sherlock Holmes");
}
/// A five-way alternation of multi-word names must match each complete name
/// and reject isolated fragments of those names.
#[test]
fn test_long_alternation_five_options() {
    let names = regex(
        "Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty",
    );

    let complete = [
        "Sherlock Holmes",
        "John Watson",
        "Irene Adler",
        "Inspector Lestrade",
        "Professor Moriarty",
    ];
    let fragments = ["Sherlock", "Holmes", "John"];

    // Complete names first, then fragments, each paired with its expected outcome.
    let cases = complete
        .iter()
        .map(|name| (*name, true))
        .chain(fragments.iter().map(|name| (*name, false)));

    for (text, expected) in cases {
        assert_eq!(
            names.is_match(text),
            expected,
            "is_match({:?}) should be {}",
            text,
            expected
        );
    }
}
/// `find` on `cat|dog|bird` must return the alternative that is present,
/// with the matching text and length.
#[test]
fn test_alternation_match_length() {
    let animals = regex("cat|dog|bird");

    let cat = animals.find("I have a cat").unwrap();
    assert_eq!(cat.as_str(), "cat");
    assert_eq!(cat.len(), 3);

    let bird = animals.find("I have a bird").unwrap();
    assert_eq!(bird.as_str(), "bird");
    assert_eq!(bird.len(), 4);
}
/// Iterating `.*` over text that starts with U+FEFF must yield at least one
/// match.
#[test]
fn test_utf8_bom_handling() {
    let any_run = regex(".*");
    let found: Vec<_> = any_run.find_iter("\u{feff}Hello").collect();
    assert_ne!(found.len(), 0);
}
/// Iterating the empty pattern over text with multi-byte characters must
/// complete and yield at least one match.
#[test]
fn test_utf8_multibyte_iteration() {
    let empty = regex("");
    let total = empty.find_iter("héllo wörld").count();
    assert!(total >= 1);
}
/// Iterating the empty pattern over text containing an emoji must complete
/// and yield at least one match.
#[test]
fn test_utf8_emoji_iteration() {
    let empty = regex("");
    let found: Vec<_> = empty.find_iter("a😀b").collect();
    assert_ne!(found.len(), 0);
}
/// The empty pattern iterated over `abc` must yield exactly four matches.
#[test]
fn test_empty_match_advancement() {
    let empty = regex("");
    let found: Vec<_> = empty.find_iter("abc").collect();
    assert_eq!(found.len(), 4);
}
/// Plain literal patterns must be counted correctly by `find_iter`:
/// `Sherlock` once, `Holmes` twice, `Sherlock Holmes` once.
#[test]
fn test_sherlock_name_patterns() {
    let text = "Sherlock Holmes is a detective. Holmes solved the case.";
    let expectations = [("Sherlock", 1), ("Holmes", 2), ("Sherlock Holmes", 1)];

    for (pattern, occurrences) in expectations {
        let literal = regex(pattern);
        assert_eq!(literal.find_iter(text).count(), occurrences);
    }
}
/// Alternations of single-word literals must find at least as many matches
/// as there are distinct alternatives present in the text.
#[test]
fn test_sherlock_alternation_patterns() {
    let text = "Sherlock Holmes and Watson walked down Baker Street";
    let expectations = [
        ("Sherlock|Street", 2),
        ("Sherlock|Holmes", 2),
        ("Sherlock|Holmes|Watson", 3),
    ];

    for (pattern, minimum) in expectations {
        let alternation = regex(pattern);
        assert!(alternation.find_iter(text).count() >= minimum);
    }
}
/// A 13-character literal must be found in full inside a longer sentence.
#[test]
fn test_long_literal_over_8_chars() {
    let literal = regex("Investigating");
    let hit = literal
        .find("Sherlock was Investigating the crime")
        .unwrap();
    assert_eq!(hit.as_str(), "Investigating");
    assert_eq!(hit.len(), 13);
}
/// An alternation whose branches are all longer than eight characters must
/// yield every occurrence, in the order they appear in the text.
#[test]
fn test_long_alternation_all_over_8_chars() {
    let words = regex("Investigating|Encyclopedia|Understanding");
    let found: Vec<_> = words
        .find_iter("The Encyclopedia contains Understanding of Investigating")
        .collect();

    // Text order, which differs from the order of the branches in the pattern.
    let expected = ["Encyclopedia", "Understanding", "Investigating"];
    assert_eq!(found.len(), 3);
    for (hit, want) in found.iter().zip(expected) {
        assert_eq!(hit.as_str(), want);
    }
}
/// With the `(?i)` flag, `sherlock` must match its upper-case, capitalised
/// and lower-case spellings.
#[test]
fn test_case_insensitive_basic() {
    let any_case = regex("(?i)sherlock");
    let occurrences = any_case
        .find_iter("SHERLOCK and Sherlock and sherlock")
        .count();
    assert_eq!(occurrences, 3, "Should match all case variants");
}
/// With a leading `(?i)` flag, every branch of the alternation must match
/// regardless of letter case.
#[test]
fn test_case_insensitive_alternation() {
    let any_case = regex("(?i)sherlock|holmes|watson");
    let occurrences = any_case.find_iter("SHERLOCK HOLMES and Watson").count();
    assert_eq!(occurrences, 3);
}
/// `\p{L}+` must treat the non-ASCII `ö` as a letter, so `Hello Wörld`
/// yields exactly the two words.
#[test]
fn test_unicode_letter_property() {
    let letters = regex(r"\p{L}+");
    let found: Vec<_> = letters.find_iter("Hello Wörld").collect();

    let expected = ["Hello", "Wörld"];
    assert_eq!(found.len(), 2);
    for (hit, want) in found.iter().zip(expected) {
        assert_eq!(hit.as_str(), want);
    }
}
/// With the `(?s)` flag, `.*` must match across the newline and cover the
/// whole two-line input.
#[test]
fn test_unicode_dot_all() {
    let two_lines = "Line1\nLine2";
    let dot_all = regex("(?s).*");
    let hit = dot_all.find(two_lines).unwrap();
    assert_eq!(hit.as_str(), two_lines);
}