pub(crate) use super::*;
/// Two addresses embedded in one sentence should both be reported in `emails`.
#[test]
fn test_extract_emails() {
    let ex = EntityExtractor::new();
    let input = "Contact john@example.com or jane@test.org for info";
    let found = ex.extract(input).expect("should succeed");

    assert_eq!(found.emails.len(), 2);
    // Check each expected address in turn, in the order they appear in the input.
    for expected in &["john@example.com", "jane@test.org"] {
        assert!(found.emails.contains(&(*expected).to_owned()));
    }
}
/// Both an `https://` and an `http://` link should be collected into `urls`.
#[test]
fn test_extract_urls() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Visit https://example.com or http://test.org")
        .expect("should succeed");

    // Small lookup helper so each expectation reads as a single line.
    let has_url = |needle: &str| found.urls.iter().any(|u| u == needle);

    assert_eq!(found.urls.len(), 2);
    assert!(has_url("https://example.com"));
    assert!(has_url("http://test.org"));
}
/// Two dash-separated `NNN-NNN-NNNN` numbers should both be extracted.
#[test]
fn test_extract_phone_numbers() {
    let ex = EntityExtractor::new();
    let input = "Call 555-123-4567 or 987-654-3210";
    let found = ex.extract(input).expect("should succeed");
    let phones = &found.phone_numbers;

    assert_eq!(phones.len(), 2);
    assert!(phones.iter().any(|p| p == "555-123-4567"));
    assert!(phones.iter().any(|p| p == "987-654-3210"));
}
/// `@`-prefixed handles (including one with an underscore) should be reported
/// with their leading `@` intact.
#[test]
fn test_extract_mentions() {
    let ex = EntityExtractor::new();
    let input = "Thanks @john_doe and @jane for your help!";
    let found = ex.extract(input).expect("should succeed");

    assert_eq!(found.mentions.len(), 2);
    for expected in &["@john_doe", "@jane"] {
        assert!(found.mentions.contains(&(*expected).to_owned()));
    }
}
/// `#`-prefixed tags should be reported with their leading `#` intact.
#[test]
fn test_extract_hashtags() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Check out #rust and #machinelearning topics")
        .expect("should succeed");

    // Small lookup helper so each expectation reads as a single line.
    let has_tag = |needle: &str| found.hashtags.iter().any(|t| t == needle);

    assert_eq!(found.hashtags.len(), 2);
    assert!(has_tag("#rust"));
    assert!(has_tag("#machinelearning"));
}
/// Capitalised words in a sentence should show up in `named_entities`.
///
/// Only `John`, `Smith` and `Paris` are checked explicitly.
#[test]
fn test_extract_named_entities() {
    let ex = EntityExtractor::new();
    let input = "John Smith visited Paris and London last week";
    let found = ex.extract(input).expect("should succeed");
    let names = &found.named_entities;

    assert!(!names.is_empty());
    for expected in &["John", "Smith", "Paris"] {
        assert!(names.contains(&(*expected).to_owned()));
    }
}
/// A freshly constructed `Entities` reports itself as empty with a zero count.
#[test]
fn test_empty_entities() {
    let fresh = Entities::new();

    assert!(fresh.is_empty());
    assert_eq!(fresh.total_count(), 0);
}
/// With `with_named_entities(false)` the extractor should leave
/// `named_entities` empty even for text full of capitalised words.
#[test]
fn test_with_named_entities_disabled() {
    let ex = EntityExtractor::new().with_named_entities(false);
    let found = ex
        .extract("John Smith visited Paris and London")
        .expect("should succeed");
    let names = &found.named_entities;

    assert!(
        names.is_empty(),
        "Named entities should not be extracted when disabled"
    );
}
/// `Entities::default()` yields an empty container with a zero count.
#[test]
fn test_entities_default() {
    let defaulted = Entities::default();

    assert!(defaulted.is_empty());
    assert_eq!(defaulted.total_count(), 0);
}
/// An extractor built through `Default` should report named entities for
/// capitalised input.
#[test]
fn test_extractor_default() {
    let ex = EntityExtractor::default();
    let input = "Hello World";
    let found = ex.extract(input).expect("should succeed");

    assert!(!found.named_entities.is_empty());
}
/// Populating a single category is enough to make `is_empty` false and
/// `total_count` one.
#[test]
fn test_is_empty_partial() {
    let mut bag = Entities::new();
    bag.emails.push(String::from("test@example.com"));

    assert!(!bag.is_empty());
    assert_eq!(bag.total_count(), 1);
}
/// One entry in each of the six categories should give a total count of six.
#[test]
fn test_total_count_mixed() {
    let mut bag = Entities::new();

    // One item per category.
    bag.emails.push(String::from("a@b.com"));
    bag.urls.push(String::from("https://x.com"));
    bag.phone_numbers.push(String::from("555-123-4567"));
    bag.mentions.push(String::from("@user"));
    bag.hashtags.push(String::from("#tag"));
    bag.named_entities.push(String::from("John"));

    assert_eq!(bag.total_count(), 6);
    assert!(!bag.is_empty());
}
/// A domain-like token with no `@` should not be reported as an email.
#[test]
fn test_invalid_email_no_at() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Not an email: noatsign.com")
        .expect("should succeed");

    assert!(found.emails.is_empty());
}
/// A token containing `@@` should not be reported as an email.
#[test]
fn test_invalid_email_multiple_at() {
    let ex = EntityExtractor::new();
    let input = "Bad email: a@@b.com";
    let emails = ex.extract(input).expect("should succeed").emails;

    assert!(emails.is_empty());
}
/// A token that starts with `@` (nothing before it) should not be reported
/// as an email.
#[test]
fn test_invalid_email_empty_local() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Bad email: @domain.com")
        .expect("should succeed");
    let emails = &found.emails;

    assert!(emails.is_empty());
}
/// `user@localhost` (no dot after the `@`) should not be reported as an email.
#[test]
fn test_invalid_email_no_dot_in_domain() {
    let ex = EntityExtractor::new();
    let input = "Bad email: user@localhost";
    let found = ex.extract(input).expect("should succeed");

    assert!(found.emails.is_empty());
}
/// `user@.com` (dot immediately after the `@`) should not be reported as an
/// email.
#[test]
fn test_invalid_email_empty_domain_part() {
    let ex = EntityExtractor::new();
    let input = "Bad email: user@.com";
    let emails = ex.extract(input).expect("should succeed").emails;

    assert!(emails.is_empty());
}
/// A sentence-ending `.` directly after a URL should not be part of the
/// extracted URL.
#[test]
fn test_url_with_trailing_punctuation() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Visit https://example.com.")
        .expect("should succeed");
    let urls = &found.urls;

    assert_eq!(urls.len(), 1);
    assert_eq!(
        urls.first().map(String::as_str),
        Some("https://example.com")
    );
}
/// An `http://` URL with a path and query string should yield exactly one
/// URL entry.
#[test]
fn test_url_http_scheme() {
    let ex = EntityExtractor::new();
    let input = "Visit http://insecure.example.com/path?q=1";
    let urls = ex.extract(input).expect("should succeed").urls;

    assert_eq!(urls.len(), 1);
}
/// A bare `www.` host without a scheme should not be reported as a URL.
#[test]
fn test_no_url_without_scheme() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Visit www.example.com")
        .expect("should succeed");

    assert!(found.urls.is_empty());
}
/// A dash-separated number whose last group has only three digits should
/// not be reported as a phone number.
#[test]
fn test_phone_number_wrong_digit_count() {
    let ex = EntityExtractor::new();
    let input = "Call 555-123-456";
    let phones = ex.extract(input).expect("should succeed").phone_numbers;

    assert!(phones.is_empty());
}
/// `1-555-123-4567` (eleven digits with a leading `1-` group) is expected
/// to yield no phone numbers.
#[test]
fn test_phone_number_11_digits() {
    let ex = EntityExtractor::new();
    let found = ex
        .extract("Call 1-555-123-4567")
        .expect("should succeed");
    let phones = &found.phone_numbers;

    assert!(phones.is_empty());
}
/// Ten digits grouped as `55.512.34567` should not be reported as a phone
/// number.
#[test]
fn test_phone_number_wrong_format_10_digits() {
    let ex = EntityExtractor::new();
    let input = "Call 55.512.34567";
    let found = ex.extract(input).expect("should succeed");

    assert!(found.phone_numbers.is_empty());
}
/// A `!` directly after a mention should not be part of the extracted mention.
#[test]
fn test_mention_with_trailing_punctuation() {
    let ex = EntityExtractor::new();
    let found = ex.extract("Thanks @user123!").expect("should succeed");
    let mentions = &found.mentions;

    assert_eq!(mentions.len(), 1);
    let only = &mentions[0];
    assert_eq!(only, "@user123");
}
/// A lone `@` surrounded by spaces should not be reported as a mention.
#[test]
fn test_mention_bare_at_sign() {
    let ex = EntityExtractor::new();
    let input = "Just an @ sign";
    let mentions = ex.extract(input).expect("should succeed").mentions;

    assert!(mentions.is_empty());
}
/// `@user-name` (a handle containing a hyphen) is expected to yield no
/// mentions at all.
#[test]
fn test_mention_with_special_chars() {
    let ex = EntityExtractor::new();
    let found = ex.extract("Hello @user-name").expect("should succeed");

    assert!(found.mentions.is_empty());
}
/// A `!` directly after a hashtag should not be part of the extracted hashtag.
#[test]
fn test_hashtag_with_trailing_punctuation() {
    let ex = EntityExtractor::new();
    let input = "Check #rustlang!";
    let found = ex.extract(input).expect("should succeed");
    let tags = &found.hashtags;

    assert_eq!(tags.len(), 1);
    assert_eq!(tags.first().map(String::as_str), Some("#rustlang"));
}
/// A lone `#` surrounded by spaces should not be reported as a hashtag.
#[test]
fn test_hashtag_bare_hash() {
    let ex = EntityExtractor::new();
    let input = "Just a # sign";
    let tags = ex.extract(input).expect("should succeed").hashtags;

    assert!(tags.is_empty());
}
/// `#rust-lang` (a tag containing a hyphen) is expected to yield no
/// hashtags at all.
#[test]
fn test_hashtag_with_special_chars() {
    let ex = EntityExtractor::new();
    let found = ex.extract("Tag #rust-lang").expect("should succeed");

    assert!(found.hashtags.is_empty());
}
/// All-uppercase words such as `NASA` and `FBI` should not appear among the
/// named entities.
#[test]
fn test_named_entity_all_caps_excluded() {
    let ex = EntityExtractor::new();
    let input = "The NASA and FBI are organizations";
    let found = ex.extract(input).expect("should succeed");
    let names = &found.named_entities;

    assert!(names.iter().all(|n| n != "NASA"));
    assert!(names.iter().all(|n| n != "FBI"));
}
/// Single-letter capitalised words (`I`, `A`) should not appear among the
/// named entities.
#[test]
fn test_named_entity_single_char_excluded() {
    let ex = EntityExtractor::new();
    let found = ex.extract("I went to A store").expect("should succeed");

    for unwanted in &["I", "A"] {
        assert!(!found.named_entities.contains(&(*unwanted).to_owned()));
    }
}
/// A name repeated several times in the input should be listed only once.
#[test]
fn test_named_entity_deduplication() {
    let ex = EntityExtractor::new();
    let input = "John met John and John again";
    let found = ex.extract(input).expect("should succeed");

    // Tally how many entries equal "John".
    let mut occurrences = 0usize;
    for name in &found.named_entities {
        if name == "John" {
            occurrences += 1;
        }
    }
    assert_eq!(occurrences, 1);
}
/// A purely numeric token should not appear among the named entities.
#[test]
fn test_named_entity_numbers_only_word() {
    let ex = EntityExtractor::new();
    let found = ex.extract("Number 123 here").expect("should succeed");
    let names = &found.named_entities;

    assert!(names.iter().all(|n| n != "123"));
}
/// A clone of a populated `Entities` compares equal to the original.
#[test]
fn test_entities_clone_eq() {
    let mut original = Entities::new();
    original.emails.push(String::from("a@b.com"));

    let duplicate = original.clone();

    assert_eq!(original, duplicate);
}
/// The `Debug` rendering of `Entities` should mention the type name.
#[test]
fn test_entities_debug() {
    let value = Entities::new();
    let rendered = format!("{:?}", value);

    assert!(rendered.contains("Entities"));
}
/// The `Debug` rendering of `EntityExtractor` should mention the type name.
#[test]
fn test_extractor_debug() {
    let value = EntityExtractor::new();
    let rendered = format!("{:?}", value);

    assert!(rendered.contains("EntityExtractor"));
}
/// Extracting from an empty string succeeds and yields no entities.
#[test]
fn test_empty_text_extraction() {
    let ex = EntityExtractor::new();
    let input = "";
    let found = ex.extract(input).expect("should succeed");

    assert!(found.is_empty());
}
/// A single passage containing one of each entity kind should populate every
/// category, for a total of at least six entities.
#[test]
fn test_extract_all_entity_types() {
    let ex = EntityExtractor::new();
    let passage = "Contact john@example.com at https://example.com \
and call 555-123-4567 now. Ping @johndoe about #rust. \
John visited Paris.";
    let found = ex.extract(passage).expect("should succeed");

    // Every category must have picked up something.
    assert!(!found.emails.is_empty());
    assert!(!found.urls.is_empty());
    assert!(!found.phone_numbers.is_empty());
    assert!(!found.mentions.is_empty());
    assert!(!found.hashtags.is_empty());
    assert!(!found.named_entities.is_empty());

    let total = found.total_count();
    assert!(total >= 6);
}
/// A five-digit number should not be reported as a phone number.
#[test]
fn test_phone_matches_format_length_mismatch() {
    let ex = EntityExtractor::new();
    let found = ex.extract("Number 12345").expect("should succeed");
    let phones = &found.phone_numbers;

    assert!(phones.is_empty());
}
/// Letters arranged in the `XXX-XXX-XXXX` shape should not be reported as a
/// phone number.
#[test]
fn test_phone_format_non_digit_in_digit_position() {
    let ex = EntityExtractor::new();
    let input = "Call ABC-DEF-GHIJ";
    let phones = ex.extract(input).expect("should succeed").phone_numbers;

    assert!(phones.is_empty());
}