use redact_core::{AnalyzerEngine, EntityType};
fn create_engine() -> AnalyzerEngine {
AnalyzerEngine::new()
}
fn assert_entity_detected(text: &str, entity_type: EntityType, min_score: f32) {
let engine = create_engine();
let result = engine.analyze(text, None).unwrap();
let found = result
.detected_entities
.iter()
.any(|e| e.entity_type == entity_type && e.score >= min_score);
assert!(
found,
"Failed to detect {:?} in text: '{}'\nDetected: {:?}",
entity_type, text, result.detected_entities
);
}
#[test]
fn test_email_address() {
assert_entity_detected(
"Contact john.doe@example.com for details",
EntityType::EmailAddress,
0.8,
);
assert_entity_detected("admin+test@sub.domain.co.uk", EntityType::EmailAddress, 0.8);
}
#[test]
fn test_phone_number() {
assert_entity_detected("Call (555) 123-4567", EntityType::PhoneNumber, 0.7);
assert_entity_detected("Mobile: 555-123-4567", EntityType::PhoneNumber, 0.7);
assert_entity_detected("Phone 555 123 4567", EntityType::PhoneNumber, 0.7);
}
#[test]
fn test_ip_address() {
assert_entity_detected("Server at 192.168.1.1", EntityType::IpAddress, 0.8);
assert_entity_detected("Connect to 10.0.0.254", EntityType::IpAddress, 0.8);
}
#[test]
fn test_url() {
assert_entity_detected("Visit https://example.com/path", EntityType::Url, 0.7);
assert_entity_detected("Check http://subdomain.example.org", EntityType::Url, 0.7);
}
#[test]
fn test_domain_name() {
assert_entity_detected("Visit example.com for info", EntityType::DomainName, 0.7);
assert_entity_detected("Host: subdomain.example.org", EntityType::DomainName, 0.7);
}
#[test]
fn test_credit_card() {
assert_entity_detected("Card 4532015112830366", EntityType::CreditCard, 0.9);
assert_entity_detected("Mastercard: 5425233430109903", EntityType::CreditCard, 0.9);
assert_entity_detected("Amex: 374245455400126", EntityType::CreditCard, 0.9);
}
#[test]
fn test_iban_code() {
assert_entity_detected("IBAN: GB82WEST12345698765432", EntityType::IbanCode, 0.75);
assert_entity_detected("DE89370400440532013000", EntityType::IbanCode, 0.75);
}
#[test]
fn test_us_bank_number() {
}
#[test]
fn test_us_ssn() {
assert_entity_detected("SSN: 123-45-6789", EntityType::UsSsn, 0.9);
}
#[test]
fn test_us_driver_license() {
}
#[test]
fn test_us_passport() {
assert_entity_detected("Passport: AB1234567", EntityType::PassportNumber, 0.7);
}
#[test]
fn test_us_zip_code() {
assert_entity_detected("ZIP: 12345", EntityType::UsZipCode, 0.6);
assert_entity_detected("ZIP+4: 12345-6789", EntityType::UsZipCode, 0.6);
}
#[test]
fn test_uk_nhs() {
assert_entity_detected("NHS number 401 023 2137", EntityType::UkNhs, 0.7);
}
#[test]
fn test_uk_nino() {
assert_entity_detected("NINO: AB123456C", EntityType::UkNino, 0.85);
}
#[test]
fn test_uk_postcode() {
assert_entity_detected("Postcode: SW1A 1AA", EntityType::UkPostcode, 0.75);
assert_entity_detected("Code: EC1A 1BB", EntityType::UkPostcode, 0.75);
}
#[test]
#[ignore] fn test_uk_phone_number() {
assert_entity_detected("Call 020-7946-0958", EntityType::UkPhoneNumber, 0.7);
}
#[test]
#[ignore] fn test_uk_mobile_number() {
assert_entity_detected("Mobile: 07700 900123", EntityType::UkMobileNumber, 0.7);
}
#[test]
fn test_uk_sort_code() {
assert_entity_detected("Sort code: 12-34-56", EntityType::UkSortCode, 0.7);
}
#[test]
fn test_uk_driver_license() {
}
#[test]
fn test_uk_passport_number() {
assert_entity_detected("Passport: AB1234567", EntityType::PassportNumber, 0.7);
}
#[test]
fn test_uk_company_number() {
}
#[test]
fn test_medical_license() {
}
#[test]
fn test_medical_record_number() {
assert_entity_detected("MRN: ABC123456789", EntityType::MedicalRecordNumber, 0.85);
assert_entity_detected(
"Medical Record: XYZ987654321",
EntityType::MedicalRecordNumber,
0.85,
);
}
#[test]
fn test_passport_number() {
assert_entity_detected(
"International passport: AB1234567",
EntityType::PassportNumber,
0.7,
);
}
#[test]
fn test_age() {
assert_entity_detected("age: 32", EntityType::Age, 0.8);
assert_entity_detected("aged: 67", EntityType::Age, 0.8);
assert_entity_detected("years old: 25", EntityType::Age, 0.8);
}
#[test]
fn test_isbn() {
assert_entity_detected("ISBN 9780306406157", EntityType::Isbn, 0.8);
assert_entity_detected("Book ISBN: 0306406152", EntityType::Isbn, 0.8);
}
#[test]
fn test_po_box() {
assert_entity_detected("Mail to PO BOX 1234", EntityType::PoBox, 0.85);
assert_entity_detected("Address: P.O. BOX 5678", EntityType::PoBox, 0.85);
assert_entity_detected("POST OFFICE BOX 9999", EntityType::PoBox, 0.85);
}
#[test]
fn test_btc_address() {
assert_entity_detected(
"BTC: 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa",
EntityType::BtcAddress,
0.85,
);
assert_entity_detected(
"bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",
EntityType::BtcAddress,
0.85,
);
}
#[test]
fn test_eth_address() {
assert_entity_detected(
"ETH: 0x742d35Cc6634C0532925a3b844Bc9e7595f01234",
EntityType::EthAddress,
0.9,
);
}
#[test]
fn test_guid() {
assert_entity_detected(
"ID: 550e8400-e29b-41d4-a716-446655440000",
EntityType::Guid,
0.9,
);
}
#[test]
fn test_mac_address() {
assert_entity_detected("MAC: 00:1B:44:11:3A:B7", EntityType::MacAddress, 0.85);
assert_entity_detected("Address: 00-1B-44-11-3A-B7", EntityType::MacAddress, 0.85);
}
#[test]
fn test_md5_hash() {
assert_entity_detected(
"MD5: 5d41402abc4b2a76b9719d911017c592",
EntityType::Md5Hash,
0.6,
);
}
#[test]
fn test_sha1_hash() {
assert_entity_detected(
"SHA1: f56a192b7913b04c54574d18c28d46e6395428ab",
EntityType::Sha1Hash,
0.6,
);
}
#[test]
fn test_sha256_hash() {
assert_entity_detected(
"SHA256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
EntityType::Sha256Hash,
0.6,
);
}
#[test]
fn test_date_time() {
assert_entity_detected("Born on 1990-01-15", EntityType::DateTime, 0.5);
assert_entity_detected("Timestamp: 2024-12-25T10:30:00", EntityType::DateTime, 0.5);
assert_entity_detected("Date: 2024-12-25", EntityType::DateTime, 0.5);
}
#[test]
fn test_multiple_entity_types_in_text() {
let engine = create_engine();
let text = "Contact john@example.com at 555-123-4567 or visit https://example.com. \
SSN: 123-45-6789, Card: 4532015112830366";
let result = engine.analyze(text, None).unwrap();
assert!(result.detected_entities.len() >= 4);
assert!(result
.detected_entities
.iter()
.any(|e| e.entity_type == EntityType::EmailAddress));
assert!(result
.detected_entities
.iter()
.any(|e| e.entity_type == EntityType::PhoneNumber));
assert!(result
.detected_entities
.iter()
.any(|e| e.entity_type == EntityType::Url));
assert!(result
.detected_entities
.iter()
.any(|e| e.entity_type == EntityType::UsSsn));
assert!(result
.detected_entities
.iter()
.any(|e| e.entity_type == EntityType::CreditCard));
}
#[test]
fn test_entity_with_surrounding_text() {
let engine = create_engine();
let result1 = engine
.analyze("Email: test@example.com here", None)
.unwrap();
assert!(!result1.detected_entities.is_empty());
let result2 = engine.analyze("notemail@exampletext", None).unwrap();
assert!(result2.detected_entities.len() <= 1);
}
#[test]
fn test_case_insensitivity_where_applicable() {
assert_entity_detected("PO BOX 123", EntityType::PoBox, 0.85);
assert_entity_detected("P.O. BOX 123", EntityType::PoBox, 0.85);
assert_entity_detected("POST OFFICE BOX 456", EntityType::PoBox, 0.85);
}
#[test]
fn test_entities_with_various_separators() {
assert_entity_detected("555-123-4567", EntityType::PhoneNumber, 0.7);
assert_entity_detected("555.123.4567", EntityType::PhoneNumber, 0.7);
assert_entity_detected("(555) 123-4567", EntityType::PhoneNumber, 0.7);
assert_entity_detected("4532015112830366", EntityType::CreditCard, 0.9);
}