use redact_core::{
policy::{PatternRule, Policy, PolicyActions, PolicyStatus, RedactionConfig},
AnalyzerEngine, AnonymizationStrategy, AnonymizerConfig, EntityType,
};
fn create_test_policy(pattern_rules: Vec<PatternRule>) -> Policy {
Policy {
id: "test-policy".to_string(),
name: "test".to_string(),
display_name: "Test Policy".to_string(),
organization_id: "test-org".to_string(),
status: PolicyStatus::Active,
priority: 100,
description: "Integration test policy".to_string(),
conditions: vec![],
pattern_rules,
redaction_config: RedactionConfig {
default_mode: "replace".to_string(),
enabled_categories: vec!["all".to_string()],
},
actions: PolicyActions {
action: "redact".to_string(),
redact_fields: vec!["content".to_string()],
},
}
}
fn create_pattern_rule(pattern_id: &str, confidence: f32) -> PatternRule {
PatternRule {
pattern_id: pattern_id.to_string(),
name: pattern_id.to_lowercase(),
enabled: true,
mode: "replace".to_string(),
strategy: "semantic".to_string(),
confidence,
replacement: format!("[{}]", pattern_id),
}
}
#[test]
fn test_contact_information_entities() {
let engine = AnalyzerEngine::new();
let text = "Contact us at support@example.com or call (555) 123-4567. \
Visit https://example.com or use IP 192.168.1.1";
let policy = create_test_policy(vec![
create_pattern_rule("EMAIL_ADDRESS", 0.8),
create_pattern_rule("PHONE_NUMBER", 0.8),
create_pattern_rule("URL", 0.7),
create_pattern_rule("IP_ADDRESS", 0.7),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
assert!(
filtered.len() >= 2,
"Expected at least 2 contact entities to be detected"
);
assert!(
filtered
.iter()
.any(|e| e.entity_type == EntityType::EmailAddress),
"Email should always be detected"
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(
anonymized.entities.len() >= 2,
"Expected at least 2 entities to be redacted"
);
assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
assert!(!anonymized.text.contains("support@example.com"));
let has_other_redactions = anonymized.text.contains("[PHONE_NUMBER]")
|| anonymized.text.contains("[URL]")
|| anonymized.text.contains("[IP_ADDRESS]");
assert!(
has_other_redactions,
"Expected at least one other entity type to be redacted"
);
}
#[test]
fn test_financial_entities() {
let engine = AnalyzerEngine::new();
let text = "Credit card: 4532015112830366, IBAN: GB82 WEST 1234 5698 7654 32";
let policy = create_test_policy(vec![
create_pattern_rule("CREDIT_CARD", 0.9),
create_pattern_rule("IBAN_CODE", 0.8),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
assert!(filtered
.iter()
.any(|e| e.entity_type == EntityType::CreditCard));
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Mask,
mask_char: '*',
mask_start_chars: 4,
mask_end_chars: 4,
preserve_format: false,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(anonymized.text.contains("4532"));
assert!(anonymized.text.contains("0366"));
assert!(anonymized.text.contains("****"));
}
#[test]
fn test_us_specific_entities() {
let engine = AnalyzerEngine::new();
let text = "SSN: 123-45-6789, Driver License: A1234567, Passport: 123456789";
let policy = create_test_policy(vec![
create_pattern_rule("US_SSN", 0.9),
create_pattern_rule("US_DRIVER_LICENSE", 0.8),
create_pattern_rule("US_PASSPORT", 0.8),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
assert!(filtered.iter().any(|e| e.entity_type == EntityType::UsSsn));
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(anonymized.text.contains("[US_SSN]"));
assert!(!anonymized.text.contains("123-45-6789"));
}
#[test]
fn test_uk_specific_entities() {
let engine = AnalyzerEngine::new();
let text = "Patient NHS number: 123 456 7890, NINO: AB123456C, Postcode: SW1A 1AA, \
Phone: +44 20 7946 0958, Mobile: 07700 900123, Sort Code: 12-34-56";
let policy = create_test_policy(vec![
create_pattern_rule("UK_NHS", 0.6), create_pattern_rule("UK_NINO", 0.8),
create_pattern_rule("UK_POSTCODE", 0.7),
create_pattern_rule("UK_PHONE_NUMBER", 0.7),
create_pattern_rule("UK_MOBILE_NUMBER", 0.7),
create_pattern_rule("UK_SORT_CODE", 0.7),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
let entity_types: Vec<EntityType> = filtered.iter().map(|e| e.entity_type.clone()).collect();
let uk_entity_count = entity_types
.iter()
.filter(|e| {
matches!(
e,
EntityType::UkNhs
| EntityType::UkNino
| EntityType::UkPostcode
| EntityType::UkPhoneNumber
| EntityType::UkMobileNumber
| EntityType::UkSortCode
)
})
.count();
assert!(
uk_entity_count >= 1,
"Expected at least 1 UK entity to be detected, found {}",
uk_entity_count
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(
!anonymized.entities.is_empty(),
"Expected at least 1 entity to be redacted"
);
}
#[test]
fn test_crypto_entities() {
let engine = AnalyzerEngine::new();
let text = "Bitcoin: 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa, \
Ethereum: 0x742d35Cc6634C0532925a3b844Bc9e7595f01234";
let policy = create_test_policy(vec![
create_pattern_rule("BTC_ADDRESS", 0.9),
create_pattern_rule("ETH_ADDRESS", 0.9),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
let has_crypto = filtered.iter().any(|e| {
e.entity_type == EntityType::BtcAddress || e.entity_type == EntityType::EthAddress
});
assert!(
has_crypto,
"Expected at least one crypto address to be detected"
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Hash,
hash_salt: Some("test_salt".to_string()),
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(!anonymized
.text
.contains("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"));
assert!(!anonymized
.text
.contains("0x742d35Cc6634C0532925a3b844Bc9e7595f01234"));
}
#[test]
fn test_technical_entities() {
let engine = AnalyzerEngine::new();
let text = "GUID: 550e8400-e29b-41d4-a716-446655440000, \
MAC: 00:1B:44:11:3A:B7, \
MD5: 5d41402abc4b2a76b9719d911017c592";
let policy = create_test_policy(vec![
create_pattern_rule("GUID", 0.9),
create_pattern_rule("MAC_ADDRESS", 0.9),
create_pattern_rule("MD5_HASH", 0.8),
]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
let tech_count = filtered
.iter()
.filter(|e| {
e.entity_type == EntityType::Guid
|| e.entity_type == EntityType::MacAddress
|| e.entity_type == EntityType::Md5Hash
})
.count();
assert!(
tech_count >= 2,
"Expected at least 2 technical entities to be detected, found {}",
tech_count
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(anonymized.text.contains("[GUID]"));
assert!(anonymized.text.contains("[MAC_ADDRESS]"));
assert!(anonymized.text.contains("[MD5_HASH]"));
}
#[test]
fn test_policy_confidence_threshold_filtering() {
let engine = AnalyzerEngine::new();
let text = "Email: test@example.com";
let policy = create_test_policy(vec![
create_pattern_rule("EMAIL_ADDRESS", 0.95), ]);
let analysis = engine.analyze(text, None).unwrap();
let initial_count = analysis.detected_entities.len();
assert!(initial_count > 0, "Should detect email");
let filtered = policy.apply(analysis.detected_entities.clone());
let email_results: Vec<_> = filtered
.iter()
.filter(|e| e.entity_type == EntityType::EmailAddress)
.collect();
for result in email_results {
assert!(
result.score >= 0.95 || result.score >= 0.8,
"Filtered results should meet confidence threshold"
);
}
}
#[test]
fn test_policy_disabled_entity() {
let engine = AnalyzerEngine::new();
let text = "Email: test@example.com, Phone: (555) 123-4567";
let email_rule = create_pattern_rule("EMAIL_ADDRESS", 0.8);
let mut phone_rule = create_pattern_rule("PHONE_NUMBER", 0.8);
phone_rule.enabled = false;
let policy = create_test_policy(vec![email_rule, phone_rule]);
let analysis = engine.analyze(text, None).unwrap();
let filtered = policy.apply(analysis.detected_entities);
assert!(filtered
.iter()
.any(|e| e.entity_type == EntityType::EmailAddress));
}
#[test]
fn test_multiple_anonymization_strategies() {
let engine = AnalyzerEngine::new();
let text = "Email: john.doe@example.com, SSN: 123-45-6789";
let replace_config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let replaced = engine.anonymize(text, None, &replace_config).unwrap();
assert!(replaced.text.contains("[EMAIL_ADDRESS]"));
assert!(replaced.text.contains("[US_SSN]"));
let mask_config = AnonymizerConfig {
strategy: AnonymizationStrategy::Mask,
mask_char: '*',
mask_start_chars: 2,
mask_end_chars: 4,
preserve_format: false,
..Default::default()
};
let masked = engine.anonymize(text, None, &mask_config).unwrap();
assert!(masked.text.contains("**"));
let hash_config = AnonymizerConfig {
strategy: AnonymizationStrategy::Hash,
hash_salt: Some("secret".to_string()),
..Default::default()
};
let hashed = engine.anonymize(text, None, &hash_config).unwrap();
assert!(!hashed.text.contains("john.doe@example.com"));
assert!(!hashed.text.contains("123-45-6789"));
let encrypt_config = AnonymizerConfig {
strategy: AnonymizationStrategy::Encrypt,
encryption_key: Some("test_key".to_string()),
..Default::default()
};
let encrypted = engine.anonymize(text, None, &encrypt_config).unwrap();
assert!(!encrypted.text.contains("john.doe@example.com"));
assert!(!encrypted.text.contains("123-45-6789"));
assert!(encrypted.tokens.is_some());
}
#[test]
fn test_end_to_end_analyze_and_anonymize() {
let engine = AnalyzerEngine::new();
let text = "Patient John Doe (NHS: 123 456 7890) can be reached at john@example.com \
or (555) 123-4567. SSN: 123-45-6789, Credit Card: 4532015112830366. \
Bitcoin wallet: 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa";
let policy = create_test_policy(vec![
create_pattern_rule("EMAIL_ADDRESS", 0.8),
create_pattern_rule("PHONE_NUMBER", 0.8),
create_pattern_rule("US_SSN", 0.9),
create_pattern_rule("CREDIT_CARD", 0.9),
create_pattern_rule("UK_NHS", 0.9),
create_pattern_rule("BTC_ADDRESS", 0.9),
]);
let analysis = engine.analyze(text, None).unwrap();
assert!(!analysis.detected_entities.is_empty());
let filtered = policy.apply(analysis.detected_entities);
let entity_types: std::collections::HashSet<_> =
filtered.iter().map(|e| e.entity_type.clone()).collect();
assert!(
entity_types.len() >= 3,
"Should detect at least 3 different entity types"
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let result = engine.analyze_and_anonymize(text, None, &config).unwrap();
assert!(!result.detected_entities.is_empty());
assert!(result.anonymized.is_some());
let anonymized = result.anonymized.unwrap();
assert!(!anonymized.text.contains("john@example.com"));
assert!(!anonymized.text.contains("123-45-6789"));
assert!(!anonymized.text.contains("4532015112830366"));
assert!(!anonymized
.text
.contains("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"));
assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
assert!(anonymized.text.contains("[US_SSN]"));
assert!(anonymized.text.contains("[CREDIT_CARD]"));
assert!(anonymized.text.contains("[BTC_ADDRESS]"));
assert!(anonymized.text.contains("Patient"));
assert!(anonymized.text.contains("can be reached at"));
}
#[test]
fn test_inactive_policy_no_filtering() {
let engine = AnalyzerEngine::new();
let text = "Email: test@example.com";
let mut policy = create_test_policy(vec![create_pattern_rule("EMAIL_ADDRESS", 0.95)]);
policy.status = PolicyStatus::Inactive;
let analysis = engine.analyze(text, None).unwrap();
let initial_count = analysis.detected_entities.len();
let filtered = policy.apply(analysis.detected_entities);
assert_eq!(filtered.len(), initial_count);
}
#[test]
fn test_entity_type_specific_analysis() {
let engine = AnalyzerEngine::new();
let text = "Email: test@example.com, Phone: (555) 123-4567, SSN: 123-45-6789";
let target_entities = vec![EntityType::EmailAddress, EntityType::UsSsn];
let result = engine
.analyze_with_entities(text, &target_entities, None)
.unwrap();
for entity in result.detected_entities {
assert!(
entity.entity_type == EntityType::EmailAddress
|| entity.entity_type == EntityType::UsSsn,
"Should only detect targeted entity types"
);
}
}
#[test]
fn test_all_entity_categories_comprehensive() {
let engine = AnalyzerEngine::new();
let text = "
Contact: john@example.com, +44 20 7946 0958, 192.168.1.1, https://example.com
Financial: 4532015112830366, GB82 WEST 1234 5698 7654 32
US IDs: 123-45-6789, A1234567
UK IDs: AB123456C, 123 456 7890, SW1A 1AA
Crypto: 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa, 0x742d35Cc6634C0532925a3b844Bc9e7595f01234
Technical: 550e8400-e29b-41d4-a716-446655440000, 00:1B:44:11:3A:B7
";
let analysis = engine.analyze(text, None).unwrap();
assert!(
analysis.detected_entities.len() >= 10,
"Should detect at least 10 entities from various categories"
);
let entity_types: std::collections::HashSet<_> = analysis
.detected_entities
.iter()
.map(|e| e.entity_type.clone())
.collect();
assert!(
entity_types.len() >= 8,
"Should detect at least 8 different entity types"
);
let config = AnonymizerConfig {
strategy: AnonymizationStrategy::Replace,
..Default::default()
};
let anonymized = engine.anonymize(text, None, &config).unwrap();
assert!(anonymized.entities.len() >= 10);
assert!(!anonymized.text.contains("john@example.com"));
assert!(!anonymized.text.contains("4532015112830366"));
assert!(!anonymized.text.contains("123-45-6789"));
assert!(!anonymized
.text
.contains("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"));
}