use pii::anonymize::{AnonymizeConfig, Anonymizer, Operator};
use pii::decision::resolve;
use pii::nlp::{NlpEngine, SimpleNlpEngine};
use pii::presets::default_recognizers;
use pii::recognizers::dictionary::DictionaryRecognizer;
use pii::recognizers::validator::{imei_check, itin_check, luhn_check, routing_check, ssn_check, tax_id_check};
use pii::{Analyzer, PolicyConfig};
use pii::types::{Detection, DetectionExplanation, EntityType, Language};
use std::collections::HashMap;
/// Verifies that every token produced by `SimpleNlpEngine` carries offsets
/// that slice the original input back to the token's own text.
#[test]
fn test_token_offsets_roundtrip() {
    let text = "Hello Jose.";
    let language = Language::from("en");
    let engine = SimpleNlpEngine::default();

    let artifacts = engine.analyze(text, &language).unwrap();

    for token in artifacts.tokens {
        // The `[start, end)` range is used directly as a byte range into `text`.
        let sliced = &text[token.start..token.end];
        assert_eq!(token.text, sliced);
    }
}
/// `luhn_check` must accept a space-separated number and reject the same
/// number with only its final digit changed.
#[test]
fn test_luhn_check() {
    let accepted = "4539 1488 0343 6467";
    let rejected = "4539 1488 0343 6468";

    assert!(luhn_check(accepted));
    assert!(!luhn_check(rejected));
}
/// `routing_check` must accept `021000021` and reject the same digits with
/// only the last one changed.
#[test]
fn test_routing_check() {
    let accepted = "021000021";
    let rejected = "021000022";

    assert!(routing_check(accepted));
    assert!(!routing_check(rejected));
}
/// `ssn_check` must accept a dash-formatted number and reject one whose
/// first group is `000`.
#[test]
fn test_ssn_check() {
    let accepted = "123-45-6789";
    // NOTE(review): presumably rejected because of the all-zero first group;
    // confirm against the validator's rules.
    let rejected = "000-12-3456";

    assert!(ssn_check(accepted));
    assert!(!ssn_check(rejected));
}
/// `itin_check` must accept `912-70-1234` and reject `912-69-1234`; the two
/// inputs differ only in the middle group (70 vs 69).
#[test]
fn test_itin_check() {
    let accepted = "912-70-1234";
    let rejected = "912-69-1234";

    assert!(itin_check(accepted));
    assert!(!itin_check(rejected));
}
/// `tax_id_check` must accept `12-3456789` and reject `00-1234567`.
#[test]
fn test_tax_id_check() {
    let accepted = "12-3456789";
    // NOTE(review): presumably rejected because of the `00` prefix; confirm
    // against the validator's rules.
    let rejected = "00-1234567";

    assert!(tax_id_check(accepted));
    assert!(!tax_id_check(rejected));
}
/// `imei_check` must accept a 15-digit number and reject the same digits with
/// only the last one changed.
#[test]
fn test_imei_check() {
    let accepted = "490154203237518";
    let rejected = "490154203237517";

    assert!(imei_check(accepted));
    assert!(!imei_check(rejected));
}
/// Two overlapping email detections (spans `0..10` and `5..12`) with different
/// scores must resolve to a single detection: the higher-scoring one.
#[test]
fn test_overlap_resolution_prefers_score() {
    // Both candidates share entity type and explanation; only the span, the
    // score and the recognizer name vary between them.
    let email_candidate = |start, end, score, recognizer: &str| Detection {
        entity_type: EntityType::Email,
        start,
        end,
        score,
        recognizer: recognizer.to_string(),
        explanation: DetectionExplanation::Regex {
            pattern_name: "email".to_string(),
        },
    };
    let lower_scored = email_candidate(0, 10, 0.6, "a");
    let higher_scored = email_candidate(5, 12, 0.9, "b");

    // The callback returns a constant zero for every input.
    // NOTE(review): its exact role in `resolve` is not visible here; confirm.
    let resolved = resolve(vec![lower_scored, higher_scored], &|_| 0.0);

    assert_eq!(resolved.len(), 1);
    let survivor = &resolved[0];
    assert_eq!(survivor.recognizer, "b");
}
/// Two overlapping email detections (spans `0..10` and `2..12`) with equal
/// scores must resolve to a single detection: the one whose explanation is a
/// passed validator rather than a plain regex match.
#[test]
fn test_overlap_resolution_prefers_validator_on_tie() {
    let regex_hit = Detection {
        recognizer: "regex".to_string(),
        entity_type: EntityType::Email,
        score: 0.8,
        start: 0,
        end: 10,
        explanation: DetectionExplanation::Regex {
            pattern_name: "email".to_string(),
        },
    };
    let validated_hit = Detection {
        recognizer: "validator".to_string(),
        entity_type: EntityType::Email,
        score: 0.8,
        start: 2,
        end: 12,
        explanation: DetectionExplanation::Validator {
            validator: "email".to_string(),
            passed: true,
        },
    };
    let candidates = vec![regex_hit, validated_hit];

    // The callback returns a constant zero for every input.
    // NOTE(review): its exact role in `resolve` is not visible here; confirm.
    let resolved = resolve(candidates, &|_| 0.0);

    assert_eq!(resolved.len(), 1);
    let survivor = &resolved[0];
    assert_eq!(survivor.recognizer, "validator");
}
/// Applying a `Mask { ch: '*', from_end: 4 }` operator to a phone detection
/// that covers the whole ten-character input must yield `******7890`.
#[test]
fn test_anonymizer_mask() {
    let input = "1234567890";
    let phone_hit = Detection {
        recognizer: "r".to_string(),
        entity_type: EntityType::Phone,
        score: 0.9,
        start: 0,
        end: 10,
        explanation: DetectionExplanation::Regex {
            pattern_name: "phone".to_string(),
        },
    };

    // Replace the default per-entity table with a single phone entry.
    let mask_op = Operator::Mask { ch: '*', from_end: 4 };
    let mut config = AnonymizeConfig::default();
    config.per_entity = std::iter::once((EntityType::Phone.as_str(), mask_op)).collect();

    let result = Anonymizer::anonymize(input, &[phone_hit], &config).unwrap();

    assert_eq!(result.text, "******7890");
}
/// With only `EntityType::Email` inserted into the policy's enabled set, no
/// entity of any other type may be reported, even though the input also
/// contains a phone number.
#[test]
fn test_policy_filters_entities() {
    let text = "Email me at jane@example.com or call +1 212-555-0909.";
    let language = Language::from("en");

    let mut policy = PolicyConfig::default();
    policy.enabled_entities.insert(EntityType::Email);

    let nlp_engine = Box::new(SimpleNlpEngine::default());
    let analyzer = Analyzer::new(nlp_engine, default_recognizers(), Vec::new(), policy);

    let result = analyzer.analyze(text, &language).unwrap();

    // NOTE(review): this check also passes when `entities` is empty, so it does
    // not prove that the email itself was detected.
    let has_non_email = result
        .entities
        .iter()
        .any(|det| det.entity_type != EntityType::Email);
    assert!(!has_non_email);
}
/// A `DictionaryRecognizer` built from the terms "Alice" and "Bob" must
/// report exactly two entities whose spans slice back to those names, in
/// that order.
#[test]
fn test_dictionary_recognizer_detects_terms() {
    let text = "Alice and Bob met.";
    let language = Language::from("en");
    let terms = ["Alice".to_string(), "Bob".to_string()];

    let recognizer = DictionaryRecognizer::new(
        "dict_names",
        EntityType::Person,
        &terms,
        0.75,
        "names",
        true,
    );
    let nlp_engine = Box::new(SimpleNlpEngine::default());
    let analyzer = Analyzer::new(
        nlp_engine,
        vec![Box::new(recognizer)],
        Vec::new(),
        PolicyConfig::default(),
    );

    let result = analyzer.analyze(text, &language).unwrap();

    assert_eq!(result.entities.len(), 2);
    // Map each reported span back to the text it covers.
    let matched: Vec<&str> = result
        .entities
        .iter()
        .map(|det| &text[det.start..det.end])
        .collect();
    assert_eq!(matched, ["Alice", "Bob"]);
}