use pii::analyzer::Analyzer;
use pii::config::PolicyConfig;
use pii::nlp::SimpleNlpEngine;
use pii::presets::default_recognizers;
use pii::types::{EntityType, Language};
use std::collections::HashSet;
/// Small deterministic pseudo-random generator used to build reproducible test input.
///
/// Each step updates the 64-bit state with a wrapping multiply followed by a wrapping add,
/// then exposes the upper 32 bits of the new state.
struct Lcg {
    state: u64,
}

impl Lcg {
    /// Creates a generator whose initial state is exactly `seed`.
    fn new(seed: u64) -> Self {
        Lcg { state: seed }
    }

    /// Advances the state by one step and returns the high 32 bits of the new state.
    fn next_u32(&mut self) -> u32 {
        const MULTIPLIER: u64 = 6364136223846793005;
        const INCREMENT: u64 = 1;

        let advanced = self.state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        self.state = advanced;
        (advanced >> 32) as u32
    }

    /// Returns a value in `0..max`, obtained by reducing `next_u32` with `%`.
    ///
    /// When `max` is zero the result is `0` and the generator state is left untouched.
    fn next_usize(&mut self, max: usize) -> usize {
        match max {
            0 => 0,
            bound => self.next_u32() as usize % bound,
        }
    }
}
/// One detection the test expects to find in the analyzer output for an injected literal.
///
/// Values are recorded by `inject` at the moment the literal is appended to the text.
#[derive(Clone, Debug)]
struct Expected {
// Entity type the injected literal is expected to be reported as.
entity: EntityType,
// Length in bytes of the text just before the literal was appended, i.e. where it begins.
start: usize,
// Length in bytes of the text just after the literal was appended, i.e. one past its last byte.
end: usize,
}
/// Builds eight pseudo-random texts, each made of 30 filler words with four known literals
/// (email, IP address, credit card, phone) injected before the words at positions 5, 12, 18
/// and 24, and asserts that the analyzer reports every literal with the exact entity type and
/// byte span recorded at injection time.
#[test]
fn test_injection_generator() {
    let enabled = [
        EntityType::Email,
        EntityType::IpAddress,
        EntityType::CreditCard,
        EntityType::Phone,
    ];
    // Fixed seed keeps the generated filler identical from run to run.
    let mut rng = Lcg::new(0xC0FFEE);
    let analyzer = Analyzer::new(
        Box::new(SimpleNlpEngine::default()),
        default_recognizers(),
        Vec::new(),
        policy_for(&enabled),
    );

    for _ in 0..8 {
        let mut text = String::new();
        let mut expected: Vec<Expected> = Vec::new();

        for slot in 0..30 {
            // The literal for a slot is placed before that slot's filler word.
            match slot {
                5 => inject(&mut text, &mut expected, EntityType::Email, "user@example.com"),
                12 => inject(&mut text, &mut expected, EntityType::IpAddress, "10.0.0.5"),
                18 => inject(
                    &mut text,
                    &mut expected,
                    EntityType::CreditCard,
                    "4539 1488 0343 6467",
                ),
                24 => inject(&mut text, &mut expected, EntityType::Phone, "+1 415-555-1212"),
                _ => {}
            }
            text.push_str(&random_word(&mut rng));
            text.push(' ');
        }

        let language = Language::from("en");
        let result = analyzer.analyze(&text, &language).unwrap();

        for Expected { entity, start, end } in expected {
            let detected = result
                .entities
                .iter()
                .any(|det| det.entity_type == entity && det.start == start && det.end == end);
            assert!(
                detected,
                "missing {:?} at {}..{} in text {:?}",
                entity,
                start,
                end,
                text
            );
        }
    }
}
/// Returns a default `PolicyConfig` whose `enabled_entities` set is replaced by clones of
/// the given entity types.
fn policy_for(entities: &[EntityType]) -> PolicyConfig {
    let enabled: HashSet<_> = entities.iter().cloned().collect();
    let mut config = PolicyConfig::default();
    config.enabled_entities = enabled;
    config
}
/// Appends `literal` followed by a single space to `text` and records, in `expected`, the
/// entity type together with the byte range the literal occupies (the trailing space is not
/// part of the range).
fn inject(text: &mut String, expected: &mut Vec<Expected>, entity: EntityType, literal: &str) {
    // The literal starts at the current end of the text and spans its own byte length.
    let start = text.len();
    let end = start + literal.len();

    text.push_str(literal);
    text.push(' ');

    expected.push(Expected { entity, start, end });
}
/// Produces a word of 3 to 8 lowercase ASCII letters drawn from `rng`.
///
/// The length is drawn first, then one letter per position, so the number of generator
/// steps consumed is `1 + length`.
fn random_word(rng: &mut Lcg) -> String {
    let len = rng.next_usize(6) + 3;
    let mut word = String::with_capacity(len);
    word.extend((0..len).map(|_| {
        // Offset is always below 26, so the sum stays within b'a'..=b'z'.
        let offset = rng.next_usize(26) as u8;
        char::from(b'a' + offset)
    }));
    word
}