use super::*;
#[test]
fn test_scan_email_basic() {
let matches = scan_pii("Contact user@example.com for info");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::Email);
assert_eq!(matches[0].matched, "user@example.com");
}
#[test]
fn test_scan_email_with_plus() {
let matches = scan_pii("Send to user+tag@example.com please");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].matched, "user+tag@example.com");
}
#[test]
fn test_scan_email_subdomain() {
let matches = scan_pii("Email: admin@mail.corp.example.com");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].matched, "admin@mail.corp.example.com");
}
#[test]
fn test_scan_no_email_without_dot() {
let matches = scan_pii("Not an email: user@localhost");
assert!(matches.is_empty());
}
#[test]
fn test_scan_phone_paren_format() {
let matches = scan_pii("Call (555) 123-4567 now");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::Phone);
assert_eq!(matches[0].matched, "(555) 123-4567");
}
#[test]
fn test_scan_phone_dash_format() {
let matches = scan_pii("Phone: 555-123-4567");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::Phone);
assert_eq!(matches[0].matched, "555-123-4567");
}
#[test]
fn test_phone_not_ssn() {
let matches = scan_pii("123-45-6789");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::Ssn);
}
#[test]
fn test_scan_ssn() {
let matches = scan_pii("SSN: 123-45-6789");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::Ssn);
assert_eq!(matches[0].matched, "123-45-6789");
}
#[test]
fn test_ssn_not_in_longer_number() {
let matches = scan_pii("ID: 1123-45-67890");
let ssn_matches: Vec<_> = matches
.iter()
.filter(|m| m.pii_type == PiiType::Ssn)
.collect();
assert!(ssn_matches.is_empty());
}
#[test]
fn test_scan_credit_card_16_digits() {
let matches = scan_pii("Card: 4111111111111111");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::CreditCard);
}
#[test]
fn test_scan_credit_card_with_spaces() {
let matches = scan_pii("Card: 4111 1111 1111 1111");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::CreditCard);
}
#[test]
fn test_scan_credit_card_with_dashes() {
let matches = scan_pii("Card: 4111-1111-1111-1111");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::CreditCard);
}
#[test]
fn test_scan_credit_card_13_digits() {
let matches = scan_pii("Card: 4111111111111");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::CreditCard);
}
#[test]
fn test_scan_ip_address() {
let matches = scan_pii("Server at 192.168.1.100");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::IpAddress);
assert_eq!(matches[0].matched, "192.168.1.100");
}
#[test]
fn test_scan_ip_address_boundary() {
let matches = scan_pii("IP: 255.255.255.255");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].matched, "255.255.255.255");
}
#[test]
fn test_scan_ip_invalid_octet() {
let matches = scan_pii("Not IP: 256.1.2.3");
let ip_matches: Vec<_> = matches
.iter()
.filter(|m| m.pii_type == PiiType::IpAddress)
.collect();
assert!(ip_matches.is_empty());
}
#[test]
fn test_scan_ip_zero() {
let matches = scan_pii("Addr: 0.0.0.0");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::IpAddress);
}
#[test]
fn test_filter_pii_email() {
let result = filter_pii("Contact user@example.com for info");
assert_eq!(result, "Contact [EMAIL] for info");
}
#[test]
fn test_filter_pii_phone() {
let result = filter_pii("Call (555) 123-4567 now");
assert_eq!(result, "Call [PHONE] now");
}
#[test]
fn test_filter_pii_ssn() {
let result = filter_pii("SSN: 123-45-6789");
assert_eq!(result, "SSN: [SSN]");
}
#[test]
fn test_filter_pii_credit_card() {
let result = filter_pii("Card: 4111111111111111");
assert_eq!(result, "Card: [CREDIT_CARD]");
}
#[test]
fn test_filter_pii_ip() {
let result = filter_pii("Server: 192.168.1.1");
assert_eq!(result, "Server: [IP_ADDRESS]");
}
#[test]
fn test_filter_pii_no_pii() {
let text = "This is clean text with no PII";
assert_eq!(filter_pii(text), text);
}
#[test]
fn test_filter_pii_multiple() {
let result = filter_pii("Email user@example.com, call (555) 123-4567, SSN 123-45-6789");
assert!(result.contains("[EMAIL]"));
assert!(result.contains("[PHONE]"));
assert!(result.contains("[SSN]"));
assert!(!result.contains("user@example.com"));
assert!(!result.contains("(555) 123-4567"));
assert!(!result.contains("123-45-6789"));
}
#[test]
fn test_filter_pii_jsonl_basic() {
let input = r#"{"text": "Contact user@example.com"}"#;
let (output, count) = filter_pii_jsonl(input);
assert!(output.contains("[EMAIL]"));
assert!(!output.contains("user@example.com"));
assert_eq!(count, 1);
}
#[test]
fn test_filter_pii_jsonl_nested() {
let input = r#"{"data": {"content": "SSN: 123-45-6789"}}"#;
let (output, count) = filter_pii_jsonl(input);
assert!(output.contains("[SSN]"));
assert_eq!(count, 1);
}
#[test]
fn test_filter_pii_jsonl_array() {
let input = r#"{"items": ["user@example.com", "clean text"]}"#;
let (output, count) = filter_pii_jsonl(input);
assert!(output.contains("[EMAIL]"));
assert_eq!(count, 1);
}
#[test]
fn test_filter_pii_jsonl_multi_line() {
let input = r#"{"text": "clean"}
{"text": "user@example.com"}
{"text": "also clean"}"#;
let (output, count) = filter_pii_jsonl(input);
assert_eq!(count, 1);
assert_eq!(output.lines().count(), 3);
}
#[test]
fn test_filter_pii_jsonl_invalid_json() {
let input = "not json at all";
let (output, count) = filter_pii_jsonl(input);
assert_eq!(output.trim(), "not json at all");
assert_eq!(count, 0);
}
#[test]
fn test_filter_pii_jsonl_empty_lines() {
let input = "\n\n{\"text\": \"clean\"}\n\n";
let (output, count) = filter_pii_jsonl(input);
assert_eq!(count, 0);
assert_eq!(output.lines().count(), 1);
}
#[test]
fn test_redaction_tags() {
assert_eq!(PiiType::Email.redaction_tag(), "[EMAIL]");
assert_eq!(PiiType::Phone.redaction_tag(), "[PHONE]");
assert_eq!(PiiType::Ssn.redaction_tag(), "[SSN]");
assert_eq!(PiiType::CreditCard.redaction_tag(), "[CREDIT_CARD]");
assert_eq!(PiiType::IpAddress.redaction_tag(), "[IP_ADDRESS]");
}
#[test]
fn falsify_pii_001_all_types_detected() {
let text = "Email: user@example.com, Phone: (555) 123-4567, \
SSN: 123-45-6789, Card: 4111111111111111, IP: 10.0.0.1";
let matches = scan_pii(text);
let types: Vec<PiiType> = matches.iter().map(|m| m.pii_type).collect();
assert!(types.contains(&PiiType::Email), "Email not detected");
assert!(types.contains(&PiiType::Phone), "Phone not detected");
assert!(types.contains(&PiiType::Ssn), "SSN not detected");
assert!(
types.contains(&PiiType::CreditCard),
"CreditCard not detected"
);
assert!(
types.contains(&PiiType::IpAddress),
"IpAddress not detected"
);
}
#[test]
fn falsify_pii_002_redaction_preserves_text() {
let text = "Hello world! Contact user@example.com for help. Thanks!";
let filtered = filter_pii(text);
assert!(filtered.contains("Hello world!"));
assert!(filtered.contains("for help."));
assert!(filtered.contains("Thanks!"));
assert!(!filtered.contains("user@example.com"));
}
#[test]
fn falsify_pii_003_jsonl_structure() {
let input = r#"{"id": 1, "text": "user@example.com", "score": 0.95}"#;
let (output, _) = filter_pii_jsonl(input);
let parsed: serde_json::Value = serde_json::from_str(output.trim()).unwrap();
assert_eq!(parsed["id"], 1);
assert_eq!(parsed["score"], 0.95);
assert_eq!(parsed["text"], "[EMAIL]");
}
#[test]
fn falsify_evol_001_empty_input() {
assert!(scan_pii("").is_empty());
assert_eq!(filter_pii(""), "");
let (output, count) = filter_pii_jsonl("");
assert!(output.is_empty());
assert_eq!(count, 0);
}
#[test]
fn falsify_evol_002_no_false_positives() {
let clean_texts = [
"The quick brown fox jumps over the lazy dog.",
"Machine learning model achieved 95% accuracy.",
"Version 2.0.1 released on 2024-01-15.",
"Temperature: 98.6 degrees Fahrenheit.",
"The ratio is 3.14159 approximately.",
];
for text in &clean_texts {
let matches = scan_pii(text);
assert!(matches.is_empty(), "False positive in: {text}");
}
}
#[test]
fn test_overlapping_pii_dedup() {
let mut matches = vec![
PiiMatch {
pii_type: PiiType::Phone,
start: 0,
end: 12,
matched: "555-123-4567".to_string(),
},
PiiMatch {
pii_type: PiiType::CreditCard,
start: 0,
end: 16,
matched: "5551234567891234".to_string(),
},
];
deduplicate_overlapping(&mut matches);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].pii_type, PiiType::CreditCard);
}
#[test]
fn test_pii_match_debug() {
let m = PiiMatch {
pii_type: PiiType::Email,
start: 0,
end: 16,
matched: "user@example.com".to_string(),
};
let debug = format!("{m:?}");
assert!(debug.contains("Email"));
}
#[test]
fn test_pii_type_clone_eq() {
let a = PiiType::Email;
let b = a;
assert_eq!(a, b);
}
#[test]
fn test_filter_pii_cow_unused() {
let text = "nothing here";
let result = filter_pii(text);
assert_eq!(result, text);
}