/// Fully redacts a value by replacing it with nothing.
///
/// The function takes no input at all: whatever type `T` the caller asks
/// for, the result is always `None`, so no part of an original value can
/// survive redaction.
#[inline]
pub fn redact<T>() -> Option<T> {
    Option::<T>::None
}
/// Strategy used by [`mask`] to hide the content of a string.
///
/// All counts are measured in `char`s (Unicode scalar values), not bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MaskStyle<'a> {
    /// Replace the whole value with the given placeholder, regardless of
    /// the value's length or content.
    Fixed(&'a str),
    /// Replace every character of the value with the given character,
    /// keeping the character count unchanged.
    PerCharacter(char),
    /// Keep the first `n` characters and replace the rest with the given
    /// character.
    PreservePrefix(usize, char),
    /// Keep the last `n` characters and replace everything before them
    /// with the given character.
    PreserveSuffix(usize, char),
}

/// Masks `value` according to `style` and returns the masked text.
///
/// For the prefix/suffix styles, asking to preserve at least as many
/// characters as `value` contains returns `value` unchanged, because there
/// is nothing left to mask.
pub fn mask(value: &str, style: MaskStyle<'_>) -> String {
    // Produces `count` consecutive copies of `fill`.
    let fill_run =
        |fill: char, count: usize| -> String { std::iter::repeat(fill).take(count).collect() };

    match style {
        MaskStyle::Fixed(placeholder) => placeholder.to_owned(),
        MaskStyle::PerCharacter(fill) => fill_run(fill, value.chars().count()),
        MaskStyle::PreservePrefix(keep, fill) => {
            let hidden = value.chars().count().saturating_sub(keep);
            let mut output: String = value.chars().take(keep).collect();
            output.push_str(&fill_run(fill, hidden));
            output
        }
        MaskStyle::PreserveSuffix(keep, fill) => {
            // Everything before the last `keep` characters gets hidden.
            let hidden = value.chars().count().saturating_sub(keep);
            let mut output = fill_run(fill, hidden);
            output.extend(value.chars().skip(hidden));
            output
        }
    }
}
/// Granularity to which [`truncate_date`] reduces a calendar date.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DatePrecision {
    /// Keep only the year, e.g. `2024`.
    Year,
    /// Keep year and zero-padded month, e.g. `2024-03`.
    Month,
    /// Keep the full date, e.g. `2024-03-15`.
    Day,
    /// Keep the year and the calendar quarter, e.g. `2024-Q1`.
    Quarter,
}

/// Formats a date at the requested `precision`, dropping finer detail.
///
/// Month and day are zero-padded to two digits; the year is printed as-is.
/// Quarters are three-month groups starting in January (months 1-3 are
/// `Q1`, 4-6 are `Q2`, and so on).
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `month` is outside
/// `1..=12` or `day` is outside `1..=31`. No check is made that `day` is
/// valid for the given month.
pub fn truncate_date(year: u16, month: u8, day: u8, precision: DatePrecision) -> String {
    debug_assert!((1..=12).contains(&month), "month must be 1-12");
    debug_assert!((1..=31).contains(&day), "day must be 1-31");

    match precision {
        DatePrecision::Quarter => {
            // Months 1-3 map to 0, 4-6 to 1, ...; shift to the 1-based label.
            let quarter = 1 + (month - 1) / 3;
            format!("{year}-Q{quarter}")
        }
        DatePrecision::Day => format!("{year}-{month:02}-{day:02}"),
        DatePrecision::Month => format!("{year}-{month:02}"),
        DatePrecision::Year => year.to_string(),
    }
}
/// Ages at or above this value are never bucketed; they are all reported
/// as a single open-ended group (see [`generalize_age`]).
const HIPAA_SAFE_HARBOR_THRESHOLD: u8 = 90;

/// Replaces an exact age with the `bucket_size`-wide range that contains it.
///
/// Buckets are aligned to multiples of `bucket_size` starting at zero, and
/// the result is rendered as `"<start>-<end>"` with an inclusive end. Any
/// age of 90 or more is rendered as `"90+"` regardless of `bucket_size`.
///
/// # Panics
///
/// Panics when `bucket_size` is zero and `age` is below 90: with debug
/// assertions enabled the assertion fires, otherwise the integer division
/// by zero panics.
pub fn generalize_age(age: u8, bucket_size: u8) -> String {
    debug_assert!(bucket_size > 0, "bucket_size must be positive");

    if age < HIPAA_SAFE_HARBOR_THRESHOLD {
        // Round down to the nearest multiple of the bucket width.
        let bucket_start = bucket_size * (age / bucket_size);
        let bucket_end = bucket_start + bucket_size - 1;
        format!("{bucket_start}-{bucket_end}")
    } else {
        format!("{HIPAA_SAFE_HARBOR_THRESHOLD}+")
    }
}
/// Generalizes a ZIP code by keeping its leading characters and masking the
/// rest with `*`, always producing exactly five characters.
///
/// Only the first five characters of `zip` are considered, so ZIP+4 style
/// input is cut down to its five-character prefix before masking. At most
/// `preserve_digits` of those characters are kept. When `zip` holds fewer
/// characters than `preserve_digits`, the missing positions are filled with
/// `*` as well, so the output width never reveals how short the input was.
///
/// The input is not validated: non-digit characters are preserved or
/// masked exactly like digits.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `preserve_digits`
/// is outside `1..=5`.
pub fn generalize_zip(zip: &str, preserve_digits: usize) -> String {
    /// Width of the generalized output, in characters.
    const ZIP_LEN: usize = 5;

    debug_assert!(
        (1..=5).contains(&preserve_digits),
        "preserve_digits must be 1-5"
    );

    let mut generalized: String = zip.chars().take(preserve_digits.min(ZIP_LEN)).collect();
    // Pad based on what was actually kept, not on what was requested, so a
    // short input still yields a full-width result.
    let kept = generalized.chars().count();
    generalized.extend(std::iter::repeat('*').take(ZIP_LEN - kept));
    generalized
}
/// Levels of geographic detail.
///
/// NOTE(review): nothing in the visible part of this file consumes this
/// enum, so the per-variant descriptions below are inferred from the
/// variant names only and should be confirmed against the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GeoLevel {
/// Presumably the complete, ungeneralized location — TODO confirm.
Full,
/// Presumably a full ZIP code — TODO confirm.
ZipCode,
/// Presumably the first three digits of a ZIP code — TODO confirm.
Zip3,
/// Presumably city-level detail — TODO confirm.
City,
/// Presumably state-level detail — TODO confirm.
State,
/// Presumably country-level detail — TODO confirm.
Country,
/// Presumably a broad region; its relation to the other levels is not
/// visible here — TODO confirm.
Region,
}
/// Replaces an exact number with the `bucket_size`-wide range containing it.
///
/// Buckets are aligned to multiples of `bucket_size` starting at zero, and
/// the result is rendered as `"<start>-<end>"` with an inclusive end, e.g.
/// `generalize_numeric(75000, 10000)` gives `"70000-79999"`.
///
/// The last bucket of the `u64` range may be narrower than `bucket_size`:
/// its upper bound is capped at `u64::MAX` instead of overflowing.
///
/// # Panics
///
/// Panics when `bucket_size` is zero: with debug assertions enabled the
/// assertion fires, otherwise the integer division by zero panics.
pub fn generalize_numeric(value: u64, bucket_size: u64) -> String {
    debug_assert!(bucket_size > 0, "bucket_size must be positive");

    // Round down to the nearest multiple of the bucket width.
    let bucket_start = (value / bucket_size) * bucket_size;
    // `bucket_start + bucket_size` can exceed `u64::MAX` for values in the
    // top bucket, so saturate rather than panic or wrap around.
    let bucket_end = bucket_start.saturating_add(bucket_size - 1);
    format!("{bucket_start}-{bucket_end}")
}
/// Outcome of a k-anonymity check produced by [`check_k_anonymity`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KAnonymityResult {
    /// The achieved `k`: the size of the smallest equivalence class, or `0`
    /// when there were no records.
    pub k: usize,
    /// Whether the achieved `k` is at least the requested target.
    pub satisfies_target: bool,
    /// Number of distinct quasi-identifier values seen.
    pub equivalence_classes: usize,
    /// Size of the smallest equivalence class; always equal to `k`.
    pub smallest_class_size: usize,
}

/// Measures the k-anonymity of a set of records.
///
/// Each item of `quasi_identifiers` is the quasi-identifier key of one
/// record. Records with equal keys form an equivalence class, and the
/// achieved `k` is the size of the smallest class. An empty input yields
/// `k == 0`, which satisfies the target only when `target_k` is `0`.
pub fn check_k_anonymity(
    quasi_identifiers: impl Iterator<Item = String>,
    target_k: usize,
) -> KAnonymityResult {
    use std::collections::HashMap;

    // Tally how many records share each quasi-identifier key.
    let class_sizes =
        quasi_identifiers.fold(HashMap::<String, usize>::new(), |mut sizes, key| {
            *sizes.entry(key).or_default() += 1;
            sizes
        });

    let smallest_class_size = class_sizes.values().copied().min().unwrap_or(0);

    KAnonymityResult {
        k: smallest_class_size,
        satisfies_target: smallest_class_size >= target_k,
        equivalence_classes: class_sizes.len(),
        smallest_class_size,
    }
}
// Test suite for the anonymization helpers defined in the parent module.
//
// It is organised in four parts: example-based unit tests, property-based
// tests (via the `proptest` crate), table-driven edge cases (via the
// `test_case` crate), and a final group of additional edge-case unit tests.
#[cfg(test)]
mod tests {
use super::*;
// ---- Example-based unit tests -------------------------------------------

// `redact` yields `None` whatever type is requested.
#[test]
fn redact_returns_none() {
let value: Option<&str> = redact();
assert!(value.is_none());
let numeric: Option<u64> = redact();
assert!(numeric.is_none());
}
// `Fixed` ignores the input and returns the placeholder verbatim.
#[test]
fn mask_fixed() {
assert_eq!(mask("secret", MaskStyle::Fixed("[REDACTED]")), "[REDACTED]");
assert_eq!(mask("anything", MaskStyle::Fixed("***")), "***");
}
// `PerCharacter` keeps the character count, including for the empty string.
#[test]
fn mask_per_character() {
assert_eq!(mask("secret", MaskStyle::PerCharacter('*')), "******");
assert_eq!(mask("ab", MaskStyle::PerCharacter('X')), "XX");
assert_eq!(mask("", MaskStyle::PerCharacter('*')), "");
}
// `PreservePrefix` keeps the first n characters; n larger than the input
// leaves the input unchanged.
#[test]
fn mask_preserve_prefix() {
assert_eq!(
mask("555-123-4567", MaskStyle::PreservePrefix(4, '*')),
"555-********"
);
assert_eq!(mask("short", MaskStyle::PreservePrefix(10, '*')), "short");
assert_eq!(mask("ab", MaskStyle::PreservePrefix(1, '*')), "a*");
}
// `PreserveSuffix` keeps the last n characters; n larger than the input
// leaves the input unchanged.
#[test]
fn mask_preserve_suffix() {
assert_eq!(
mask("4111111111111111", MaskStyle::PreserveSuffix(4, '*')),
"************1111"
);
assert_eq!(mask("short", MaskStyle::PreserveSuffix(10, '*')), "short");
assert_eq!(mask("ab", MaskStyle::PreserveSuffix(1, '*')), "*b");
}
// Masking counts characters, not bytes, so multi-byte text is handled.
#[test]
fn mask_unicode() {
assert_eq!(mask("日本語", MaskStyle::PerCharacter('*')), "***");
assert_eq!(mask("日本語", MaskStyle::PreservePrefix(1, '*')), "日**");
}
// Year precision drops month and day.
#[test]
fn truncate_date_year() {
assert_eq!(truncate_date(2024, 3, 15, DatePrecision::Year), "2024");
assert_eq!(truncate_date(1999, 12, 31, DatePrecision::Year), "1999");
}
// Month precision zero-pads the month and drops the day.
#[test]
fn truncate_date_month() {
assert_eq!(truncate_date(2024, 3, 15, DatePrecision::Month), "2024-03");
assert_eq!(truncate_date(2024, 11, 1, DatePrecision::Month), "2024-11");
}
// Day precision keeps the full date with zero-padded month and day.
#[test]
fn truncate_date_day() {
assert_eq!(truncate_date(2024, 3, 15, DatePrecision::Day), "2024-03-15");
assert_eq!(truncate_date(2024, 1, 5, DatePrecision::Day), "2024-01-05");
}
// Checks the first and last month of each quarter.
#[test]
fn truncate_date_quarter() {
assert_eq!(
truncate_date(2024, 1, 15, DatePrecision::Quarter),
"2024-Q1"
);
assert_eq!(
truncate_date(2024, 3, 31, DatePrecision::Quarter),
"2024-Q1"
);
assert_eq!(truncate_date(2024, 4, 1, DatePrecision::Quarter), "2024-Q2");
assert_eq!(
truncate_date(2024, 6, 30, DatePrecision::Quarter),
"2024-Q2"
);
assert_eq!(truncate_date(2024, 7, 1, DatePrecision::Quarter), "2024-Q3");
assert_eq!(
truncate_date(2024, 9, 30, DatePrecision::Quarter),
"2024-Q3"
);
assert_eq!(
truncate_date(2024, 10, 1, DatePrecision::Quarter),
"2024-Q4"
);
assert_eq!(
truncate_date(2024, 12, 31, DatePrecision::Quarter),
"2024-Q4"
);
}
// Five-year buckets, including both edges of a bucket and the last bucket
// below the 90+ cut-off.
#[test]
fn generalize_age_5_year_buckets() {
assert_eq!(generalize_age(0, 5), "0-4");
assert_eq!(generalize_age(4, 5), "0-4");
assert_eq!(generalize_age(5, 5), "5-9");
assert_eq!(generalize_age(23, 5), "20-24");
assert_eq!(generalize_age(45, 5), "45-49");
assert_eq!(generalize_age(89, 5), "85-89");
}
// Every age from 90 up to the u8 maximum collapses into "90+".
#[test]
fn generalize_age_hipaa_safe_harbor() {
assert_eq!(generalize_age(90, 5), "90+");
assert_eq!(generalize_age(95, 5), "90+");
assert_eq!(generalize_age(100, 5), "90+");
assert_eq!(generalize_age(255, 5), "90+"); }
// Ten-year buckets.
#[test]
fn generalize_age_10_year_buckets() {
assert_eq!(generalize_age(23, 10), "20-29");
assert_eq!(generalize_age(45, 10), "40-49");
assert_eq!(generalize_age(50, 10), "50-59");
}
// Keeping three digits masks the last two.
#[test]
fn generalize_zip_3_digits() {
assert_eq!(generalize_zip("90210", 3), "902**");
assert_eq!(generalize_zip("02134", 3), "021**");
assert_eq!(generalize_zip("12345", 3), "123**");
}
// Keeping one digit masks the last four; a leading zero is preserved.
#[test]
fn generalize_zip_1_digit() {
assert_eq!(generalize_zip("90210", 1), "9****");
assert_eq!(generalize_zip("02134", 1), "0****");
}
// Keeping all five digits masks nothing.
#[test]
fn generalize_zip_full() {
assert_eq!(generalize_zip("90210", 5), "90210");
}
// A three-character input with three preserved digits is still padded
// with two mask characters.
#[test]
fn generalize_zip_short_input() {
assert_eq!(generalize_zip("902", 3), "902**");
}
// Salary-like values in 10,000-wide buckets, including bucket edges.
#[test]
fn generalize_numeric_salary() {
assert_eq!(generalize_numeric(75000, 10000), "70000-79999");
assert_eq!(generalize_numeric(50000, 10000), "50000-59999");
assert_eq!(generalize_numeric(99999, 10000), "90000-99999");
assert_eq!(generalize_numeric(100_000, 10000), "100000-109999");
}
// Small values in 10-wide buckets.
#[test]
fn generalize_numeric_weight() {
assert_eq!(generalize_numeric(82, 10), "80-89");
assert_eq!(generalize_numeric(70, 10), "70-79");
assert_eq!(generalize_numeric(5, 10), "0-9");
}
// Two classes of sizes 2 and 3: the smallest class meets a target of 2.
#[test]
fn k_anonymity_satisfied() {
let records = vec![
"20-29,902**".to_string(),
"20-29,902**".to_string(),
"30-39,902**".to_string(),
"30-39,902**".to_string(),
"30-39,902**".to_string(),
];
let result = check_k_anonymity(records.into_iter(), 2);
assert!(result.satisfies_target);
assert_eq!(result.k, 2); assert_eq!(result.equivalence_classes, 2);
assert_eq!(result.smallest_class_size, 2);
}
// Three classes of sizes 1, 1 and 2: the singleton classes give k = 1,
// which misses a target of 2.
#[test]
fn k_anonymity_not_satisfied() {
let records = vec![
"20-29,902**".to_string(),
"30-39,902**".to_string(), "40-49,902**".to_string(),
"40-49,902**".to_string(),
];
let result = check_k_anonymity(records.into_iter(), 2);
assert!(!result.satisfies_target);
assert_eq!(result.k, 1); assert_eq!(result.equivalence_classes, 3);
}
// No records at all: k is 0 and a target of 2 is not met.
#[test]
fn k_anonymity_empty() {
let result = check_k_anonymity(std::iter::empty(), 2);
assert!(!result.satisfies_target);
assert_eq!(result.k, 0);
assert_eq!(result.equivalence_classes, 0);
}
// ---- Property-based tests (proptest) ------------------------------------
use proptest::prelude::*;
proptest! {
// For ages below 90 the output must parse as "lower-upper", contain the
// age, and span exactly `bucket_size` values; from 90 up it must be "90+".
#[test]
fn prop_generalize_age_valid_format(age in 0u8..=255u8, bucket_size in 1u8..=50u8) {
let result = generalize_age(age, bucket_size);
if age >= 90 {
prop_assert_eq!(result, "90+");
} else {
// Guard: only parse the result as a range when it is not the
// open-ended "90+" label.
if result != "90+" {
let parts: Vec<&str> = result.split('-').collect();
prop_assert_eq!(parts.len(), 2, "age range must have format X-Y");
let lower: u8 = parts[0].parse().expect("lower bound must be numeric");
let upper: u8 = parts[1].parse().expect("upper bound must be numeric");
prop_assert!(age >= lower && age <= upper,
"age {} must be in range {}-{}", age, lower, upper);
prop_assert_eq!(upper - lower + 1, bucket_size,
"range size must equal bucket size");
}
}
}
// Calling `generalize_age` twice with the same arguments gives equal output.
#[test]
fn prop_generalize_age_deterministic(age in 0u8..=100u8, bucket_size in 1u8..=20u8) {
let result1 = generalize_age(age, bucket_size);
let result2 = generalize_age(age, bucket_size);
prop_assert_eq!(result1, result2);
}
// For any five-digit ZIP the output is five characters: the first
// `digits` match the input and the rest are '*'.
#[test]
fn prop_generalize_zip_preserves_digits(
zip in "[0-9]{5}",
digits in 1usize..=5usize, ) {
let result = generalize_zip(&zip, digits);
prop_assert_eq!(result.chars().count(), 5);
for (i, (orig, generated)) in zip.chars().zip(result.chars()).enumerate() {
if i < digits {
prop_assert_eq!(orig, generated, "digit {} should be preserved", i);
} else {
prop_assert_eq!(generated, '*', "digit {} should be masked", i);
}
}
}
// Calling `generalize_zip` twice with the same arguments gives equal output.
#[test]
fn prop_generalize_zip_deterministic(
zip in "[0-9]{5}",
digits in 1usize..=5usize, ) {
let result1 = generalize_zip(&zip, digits);
let result2 = generalize_zip(&zip, digits);
prop_assert_eq!(result1, result2);
}
// `PerCharacter` output has the same character count as the input and
// consists solely of the mask character.
#[test]
fn prop_mask_per_character_length(
value in "\\PC{1,100}",
mask_char in any::<char>().prop_filter("printable char", char::is_ascii_graphic),
) {
let result = mask(&value, MaskStyle::PerCharacter(mask_char));
let value_len = value.chars().count();
let result_len = result.chars().count();
prop_assert_eq!(result_len, value_len);
for ch in result.chars() {
prop_assert_eq!(ch, mask_char);
}
}
// `Fixed` output equals the placeholder for any input.
#[test]
fn prop_mask_fixed_constant(
value in "\\PC{1,100}",
placeholder in "\\PC{1,20}",
) {
let result = mask(&value, MaskStyle::Fixed(&placeholder));
prop_assert_eq!(result, placeholder);
}
// `PreservePrefix`: the first n characters match the input and every
// later character is '*'. Inputs have at least 5 characters and n is at
// most 4, so something is always masked.
#[test]
fn prop_mask_preserve_prefix(
value in "\\PC{5,100}",
n in 1usize..=4usize,
) {
let result = mask(&value, MaskStyle::PreservePrefix(n, '*'));
let value_chars: Vec<char> = value.chars().collect();
let result_chars: Vec<char> = result.chars().collect();
for i in 0..n.min(value_chars.len()) {
prop_assert_eq!(result_chars[i], value_chars[i],
"char {} should be preserved", i);
}
for (i, &ch) in result_chars.iter().enumerate().skip(n) {
prop_assert_eq!(ch, '*',
"char {} should be masked", i);
}
}
// `PreserveSuffix`: the last n characters match the input and every
// earlier character is '*'.
#[test]
fn prop_mask_preserve_suffix(
value in "\\PC{5,100}",
n in 1usize..=4usize,
) {
let result = mask(&value, MaskStyle::PreserveSuffix(n, '*'));
let value_chars: Vec<char> = value.chars().collect();
let result_chars: Vec<char> = result.chars().collect();
let value_len = value_chars.len();
// Index of the first preserved character.
let start_idx = value_len.saturating_sub(n);
for (i, &ch) in value_chars.iter().skip(start_idx).enumerate() {
let result_idx = start_idx + i;
prop_assert_eq!(result_chars[result_idx], ch,
"char {} should be preserved", result_idx);
}
for (i, &ch) in result_chars.iter().enumerate().take(start_idx) {
prop_assert_eq!(ch, '*',
"char {} should be masked", i);
}
}
// The output must parse as "lower-upper", contain the value, and span
// exactly `bucket_size` values (checked for values up to 1,000,000).
#[test]
fn prop_generalize_numeric_valid(
value in 0u64..=1_000_000u64,
bucket_size in 1u64..=10_000u64,
) {
let result = generalize_numeric(value, bucket_size);
let parts: Vec<&str> = result.split('-').collect();
prop_assert_eq!(parts.len(), 2, "must have format X-Y");
let lower: u64 = parts[0].parse().expect("lower bound must be numeric");
let upper: u64 = parts[1].parse().expect("upper bound must be numeric");
prop_assert!(value >= lower && value <= upper,
"value {} must be in range {}-{}", value, lower, upper);
prop_assert_eq!(upper - lower + 1, bucket_size,
"range size must equal bucket size");
}
// Calling `truncate_date` twice with the same arguments gives equal
// output, for every precision. Days are generated in 1..=28 —
// presumably so that every generated day exists in every month.
#[test]
fn prop_truncate_date_deterministic(
year in 1900u16..=2100u16,
month in 1u8..=12u8,
day in 1u8..=28u8, precision_idx in 0usize..=3usize,
) {
// Map the generated index onto one of the four precisions.
let precision = match precision_idx {
0 => DatePrecision::Year,
1 => DatePrecision::Month,
2 => DatePrecision::Quarter,
_ => DatePrecision::Day,
};
let result1 = truncate_date(year, month, day, precision);
let result2 = truncate_date(year, month, day, precision);
prop_assert_eq!(result1, result2);
}
// Year precision equals the plain decimal rendering of the year.
#[test]
fn prop_truncate_date_year(
year in 1900u16..=2100u16,
month in 1u8..=12u8,
day in 1u8..=28u8,
) {
let result = truncate_date(year, month, day, DatePrecision::Year);
prop_assert_eq!(result, year.to_string());
}
// Month precision equals "<year>-<two-digit month>".
#[test]
fn prop_truncate_date_month(
year in 1900u16..=2100u16,
month in 1u8..=12u8,
day in 1u8..=28u8,
) {
let result = truncate_date(year, month, day, DatePrecision::Month);
let expected = format!("{year}-{month:02}");
prop_assert_eq!(result, expected);
}
// With groups of known sizes, a target equal to the smallest group size
// passes and a target one larger fails.
#[test]
fn prop_k_anonymity_monotonic(
group_sizes in prop::collection::vec(1usize..=10usize, 1..20),
) {
// Build `size` identical records for each group.
let mut records = Vec::new();
for (group_id, &size) in group_sizes.iter().enumerate() {
for _ in 0..size {
records.push(format!("group_{group_id}"));
}
}
let min_group_size = *group_sizes.iter().min().unwrap_or(&0);
let result_pass = check_k_anonymity(records.clone().into_iter(), min_group_size);
prop_assert!(result_pass.satisfies_target);
prop_assert_eq!(result_pass.k, min_group_size);
let result_fail = check_k_anonymity(records.into_iter(), min_group_size + 1);
prop_assert!(!result_fail.satisfies_target);
prop_assert!(result_fail.k <= min_group_size);
}
// With equally sized classes, the class count and smallest class size
// reported match how the records were generated.
#[test]
fn prop_k_anonymity_equivalence_classes(
num_classes in 1usize..=20usize,
records_per_class in 1usize..=10usize,
) {
let mut records = Vec::new();
for class_id in 0..num_classes {
for _ in 0..records_per_class {
records.push(format!("class_{class_id}"));
}
}
let result = check_k_anonymity(records.into_iter(), 1);
prop_assert_eq!(result.equivalence_classes, num_classes);
prop_assert_eq!(result.smallest_class_size, records_per_class);
prop_assert_eq!(result.k, records_per_class);
}
}
// ---- Table-driven edge cases (test_case) --------------------------------
use test_case::test_case;
// Each attribute supplies the arguments, the expected return value after
// `=>`, and a case name after `;`.
#[test_case(0, 5 => "0-4"; "age 0")]
#[test_case(89, 5 => "85-89"; "age 89")]
#[test_case(90, 5 => "90+"; "age 90 HIPAA boundary")]
#[test_case(255, 5 => "90+"; "age 255 max u8")]
fn generalize_age_edge_cases(age: u8, bucket_size: u8) -> String {
generalize_age(age, bucket_size)
}
#[test_case("00000", 1 => "0****"; "only first digit")]
#[test_case("12345", 5 => "12345"; "none masked")]
#[test_case("90210", 3 => "902**"; "first 3")]
fn generalize_zip_edge_cases(zip: &str, digits: usize) -> String {
generalize_zip(zip, digits)
}
// ---- Additional edge-case unit tests ------------------------------------

// An empty value stays empty for every style except `Fixed`, which still
// returns the placeholder.
#[test]
fn mask_empty_string() {
assert_eq!(mask("", MaskStyle::PerCharacter('*')), "");
assert_eq!(mask("", MaskStyle::PreservePrefix(5, '*')), "");
assert_eq!(mask("", MaskStyle::PreserveSuffix(5, '*')), "");
assert_eq!(mask("", MaskStyle::Fixed("[REDACTED]")), "[REDACTED]");
}
// Emoji and CJK text are masked per character rather than per byte.
#[test]
fn mask_unicode_characters() {
let emoji_result = mask("😀😁😂", MaskStyle::PerCharacter('*'));
assert_eq!(emoji_result.chars().count(), 3);
let japanese = "日本語";
let result = mask(japanese, MaskStyle::PreservePrefix(1, '*'));
let chars: Vec<char> = result.chars().collect();
assert_eq!(chars[0], '日');
assert_eq!(chars[1], '*');
assert_eq!(chars[2], '*');
}
// Zero, and a large value that is an exact multiple of the bucket size
// (so the bucket start equals the value itself).
#[test]
fn generalize_numeric_edge_values() {
assert_eq!(generalize_numeric(0, 10), "0-9");
let large_value = 1_000_000_000_000u64;
let result = generalize_numeric(large_value, 100);
assert!(result.contains("1000000000000"));
}
// First and last day of each quarter.
#[test]
fn truncate_date_quarter_boundaries() {
assert_eq!(truncate_date(2024, 1, 1, DatePrecision::Quarter), "2024-Q1");
assert_eq!(
truncate_date(2024, 3, 31, DatePrecision::Quarter),
"2024-Q1"
);
assert_eq!(truncate_date(2024, 4, 1, DatePrecision::Quarter), "2024-Q2");
assert_eq!(
truncate_date(2024, 6, 30, DatePrecision::Quarter),
"2024-Q2"
);
assert_eq!(truncate_date(2024, 7, 1, DatePrecision::Quarter), "2024-Q3");
assert_eq!(
truncate_date(2024, 9, 30, DatePrecision::Quarter),
"2024-Q3"
);
assert_eq!(
truncate_date(2024, 10, 1, DatePrecision::Quarter),
"2024-Q4"
);
assert_eq!(
truncate_date(2024, 12, 31, DatePrecision::Quarter),
"2024-Q4"
);
}
// A single record forms a class of size 1, which misses a target of 2.
#[test]
fn k_anonymity_single_record_fails() {
let records: Vec<String> = vec!["unique".to_string()];
let result = check_k_anonymity(records.into_iter(), 2);
assert!(!result.satisfies_target);
assert_eq!(result.k, 1);
assert_eq!(result.equivalence_classes, 1);
}
// One hundred identical records form a single class of size 100.
#[test]
fn k_anonymity_all_identical() {
let records: Vec<String> = vec!["same".to_string(); 100];
let result = check_k_anonymity(records.into_iter(), 2);
assert!(result.satisfies_target);
assert_eq!(result.k, 100);
assert_eq!(result.equivalence_classes, 1);
assert_eq!(result.smallest_class_size, 100);
}
// Three classes of exactly five records: a target of 5 passes and a
// target of 6 fails.
#[test]
fn k_anonymity_exact_boundary() {
let records = vec![
"A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C",
];
let records_owned: Vec<String> = records.iter().map(|&s| s.to_string()).collect();
let result_pass = check_k_anonymity(records_owned.clone().into_iter(), 5);
assert!(result_pass.satisfies_target);
let result_fail = check_k_anonymity(records_owned.into_iter(), 6);
assert!(!result_fail.satisfies_target);
}
// Masking the same value twice with the same style gives equal output.
#[test]
fn mask_style_consistency() {
let value = "sensitive";
assert_eq!(
mask(value, MaskStyle::PerCharacter('X')),
mask(value, MaskStyle::PerCharacter('X'))
);
assert_eq!(
mask(value, MaskStyle::PreservePrefix(3, '*')),
mask(value, MaskStyle::PreservePrefix(3, '*'))
);
}
// A bucket size of 1 yields single-value ranges; 90 is still "90+".
#[test]
fn generalize_age_bucket_size_1() {
assert_eq!(generalize_age(25, 1), "25-25");
assert_eq!(generalize_age(50, 1), "50-50");
assert_eq!(generalize_age(90, 1), "90+"); }
// Inputs shorter than five characters are padded with '*' up to five
// when every available character is preserved.
#[test]
fn generalize_zip_handles_short_input() {
assert_eq!(generalize_zip("123", 3), "123**");
assert_eq!(generalize_zip("1", 1), "1****");
}
}