mod audit;
mod differential;
mod kanonymity;
pub use audit::*;
pub use differential::*;
pub use kanonymity::*;
use crate::error::{FingerprintError, FingerprintResult};
use crate::models::{
PrivacyAction, PrivacyActionType, PrivacyAudit, PrivacyLevel, PrivacyMetadata,
};
/// Tunable parameters that control how strongly data is protected.
///
/// Build one with [`PrivacyConfig::from_level`], [`PrivacyConfig::custom`], or `Default`
/// (which uses `PrivacyLevel::Standard`).
#[derive(Debug, Clone)]
pub struct PrivacyConfig {
/// Privacy level this configuration is associated with.
pub level: PrivacyLevel,
/// Total epsilon value. `PrivacyEngine` hands it to the audit log and the Laplace
/// mechanism, and spends one hundredth of it on every noised query.
pub epsilon: f64,
/// `k` threshold handed to the k-anonymity filter and to the audit log.
pub k_anonymity: u32,
/// Percentile, on a 0-100 scale, used when winsorizing numeric values.
pub outlier_percentile: f64,
/// Second parameter of the k-anonymity filter; presumably the minimum number of
/// occurrences a category needs in order to be kept - confirm against `KAnonymity`.
pub min_occurrence: u32,
/// Field names for which `PrivacyEngine::should_suppress_field` returns `true`.
pub suppressed_fields: Vec<String>,
}
impl PrivacyConfig {
pub fn from_level(level: PrivacyLevel) -> Self {
let metadata = PrivacyMetadata::from_level(level);
Self {
level,
epsilon: metadata.epsilon,
k_anonymity: metadata.k_anonymity,
outlier_percentile: metadata.outlier_percentile,
min_occurrence: metadata.min_occurrence,
suppressed_fields: metadata.suppressed_fields,
}
}
pub fn custom(epsilon: f64, k_anonymity: u32) -> Self {
let level = if epsilon >= 5.0 {
PrivacyLevel::Minimal
} else if epsilon >= 1.0 {
PrivacyLevel::Standard
} else if epsilon >= 0.5 {
PrivacyLevel::High
} else {
PrivacyLevel::Maximum
};
Self {
level,
epsilon,
k_anonymity,
outlier_percentile: 95.0,
min_occurrence: k_anonymity,
suppressed_fields: Vec::new(),
}
}
}
impl Default for PrivacyConfig {
fn default() -> Self {
Self::from_level(PrivacyLevel::Standard)
}
}
/// Applies privacy protections (Laplace noise, category suppression, winsorization)
/// and records each applied protection in an audit log.
pub struct PrivacyEngine {
/// Parameters the engine was created with.
config: PrivacyConfig,
/// Log of recorded privacy actions; also the source of the remaining epsilon budget.
audit: PrivacyAudit,
/// Noise generator used by `add_noise`.
laplace: LaplaceMechanism,
/// Frequency filter used by `filter_categories`.
kanon: KAnonymity,
}
impl PrivacyEngine {
    /// Creates an engine whose audit log, Laplace mechanism and k-anonymity filter are all
    /// initialised from `config`.
    pub fn new(config: PrivacyConfig) -> Self {
        let audit = PrivacyAudit::new(config.epsilon, config.k_anonymity);
        let laplace = LaplaceMechanism::new(config.epsilon);
        let kanon = KAnonymity::new(config.k_anonymity, config.min_occurrence);

        Self {
            config,
            audit,
            laplace,
            kanon,
        }
    }

    /// Creates an engine using the configuration of a predefined privacy level.
    pub fn from_level(level: PrivacyLevel) -> Self {
        let config = PrivacyConfig::from_level(level);
        Self::new(config)
    }

    /// Returns `true` when the remaining budget is at least `epsilon`.
    pub fn can_spend(&self, epsilon: f64) -> bool {
        epsilon <= self.remaining_budget()
    }

    /// Adds Laplace noise to `value` and records the action against `target`.
    ///
    /// Each call uses one hundredth of the configured epsilon.
    ///
    /// # Errors
    ///
    /// Returns `FingerprintError::PrivacyBudgetExhausted` when the remaining budget is
    /// smaller than the per-query epsilon; nothing is recorded in that case.
    pub fn add_noise(
        &mut self,
        value: f64,
        sensitivity: f64,
        target: &str,
    ) -> FingerprintResult<f64> {
        let budget = self.config.epsilon;
        let per_query = budget / 100.0;

        // NOTE(review): this is a floating-point comparison; accumulated rounding in the
        // audit's running total may decide whether the last query is admitted - confirm
        // against `PrivacyAudit`.
        if !self.can_spend(per_query) {
            return Err(FingerprintError::PrivacyBudgetExhausted {
                spent: self.audit.total_epsilon_spent,
                limit: budget,
            });
        }

        let noised = self.laplace.add_noise(value, sensitivity, per_query);

        let description = format!(
            "Added Laplace noise with sensitivity={}, epsilon={}",
            sensitivity, per_query
        );
        self.record_action(
            PrivacyAction::new(
                PrivacyActionType::LaplaceNoise,
                target,
                description,
                "Differential privacy protection",
            )
            .with_epsilon(per_query),
        );

        Ok(noised)
    }

    /// Adds Laplace noise (sensitivity `1.0`) to a count.
    ///
    /// The noised value is clamped at zero and rounded before being converted back to an
    /// integer.
    ///
    /// # Errors
    ///
    /// Propagates the error of [`PrivacyEngine::add_noise`].
    pub fn add_noise_to_count(&mut self, count: u64, target: &str) -> FingerprintResult<u64> {
        self.add_noise(count as f64, 1.0, target)
            .map(|noised| noised.max(0.0).round() as u64)
    }

    /// Runs `frequencies` through the k-anonymity filter and returns what it kept.
    ///
    /// A suppression action is recorded against `target` only when the filter reports
    /// that at least one category was suppressed.
    pub fn filter_categories(
        &mut self,
        frequencies: Vec<(String, u64)>,
        total: u64,
        target: &str,
    ) -> Vec<(String, f64)> {
        let (kept, suppressed) = self.kanon.filter_frequencies(frequencies, total);

        if suppressed > 0 {
            let description = format!(
                "Suppressed {} rare categories below k={}",
                suppressed, self.config.k_anonymity
            );
            self.record_action(PrivacyAction::new(
                PrivacyActionType::Suppression,
                target,
                description,
                "K-anonymity protection",
            ));
        }

        kept
    }

    /// Winsorizes `values` in place at the configured outlier percentile.
    ///
    /// An action is recorded against `target` only when at least one value was changed.
    pub fn winsorize(&mut self, values: &mut [f64], target: &str) {
        let percentile = self.config.outlier_percentile;
        let (low_count, high_count) = winsorize_values(values, percentile);

        if low_count == 0 && high_count == 0 {
            return;
        }

        let description = format!(
            "Winsorized {} low and {} high outliers at {}th percentile",
            low_count, high_count, percentile
        );
        self.record_action(PrivacyAction::new(
            PrivacyActionType::Winsorization,
            target,
            description,
            "Outlier protection",
        ));
    }

    /// Returns `true` when `field` is listed in the configuration's suppressed fields.
    pub fn should_suppress_field(&self, field: &str) -> bool {
        self.config
            .suppressed_fields
            .iter()
            .any(|name| name.as_str() == field)
    }

    /// Appends `action` to the audit log.
    pub fn record_action(&mut self, action: PrivacyAction) {
        self.audit.record_action(action);
    }

    /// Borrows the audit log.
    pub fn audit(&self) -> &PrivacyAudit {
        &self.audit
    }

    /// Consumes the engine and returns its audit log.
    pub fn into_audit(self) -> PrivacyAudit {
        let Self { audit, .. } = self;
        audit
    }

    /// Returns the remaining budget as reported by the audit log.
    pub fn remaining_budget(&self) -> f64 {
        self.audit.remaining_budget()
    }
}
/// Clamps the extreme entries of `values`, in place, to percentile-derived thresholds.
///
/// With `n` the number of non-NaN entries and the data sorted ascending:
///
/// * the lower threshold is the element at index `floor((100 - percentile) / 100 * n)`;
/// * the upper threshold is the element at index `min(ceil(percentile / 100 * n), n - 1)`.
///
/// Entries strictly below the lower threshold are raised to it, and entries strictly
/// above the upper threshold are lowered to it.
///
/// NaN entries are ignored when the thresholds are computed and are left untouched, so
/// NaN input cannot cause a panic.
///
/// Returns `(low_count, high_count)`: the number of entries that were raised and lowered.
fn winsorize_values(values: &mut [f64], percentile: f64) -> (usize, usize) {
    // NaN has no place in a numeric ordering, so only comparable values are ranked.
    let mut sorted: Vec<f64> = values.iter().copied().filter(|v| !v.is_nan()).collect();
    if sorted.is_empty() {
        return (0, 0);
    }
    sorted.sort_by(|a, b| a.partial_cmp(b).expect("NaN values were filtered out"));

    let n = sorted.len();
    // Float-to-integer `as` casts saturate, so out-of-range percentiles cannot wrap.
    let low_idx = ((100.0 - percentile) / 100.0 * n as f64).floor() as usize;
    let high_idx = ((percentile / 100.0 * n as f64).ceil() as usize).min(n - 1);

    // `low_idx` can reach `n` (e.g. `percentile <= 0`), hence the fallback value.
    let low_threshold = sorted.get(low_idx).copied().unwrap_or(f64::MIN);
    let high_threshold = sorted.get(high_idx).copied().unwrap_or(f64::MAX);

    let mut low_count = 0;
    let mut high_count = 0;
    for v in values.iter_mut() {
        // Both comparisons are false for NaN, so NaN entries pass through unchanged.
        if *v < low_threshold {
            *v = low_threshold;
            low_count += 1;
        } else if *v > high_threshold {
            *v = high_threshold;
            high_count += 1;
        }
    }
    (low_count, high_count)
}