mod audit;
pub mod budget;
pub mod composition;
mod differential;
mod kanonymity;
pub mod pareto;
pub use audit::*;
pub use composition::{
create_accountant, CompositionMethod, MechanismRecord, NaiveAccountant, PrivacyAccountant,
RenyiDPAccountant, ZeroCDPAccountant,
};
pub use differential::*;
pub use kanonymity::*;
use crate::error::{FingerprintError, FingerprintResult};
use crate::models::{
PrivacyAction, PrivacyActionType, PrivacyAudit, PrivacyLevel, PrivacyMetadata,
};
/// Tunable parameters consumed by [`PrivacyEngine`].
///
/// Build one with [`PrivacyConfig::from_level`] for a preset, or with
/// [`PrivacyConfig::custom`] / [`PrivacyConfig::custom_with_delta`] for
/// explicit values. All fields are public and may be adjusted before the
/// configuration is handed to [`PrivacyEngine::new`].
#[derive(Debug, Clone)]
pub struct PrivacyConfig {
/// Privacy level this configuration was derived from (`Custom` for the
/// `custom*` constructors).
pub level: PrivacyLevel,
/// Total epsilon budget; it seeds the audit, the Laplace mechanism and the
/// accountant, and `PrivacyEngine::add_noise` spends a fixed fraction of it
/// per call.
pub epsilon: f64,
/// `k` handed to the k-anonymity filter and recorded in the audit.
pub k_anonymity: u32,
/// Percentile passed to the winsorization step in `PrivacyEngine::winsorize`.
pub outlier_percentile: f64,
/// Second argument given to `KAnonymity::new`.
/// NOTE(review): presumably the minimum occurrence count a category needs
/// to survive filtering — confirm against `KAnonymity`.
pub min_occurrence: u32,
/// Field names for which `PrivacyEngine::should_suppress_field` returns `true`.
pub suppressed_fields: Vec<String>,
/// Composition method used to select the privacy accountant.
pub composition_method: CompositionMethod,
}
impl PrivacyConfig {
    /// Builds the configuration for a predefined privacy level.
    ///
    /// Epsilon, `k`, the outlier percentile, the minimum occurrence and the
    /// suppressed-field list are copied from `PrivacyMetadata::from_level`;
    /// the composition method is always `CompositionMethod::Naive`.
    pub fn from_level(level: PrivacyLevel) -> Self {
        let preset = PrivacyMetadata::from_level(level);
        Self {
            composition_method: CompositionMethod::Naive,
            suppressed_fields: preset.suppressed_fields,
            min_occurrence: preset.min_occurrence,
            outlier_percentile: preset.outlier_percentile,
            k_anonymity: preset.k_anonymity,
            epsilon: preset.epsilon,
            level,
        }
    }

    /// Builds a `Custom`-level configuration with naive composition.
    ///
    /// The outlier percentile is fixed at `95.0`, `min_occurrence` mirrors
    /// `k_anonymity`, and no fields are suppressed.
    pub fn custom(epsilon: f64, k_anonymity: u32) -> Self {
        Self::custom_parts(epsilon, k_anonymity, CompositionMethod::Naive)
    }

    /// Builds a `Custom`-level configuration with an explicit composition
    /// method.
    ///
    /// Apart from `composition_method`, the result is the same as
    /// [`PrivacyConfig::custom`].
    ///
    /// NOTE(review): `_delta` is accepted but never stored or forwarded by
    /// this constructor — confirm whether that is intentional.
    pub fn custom_with_delta(
        epsilon: f64,
        _delta: f64,
        k_anonymity: u32,
        composition_method: CompositionMethod,
    ) -> Self {
        Self::custom_parts(epsilon, k_anonymity, composition_method)
    }

    /// Shared body of the `custom*` constructors.
    fn custom_parts(
        epsilon: f64,
        k_anonymity: u32,
        composition_method: CompositionMethod,
    ) -> Self {
        let min_occurrence = k_anonymity;
        Self {
            level: PrivacyLevel::Custom,
            epsilon,
            k_anonymity,
            outlier_percentile: 95.0,
            min_occurrence,
            suppressed_fields: Vec::new(),
            composition_method,
        }
    }
}
impl Default for PrivacyConfig {
fn default() -> Self {
Self::from_level(PrivacyLevel::Standard)
}
}
/// Applies the privacy protections described by a [`PrivacyConfig`] and keeps
/// a record of what was done.
///
/// The engine owns a Laplace noise mechanism, a k-anonymity filter, an audit
/// log and a privacy accountant, all initialised from the same configuration.
pub struct PrivacyEngine {
// Configuration the engine was built from.
config: PrivacyConfig,
// Log of every privacy action taken; also tracks its own epsilon total.
audit: PrivacyAudit,
// Noise source used by `add_noise`.
laplace: LaplaceMechanism,
// Category filter used by `filter_categories`.
kanon: KAnonymity,
// Accountant selected by `config.composition_method`; it is told about every
// noise mechanism that is applied.
accountant: Box<dyn PrivacyAccountant>,
}
impl PrivacyEngine {
    /// Divisor applied to the configured epsilon to obtain the cost of a
    /// single `add_noise` call (each call spends `config.epsilon / 100`).
    const NOISE_QUERIES_PER_BUDGET: f64 = 100.0;

    /// Creates an engine from an explicit configuration.
    ///
    /// The accountant is selected by `config.composition_method`; the audit,
    /// the Laplace mechanism and the k-anonymity filter are initialised from
    /// the same configuration values.
    pub fn new(config: PrivacyConfig) -> Self {
        let accountant = create_accountant(config.composition_method, config.epsilon);
        Self {
            audit: PrivacyAudit::new(config.epsilon, config.k_anonymity),
            laplace: LaplaceMechanism::new(config.epsilon),
            kanon: KAnonymity::new(config.k_anonymity, config.min_occurrence),
            accountant,
            config,
        }
    }

    /// Creates an engine from a privacy-level preset.
    pub fn from_level(level: PrivacyLevel) -> Self {
        Self::new(PrivacyConfig::from_level(level))
    }

    /// Returns `true` when `epsilon` still fits in the remaining budget.
    ///
    /// Which budget is consulted depends on the composition method: `Naive`
    /// and `Advanced` compare against the audit's remaining budget, while
    /// `RenyiDP` and `ZeroCDP` compare against the accountant's.
    pub fn can_spend(&self, epsilon: f64) -> bool {
        match self.config.composition_method {
            CompositionMethod::Naive | CompositionMethod::Advanced => {
                self.audit.remaining_budget() >= epsilon
            }
            CompositionMethod::RenyiDP | CompositionMethod::ZeroCDP => {
                self.accountant.remaining_budget() >= epsilon
            }
        }
    }

    /// Adds Laplace noise to `value` and charges the privacy budget.
    ///
    /// Each call costs `config.epsilon / 100`. On success the mechanism is
    /// reported to the accountant and a `LaplaceNoise` action carrying that
    /// epsilon is appended to the audit.
    ///
    /// # Errors
    ///
    /// Returns [`FingerprintError::PrivacyBudgetExhausted`] when
    /// [`PrivacyEngine::can_spend`] rejects the per-query epsilon; nothing is
    /// recorded in that case.
    pub fn add_noise(
        &mut self,
        value: f64,
        sensitivity: f64,
        target: &str,
    ) -> FingerprintResult<f64> {
        let epsilon_per_query = self.config.epsilon / Self::NOISE_QUERIES_PER_BUDGET;
        if !self.can_spend(epsilon_per_query) {
            // NOTE(review): `spent` is always the audit's running total, even
            // when the decision above was made by the RDP/zCDP accountant.
            return Err(FingerprintError::PrivacyBudgetExhausted {
                spent: self.audit.total_epsilon_spent,
                limit: self.config.epsilon,
            });
        }

        let noised = self
            .laplace
            .add_noise(value, sensitivity, epsilon_per_query);

        // Tell the accountant first, then mirror the spend in the audit log.
        let mechanism_record = MechanismRecord::new(
            epsilon_per_query,
            format!("Laplace noise on {target} (sensitivity={sensitivity})"),
        );
        self.accountant.record_mechanism(mechanism_record);

        let action = PrivacyAction::new(
            PrivacyActionType::LaplaceNoise,
            target,
            format!(
                "Added Laplace noise with sensitivity={sensitivity}, epsilon={epsilon_per_query}"
            ),
            "Differential privacy protection",
        )
        .with_epsilon(epsilon_per_query);
        self.audit.record_action(action);

        Ok(noised)
    }

    /// Adds Laplace noise (sensitivity `1.0`) to a count.
    ///
    /// The noised value is floored at zero and rounded to the nearest
    /// integer before being returned.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`PrivacyEngine::add_noise`].
    pub fn add_noise_to_count(&mut self, count: u64, target: &str) -> FingerprintResult<u64> {
        let noised = self.add_noise(count as f64, 1.0, target)?;
        // Negative noise must not wrap around when converted to unsigned.
        Ok(noised.max(0.0).round() as u64)
    }

    /// Runs `frequencies` through the k-anonymity filter and returns what it
    /// kept.
    ///
    /// A `Suppression` action is written to the audit only when the filter
    /// reports at least one suppressed category.
    pub fn filter_categories(
        &mut self,
        frequencies: Vec<(String, u64)>,
        total: u64,
        target: &str,
    ) -> Vec<(String, f64)> {
        let (kept, suppressed) = self.kanon.filter_frequencies(frequencies, total);
        if suppressed > 0 {
            let action = PrivacyAction::new(
                PrivacyActionType::Suppression,
                target,
                format!(
                    "Suppressed {} rare categories below k={}",
                    suppressed, self.config.k_anonymity
                ),
                "K-anonymity protection",
            );
            self.audit.record_action(action);
        }
        kept
    }

    /// Winsorizes `values` in place at the configured outlier percentile.
    ///
    /// A `Winsorization` action is written to the audit only when at least
    /// one value was clamped.
    pub fn winsorize(&mut self, values: &mut [f64], target: &str) {
        let percentile = self.config.outlier_percentile;
        let (low_count, high_count) = winsorize_values(values, percentile);
        if low_count > 0 || high_count > 0 {
            let action = PrivacyAction::new(
                PrivacyActionType::Winsorization,
                target,
                format!(
                    "Winsorized {low_count} low and {high_count} high outliers at {percentile}th percentile"
                ),
                "Outlier protection",
            );
            self.audit.record_action(action);
        }
    }

    /// Returns `true` when `field` is listed in the configured suppressed
    /// fields (exact string match).
    pub fn should_suppress_field(&self, field: &str) -> bool {
        self.config.suppressed_fields.iter().any(|f| f == field)
    }

    /// Appends an externally built action to the audit.
    pub fn record_action(&mut self, action: PrivacyAction) {
        self.audit.record_action(action);
    }

    /// Borrows the audit accumulated so far.
    pub fn audit(&self) -> &PrivacyAudit {
        &self.audit
    }

    /// Consumes the engine and returns its audit.
    ///
    /// Before returning, the audit is stamped with the accountant's method
    /// name and, when the accountant reports one, its optimal alpha.
    pub fn into_audit(mut self) -> PrivacyAudit {
        self.audit.composition_method = Some(self.accountant.method().to_string());
        if let Some(alpha) = self.accountant.optimal_alpha() {
            self.audit.rdp_alpha_effective = Some(alpha);
        }
        self.audit
    }

    /// Remaining budget according to the audit, regardless of the
    /// composition method in use.
    pub fn remaining_budget(&self) -> f64 {
        self.audit.remaining_budget()
    }

    /// Effective epsilon as reported by the accountant.
    pub fn effective_epsilon(&self) -> f64 {
        self.accountant.effective_epsilon()
    }

    /// Remaining budget according to the accountant.
    pub fn accountant_remaining_budget(&self) -> f64 {
        self.accountant.remaining_budget()
    }

    /// Builds metadata describing the parameters this engine enforces.
    ///
    /// Starts from the preset for the configured level and then overrides
    /// every value the engine actually uses with the one from its
    /// configuration, so a customised configuration is reported faithfully
    /// instead of falling back to preset values. The composition method and
    /// delta come from the accountant.
    pub fn build_privacy_metadata(&self) -> PrivacyMetadata {
        let mut meta = PrivacyMetadata::from_level(self.config.level);
        meta.epsilon = self.config.epsilon;
        meta.k_anonymity = self.config.k_anonymity;
        // These drive winsorization, k-anonymity filtering and field
        // suppression, so they must mirror the live configuration too.
        meta.outlier_percentile = self.config.outlier_percentile;
        meta.min_occurrence = self.config.min_occurrence;
        meta.suppressed_fields
            .clone_from(&self.config.suppressed_fields);
        meta.composition_method = Some(self.accountant.method().to_string());
        meta.delta = self.accountant.target_delta();
        meta
    }

    /// Composition method the engine was configured with.
    pub fn composition_method(&self) -> CompositionMethod {
        self.config.composition_method
    }
}
/// Clamps both tails of `values` in place and reports how many entries moved.
///
/// `percentile` names the upper cut-off; the lower cut-off is its mirror
/// image (`100 - percentile`). The argument is clamped to `[0, 100]` and then
/// folded onto `[50, 100]`, so `5.0` and `95.0` select the same pair of
/// thresholds. Without the fold a value below 50 would put the lower
/// threshold above the upper one and collapse the data.
///
/// Thresholds are read from a sorted copy of the data: the lower one at index
/// `floor((100 - p) / 100 * n)` and the upper one at index
/// `ceil(p / 100 * n)`, capped at the last element.
///
/// Returns `(low_count, high_count)`: the number of entries raised to the
/// lower threshold and lowered to the upper threshold. An empty slice or a
/// NaN `percentile` leaves the data untouched and returns `(0, 0)`.
///
/// NaN entries are never modified, because every comparison against them is
/// false; a NaN that lands on a threshold index disables clamping on that
/// side for the same reason.
fn winsorize_values(values: &mut [f64], percentile: f64) -> (usize, usize) {
    if values.is_empty() || percentile.is_nan() {
        return (0, 0);
    }

    // Normalise the request so that low_idx <= high_idx always holds.
    let bounded = percentile.clamp(0.0, 100.0);
    let upper = bounded.max(100.0 - bounded);

    let n = values.len();
    let low_idx = ((100.0 - upper) / 100.0 * n as f64).floor() as usize;
    let high_idx = (upper / 100.0 * n as f64).ceil() as usize;

    // Elements that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort produces the same sequence as a stable one.
    let mut sorted = values.to_vec();
    sorted.sort_unstable_by(f64::total_cmp);

    // `n >= 1` here, so capping at `n - 1` keeps both indices in bounds.
    let low_threshold = sorted[low_idx.min(n - 1)];
    let high_threshold = sorted[high_idx.min(n - 1)];

    let mut low_count = 0;
    let mut high_count = 0;
    for v in values.iter_mut() {
        if *v < low_threshold {
            *v = low_threshold;
            low_count += 1;
        } else if *v > high_threshold {
            *v = high_threshold;
            high_count += 1;
        }
    }
    (low_count, high_count)
}
// Unit tests for `PrivacyEngine`: budget accounting under each composition
// method, audit/metadata population, and budget exhaustion.
#[cfg(test)]
mod tests {
use super::*;
// The Standard preset must keep using naive composition, and 50 noise
// queries must spend exactly half of a budget of 1.0 (0.01 per query).
#[test]
fn test_standard_level_backward_compat_unchanged() {
let mut engine = PrivacyEngine::from_level(PrivacyLevel::Standard);
assert_eq!(engine.composition_method(), CompositionMethod::Naive);
assert!((engine.remaining_budget() - 1.0).abs() < 1e-10);
for i in 0..50 {
engine.add_noise(100.0, 1.0, &format!("col_{}", i)).unwrap();
}
let audit = engine.audit();
assert!((audit.total_epsilon_spent - 0.50).abs() < 1e-10);
assert_eq!(audit.actions.len(), 50);
let remaining = 1.0 - 0.50;
assert!((engine.remaining_budget() - remaining).abs() < 1e-10);
}
// `into_audit` stamps the method name; naive composition reports no alpha.
#[test]
fn test_naive_engine_into_audit_populates_composition_method() {
let mut engine = PrivacyEngine::from_level(PrivacyLevel::Standard);
engine.add_noise(42.0, 1.0, "test.col").unwrap();
let audit = engine.into_audit();
assert_eq!(audit.composition_method, Some("naive".to_string()));
assert_eq!(audit.rdp_alpha_effective, None);
}
// After the same 50 queries, the RDP accountant must report a smaller
// effective epsilon than the naive one; both audits log every query.
#[test]
fn test_rdp_tighter_effective_epsilon_than_naive() {
let epsilon = 5.0;
let n_queries = 50;
let mut naive_config = PrivacyConfig::custom(epsilon, 5);
naive_config.composition_method = CompositionMethod::Naive;
let mut naive_engine = PrivacyEngine::new(naive_config);
let mut rdp_config = PrivacyConfig::custom(epsilon, 5);
rdp_config.composition_method = CompositionMethod::RenyiDP;
let mut rdp_engine = PrivacyEngine::new(rdp_config);
for i in 0..n_queries {
let target = format!("col_{}", i);
naive_engine.add_noise(100.0, 1.0, &target).unwrap();
rdp_engine.add_noise(100.0, 1.0, &target).unwrap();
}
let naive_effective = naive_engine.effective_epsilon();
let rdp_effective = rdp_engine.effective_epsilon();
assert!(
rdp_effective < naive_effective,
"RDP effective epsilon ({:.6}) should be less than naive ({:.6})",
rdp_effective,
naive_effective
);
assert_eq!(naive_engine.audit().actions.len(), n_queries);
assert_eq!(rdp_engine.audit().actions.len(), n_queries);
}
// With RDP, `into_audit` must record both the method name and an alpha.
#[test]
fn test_rdp_engine_into_audit_populates_fields() {
let mut config = PrivacyConfig::custom(5.0, 5);
config.composition_method = CompositionMethod::RenyiDP;
let mut engine = PrivacyEngine::new(config);
engine.add_noise(42.0, 1.0, "test.col").unwrap();
let audit = engine.into_audit();
assert_eq!(audit.composition_method, Some("renyi_dp".to_string()));
assert!(
audit.rdp_alpha_effective.is_some(),
"RDP audit should have optimal alpha set"
);
}
// Same comparison as the RDP case, but for the zCDP accountant.
#[test]
fn test_zcdp_tighter_effective_epsilon_than_naive() {
let epsilon = 5.0;
let n_queries = 50;
let mut naive_config = PrivacyConfig::custom(epsilon, 5);
naive_config.composition_method = CompositionMethod::Naive;
let mut naive_engine = PrivacyEngine::new(naive_config);
let mut zcdp_config = PrivacyConfig::custom(epsilon, 5);
zcdp_config.composition_method = CompositionMethod::ZeroCDP;
let mut zcdp_engine = PrivacyEngine::new(zcdp_config);
for i in 0..n_queries {
let target = format!("col_{}", i);
naive_engine.add_noise(100.0, 1.0, &target).unwrap();
zcdp_engine.add_noise(100.0, 1.0, &target).unwrap();
}
let naive_effective = naive_engine.effective_epsilon();
let zcdp_effective = zcdp_engine.effective_epsilon();
assert!(
zcdp_effective < naive_effective,
"zCDP effective epsilon ({:.6}) should be less than naive ({:.6})",
zcdp_effective,
naive_effective
);
}
// With zCDP, the method name is recorded but no RDP alpha is reported.
#[test]
fn test_zcdp_engine_into_audit_populates_fields() {
let mut config = PrivacyConfig::custom(5.0, 5);
config.composition_method = CompositionMethod::ZeroCDP;
let mut engine = PrivacyEngine::new(config);
engine.add_noise(42.0, 1.0, "test.col").unwrap();
let audit = engine.into_audit();
assert_eq!(audit.composition_method, Some("zcdp".to_string()));
assert_eq!(audit.rdp_alpha_effective, None);
}
// Naive composition runs out after roughly 100 queries; 99 is tolerated
// because repeated floating-point subtraction can leave the remaining
// budget marginally below the per-query cost.
#[test]
fn test_naive_budget_exhaustion() {
let mut engine = PrivacyEngine::from_level(PrivacyLevel::Standard);
let mut succeeded = 0;
for i in 0..110 {
match engine.add_noise(1.0, 1.0, &format!("q_{}", i)) {
Ok(_) => succeeded += 1,
Err(_) => break,
}
}
assert!(
(99..=100).contains(&succeeded),
"Expected ~100 successful queries, got {}",
succeeded
);
let result = engine.add_noise(1.0, 1.0, "q_overflow");
assert!(
result.is_err(),
"Should fail after exhausting budget with naive composition"
);
}
// Under RDP the engine consults the accountant's budget, so it must accept
// more than the 100 queries naive composition would allow.
#[test]
fn test_rdp_budget_allows_more_queries_than_naive_before_exhaustion() {
let mut rdp_config = PrivacyConfig::custom(1.0, 5);
rdp_config.composition_method = CompositionMethod::RenyiDP;
let mut rdp_engine = PrivacyEngine::new(rdp_config);
let mut count = 0;
for i in 0..500 {
let result = rdp_engine.add_noise(1.0, 1.0, &format!("q_{}", i));
if result.is_err() {
break;
}
count += 1;
}
assert!(
count > 100,
"RDP engine should allow more than 100 queries (got {}), since \
effective epsilon grows sub-linearly with composition",
count
);
}
// Metadata for the naive Standard preset: method name set, no delta,
// epsilon of 1.0.
#[test]
fn test_build_privacy_metadata_naive() {
let engine = PrivacyEngine::from_level(PrivacyLevel::Standard);
let meta = engine.build_privacy_metadata();
assert_eq!(meta.composition_method, Some("naive".to_string()));
assert_eq!(meta.delta, None);
assert!((meta.epsilon - 1.0).abs() < 1e-10);
}
// Metadata for RDP: method name, a delta of 1e-5 from the accountant, and
// the configured epsilon.
#[test]
fn test_build_privacy_metadata_rdp() {
let mut config = PrivacyConfig::custom(2.0, 5);
config.composition_method = CompositionMethod::RenyiDP;
let engine = PrivacyEngine::new(config);
let meta = engine.build_privacy_metadata();
assert_eq!(meta.composition_method, Some("renyi_dp".to_string()));
assert!(meta.delta.is_some(), "RDP metadata should include delta");
assert!((meta.delta.unwrap() - 1e-5).abs() < 1e-15);
assert!((meta.epsilon - 2.0).abs() < 1e-10);
}
// Metadata for zCDP: method name and a delta of 1e-5 from the accountant.
#[test]
fn test_build_privacy_metadata_zcdp() {
let mut config = PrivacyConfig::custom(2.0, 5);
config.composition_method = CompositionMethod::ZeroCDP;
let engine = PrivacyEngine::new(config);
let meta = engine.build_privacy_metadata();
assert_eq!(meta.composition_method, Some("zcdp".to_string()));
assert!(meta.delta.is_some(), "zCDP metadata should include delta");
assert!((meta.delta.unwrap() - 1e-5).abs() < 1e-15);
}
// The audit always tracks the naive sum (50 * 0.05 = 2.5 spent of 5.0),
// while the RDP accountant must report more budget left than that.
#[test]
fn test_accountant_remaining_budget_tighter_for_rdp() {
let mut config = PrivacyConfig::custom(5.0, 5);
config.composition_method = CompositionMethod::RenyiDP;
let mut engine = PrivacyEngine::new(config);
for i in 0..50 {
engine.add_noise(1.0, 1.0, &format!("q_{}", i)).unwrap();
}
let audit_remaining = engine.remaining_budget();
let accountant_remaining = engine.accountant_remaining_budget();
assert!((audit_remaining - 2.5).abs() < 1e-10);
assert!(
accountant_remaining > audit_remaining,
"Accountant remaining ({:.4}) should be greater than audit remaining ({:.4}) for RDP",
accountant_remaining,
audit_remaining
);
}
}