use crate::dataset::Dataset;
use crate::error::{DataError, Result};
use crate::sampler::{Sampler, SamplerIterator};
use scirs2_core::rand_prelude::Distribution;
use scirs2_core::random::RandNormal;
use scirs2_core::RngExt;
use std::collections::HashMap;
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, vec, vec::Vec};
/// Differential-privacy mechanism together with the parameters used to calibrate it.
#[derive(Clone, Debug)]
pub enum DPMechanism {
/// Laplace mechanism described by a sensitivity and an epsilon value.
Laplace { sensitivity: f64, epsilon: f64 },
/// Gaussian mechanism described by a sensitivity, an epsilon value and a delta value.
Gaussian {
sensitivity: f64,
epsilon: f64,
delta: f64,
},
/// Exponential mechanism described by a sensitivity and an epsilon value.
Exponential { sensitivity: f64, epsilon: f64 },
/// Report-noisy-max mechanism described by an epsilon value only.
ReportNoisyMax { epsilon: f64 },
}
/// Tracks a total (epsilon, delta) privacy allowance and how much of it has been spent.
#[derive(Clone, Debug)]
pub struct PrivacyBudget {
/// Total epsilon available.
epsilon: f64,
/// Total delta available.
delta: f64,
/// Epsilon spent so far.
used_epsilon: f64,
/// Delta spent so far.
used_delta: f64,
/// Rule used when deciding whether another query still fits in the budget.
composition_type: CompositionType,
}
/// Composition rule a `PrivacyBudget` applies when weighing the cost of a query.
#[derive(Clone, Debug)]
pub enum CompositionType {
/// Query costs are added up unchanged.
Basic,
/// Query epsilon is rescaled using the budget's delta before it is compared.
Advanced,
/// Query epsilon is discounted by a fixed factor before it is compared.
MomentsAccountant,
}
impl PrivacyBudget {
    /// Creates a budget with the given totals, nothing spent, and
    /// [`CompositionType::Basic`] accounting.
    ///
    /// # Panics
    ///
    /// Panics if `epsilon` is not strictly positive or `delta` is outside `[0, 1)`.
    pub fn new(epsilon: f64, delta: f64) -> Self {
        assert!(epsilon > 0.0, "epsilon must be positive");
        assert!(delta >= 0.0 && delta < 1.0, "delta must be in [0, 1)");
        Self {
            epsilon,
            delta,
            used_epsilon: 0.0,
            used_delta: 0.0,
            composition_type: CompositionType::Basic,
        }
    }

    /// Replaces the composition rule used for all subsequent accounting.
    pub fn with_composition(mut self, composition_type: CompositionType) -> Self {
        self.composition_type = composition_type;
        self
    }

    /// Epsilon that a query of nominal cost `query_epsilon` is charged under the
    /// configured composition rule.
    ///
    /// Both the admission check and the actual charge go through this helper so
    /// that an admitted query can never push `used_epsilon` past `epsilon`.
    fn effective_epsilon(&self, query_epsilon: f64) -> f64 {
        match self.composition_type {
            CompositionType::Basic => query_epsilon,
            CompositionType::Advanced => {
                // `f64::min` ignores a NaN/infinite left-hand side (e.g. when
                // `delta == 0`), so the charge never exceeds the nominal cost.
                (query_epsilon * (2.0 * (1.0 / self.delta).ln()).sqrt()).min(query_epsilon)
            }
            CompositionType::MomentsAccountant => query_epsilon * 0.8,
        }
    }

    /// Returns `true` when a query costing (`query_epsilon`, `query_delta`) still
    /// fits into the remaining budget.
    ///
    /// Negative or NaN costs are never answerable: a negative cost would
    /// otherwise refund budget that has already been spent.
    pub fn can_answer(&self, query_epsilon: f64, query_delta: f64) -> bool {
        let invalid = query_epsilon.is_nan()
            || query_delta.is_nan()
            || query_epsilon < 0.0
            || query_delta < 0.0;
        if invalid {
            return false;
        }
        self.used_epsilon + self.effective_epsilon(query_epsilon) <= self.epsilon
            && self.used_delta + query_delta <= self.delta
    }

    /// Charges a query against the budget.
    ///
    /// The amount charged is the same composition-adjusted epsilon that
    /// [`PrivacyBudget::can_answer`] checks, so the spent amount never exceeds
    /// the total.
    ///
    /// # Errors
    ///
    /// Returns a privacy-budget-exceeded error when the query does not fit
    /// (or has a negative/NaN cost); the budget is left untouched in that case.
    pub fn consume(&mut self, query_epsilon: f64, query_delta: f64) -> Result<()> {
        if !self.can_answer(query_epsilon, query_delta) {
            return Err(DataError::privacy_budget_exceeded(format!(
                "Insufficient privacy budget: need ({}, {}), have ({}, {})",
                query_epsilon,
                query_delta,
                self.epsilon - self.used_epsilon,
                self.delta - self.used_delta
            )));
        }
        self.used_epsilon += self.effective_epsilon(query_epsilon);
        self.used_delta += query_delta;
        Ok(())
    }

    /// Returns the unspent `(epsilon, delta)`.
    pub fn remaining(&self) -> (f64, f64) {
        (
            self.epsilon - self.used_epsilon,
            self.delta - self.used_delta,
        )
    }

    /// Marks the whole budget as unspent again.
    pub fn reset(&mut self) {
        self.used_epsilon = 0.0;
        self.used_delta = 0.0;
    }
}
/// Wrapper around a dataset that hands out samples only through a privacy-budgeted accessor.
pub struct PrivateDataset<D> {
/// Wrapped dataset the samples are read from.
dataset: D,
/// Budget charged on every successful `private_get` call.
privacy_budget: PrivacyBudget,
/// Mechanism (and its parameters) used to pick the noise scale and the per-access cost.
dp_mechanism: DPMechanism,
/// Source of the noise tensors; chosen from `dp_mechanism` in `new`.
noise_generator: Box<dyn NoiseGenerator + Send>,
/// Number of successful `private_get` calls since construction or the last reset.
access_count: usize,
/// Optional cap on `access_count`; `None` means no cap.
max_accesses: Option<usize>,
}
impl<D: Dataset<Item = torsh_tensor::Tensor>> PrivateDataset<D> {
    /// Wraps `dataset` so that samples are only released through
    /// [`PrivateDataset::private_get`], charged against `privacy_budget`.
    ///
    /// The Gaussian mechanism uses a Gaussian noise generator; every other
    /// mechanism uses the Laplace generator. No access limit is set.
    pub fn new(dataset: D, privacy_budget: PrivacyBudget, dp_mechanism: DPMechanism) -> Self {
        // Variants are listed explicitly so that adding a mechanism forces a
        // decision about its noise generator here.
        let noise_generator: Box<dyn NoiseGenerator + Send> = match dp_mechanism {
            DPMechanism::Gaussian { .. } => Box::new(GaussianNoise::new()),
            DPMechanism::Laplace { .. }
            | DPMechanism::Exponential { .. }
            | DPMechanism::ReportNoisyMax { .. } => Box::new(LaplaceNoise::new()),
        };
        Self {
            dataset,
            privacy_budget,
            dp_mechanism,
            noise_generator,
            access_count: 0,
            max_accesses: None,
        }
    }

    /// Caps the number of successful `private_get` calls.
    pub fn with_max_accesses(mut self, max_accesses: usize) -> Self {
        self.max_accesses = Some(max_accesses);
        self
    }

    /// Returns a noised copy of the sample at `index`, charging the privacy budget.
    ///
    /// Returns `Ok(None)` when the remaining budget cannot cover one more access.
    ///
    /// # Errors
    ///
    /// Fails when the access limit has been reached, when the wrapped dataset
    /// cannot produce the sample, or when noise generation/addition fails.
    pub fn private_get(&mut self, index: usize) -> Result<Option<torsh_tensor::Tensor>> {
        if let Some(max) = self.max_accesses {
            if self.access_count >= max {
                return Err(DataError::privacy_access_limit_exceeded(format!(
                    "Maximum accesses ({}) exceeded",
                    max
                )));
            }
        }
        let (query_epsilon, query_delta) = self.get_query_cost();
        if !self.privacy_budget.can_answer(query_epsilon, query_delta) {
            return Ok(None);
        }
        let sample = self.dataset.get(index)?;
        let private_sample = self.apply_dp_mechanism(sample)?;
        // Charge only after the noised sample exists, so a failed access is free.
        self.privacy_budget.consume(query_epsilon, query_delta)?;
        self.access_count += 1;
        Ok(Some(private_sample))
    }

    /// Per-access `(epsilon, delta)` cost: one thousandth of the mechanism's
    /// parameters (delta is zero for every mechanism except Gaussian).
    fn get_query_cost(&self) -> (f64, f64) {
        // Fraction of the mechanism parameters charged for a single access.
        const ACCESS_COST_DIVISOR: f64 = 1000.0;
        match &self.dp_mechanism {
            DPMechanism::Laplace { epsilon, .. }
            | DPMechanism::Exponential { epsilon, .. }
            | DPMechanism::ReportNoisyMax { epsilon } => (*epsilon / ACCESS_COST_DIVISOR, 0.0),
            DPMechanism::Gaussian { epsilon, delta, .. } => (
                *epsilon / ACCESS_COST_DIVISOR,
                *delta / ACCESS_COST_DIVISOR,
            ),
        }
    }

    /// Perturbs `tensor` with noise calibrated to the configured mechanism.
    fn apply_dp_mechanism(&mut self, tensor: torsh_tensor::Tensor) -> Result<torsh_tensor::Tensor> {
        // The parameters are `f64`, so they are copied out of the mechanism
        // before `self` is borrowed mutably by the helpers below.
        match self.dp_mechanism {
            DPMechanism::Laplace {
                sensitivity,
                epsilon,
            } => self.add_laplace_noise(tensor, sensitivity / epsilon),
            DPMechanism::Gaussian {
                sensitivity,
                epsilon,
                delta,
            } => {
                let sigma = sensitivity * (2.0 * (1.25 / delta).ln()).sqrt() / epsilon;
                self.add_gaussian_noise(tensor, sigma)
            }
            // NOTE(review): the exponential mechanism is approximated with
            // fixed-scale Laplace noise that ignores its parameters.
            DPMechanism::Exponential { .. } => self.add_laplace_noise(tensor, 0.01),
            DPMechanism::ReportNoisyMax { epsilon } => {
                self.add_laplace_noise(tensor, 1.0 / epsilon)
            }
        }
    }

    /// Adds Laplace noise of the given `scale` to every element of `tensor`.
    fn add_laplace_noise(
        &mut self,
        tensor: torsh_tensor::Tensor,
        scale: f64,
    ) -> Result<torsh_tensor::Tensor> {
        let shape: Vec<usize> = tensor.shape().dims().to_vec();
        let noise = self
            .noise_generator
            .generate_laplace_tensor(&shape, scale)?;
        // The noise was previously generated and then dropped, releasing the raw
        // sample. NOTE(review): assumes `Tensor::add` is element-wise addition
        // of equally shaped tensors — confirm against torsh_tensor.
        Ok(tensor.add(&noise)?)
    }

    /// Adds zero-mean Gaussian noise with standard deviation `sigma` to every
    /// element of `tensor`.
    fn add_gaussian_noise(
        &mut self,
        tensor: torsh_tensor::Tensor,
        sigma: f64,
    ) -> Result<torsh_tensor::Tensor> {
        let shape: Vec<usize> = tensor.shape().dims().to_vec();
        let noise = self
            .noise_generator
            .generate_gaussian_tensor(&shape, 0.0, sigma)?;
        // See `add_laplace_noise`: the noise must actually be applied.
        Ok(tensor.add(&noise)?)
    }

    /// Remaining `(epsilon, delta)` of the privacy budget.
    pub fn budget_status(&self) -> (f64, f64) {
        self.privacy_budget.remaining()
    }

    /// Restores the full privacy budget and clears the access counter.
    pub fn reset_privacy(&mut self) {
        self.privacy_budget.reset();
        self.access_count = 0;
    }
}
impl<D: Dataset<Item = torsh_tensor::Tensor>> Dataset for PrivateDataset<D> {
    type Item = torsh_tensor::Tensor;

    /// Plain access is refused: it always returns an error pointing callers to
    /// `private_get()`, the budgeted accessor.
    fn get(&self, _index: usize) -> std::result::Result<Self::Item, torsh_core::TorshError> {
        let message = String::from("Use private_get() for privacy-preserving access");
        Err(torsh_core::TorshError::Other(message))
    }

    /// Number of samples in the wrapped dataset.
    fn len(&self) -> usize {
        self.dataset.len()
    }
}
/// Source of random noise tensors for the privacy mechanisms in this file.
pub trait NoiseGenerator: Send + Sync {
/// Produces a noise tensor of the given `shape` for a Laplace `scale`.
///
/// Implementations in this file either sample Laplace noise directly or
/// substitute Gaussian noise whose standard deviation is `scale * sqrt(2)`.
fn generate_laplace_tensor(
&mut self,
shape: &[usize],
scale: f64,
) -> Result<torsh_tensor::Tensor>;
/// Produces a tensor of the given `shape` filled with Gaussian noise with
/// the given `mean` and standard deviation `std`.
fn generate_gaussian_tensor(
&mut self,
shape: &[usize],
mean: f64,
std: f64,
) -> Result<torsh_tensor::Tensor>;
}
/// Seeded noise generator whose Laplace samples are drawn directly.
///
/// The type only holds a `u64`, so it is `Send` and `Sync` automatically; no
/// `unsafe impl` is needed.
#[derive(Clone, Debug)]
pub struct LaplaceNoise {
    /// Seed handed to the random number generator.
    seed: u64,
}

impl LaplaceNoise {
    /// Seed used by [`LaplaceNoise::new`].
    const DEFAULT_SEED: u64 = 42;

    /// Creates a generator with the fixed default seed (42).
    pub fn new() -> Self {
        Self::with_seed(Self::DEFAULT_SEED)
    }

    /// Creates a generator with a caller-chosen seed.
    pub fn with_seed(seed: u64) -> Self {
        Self { seed }
    }
}

impl Default for LaplaceNoise {
    /// Same as [`LaplaceNoise::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl NoiseGenerator for LaplaceNoise {
    /// Builds a CPU tensor of `shape` filled with Laplace noise of the given `scale`.
    ///
    /// Each call advances the stored seed, so consecutive calls return
    /// different noise while the sequence stays reproducible from the initial seed.
    ///
    /// # Errors
    ///
    /// Fails when the tensor cannot be created from the sampled data.
    fn generate_laplace_tensor(
        &mut self,
        shape: &[usize],
        scale: f64,
    ) -> Result<torsh_tensor::Tensor> {
        let size: usize = shape.iter().product();
        let mut rng = scirs2_core::random::Random::seed(self.seed);
        // Re-seeding with the same value on every call would return identical
        // noise each time, which could be cancelled by differencing two outputs.
        self.seed = self.seed.wrapping_add(1);
        let data: Vec<f32> = (0..size)
            .map(|_| {
                let u1: f64 = rng.random();
                let u2: f64 = rng.random();
                let sign = if u1 < 0.5 { -1.0 } else { 1.0 };
                // Assuming `random()` yields values in [0, 1), `1 - u2` lies in
                // (0, 1], so the logarithm is always finite (`ln(0)` is avoided).
                let magnitude = -(1.0 - u2).ln();
                (sign * scale * magnitude) as f32
            })
            .collect();
        torsh_tensor::Tensor::from_data(data, shape.to_vec(), torsh_core::DeviceType::Cpu).map_err(
            |e| DataError::tensor_creation_failed(format!("Failed to create tensor: {}", e)),
        )
    }

    /// Builds a CPU tensor of `shape` filled with Gaussian noise with the given
    /// `mean` and standard deviation `std`.
    ///
    /// Each call advances the stored seed, so consecutive calls differ.
    ///
    /// # Errors
    ///
    /// Fails when the normal distribution rejects its parameters or the tensor
    /// cannot be created.
    fn generate_gaussian_tensor(
        &mut self,
        shape: &[usize],
        mean: f64,
        std: f64,
    ) -> Result<torsh_tensor::Tensor> {
        let size: usize = shape.iter().product();
        let normal = RandNormal::new(mean, std).map_err(|e| {
            DataError::tensor_creation_failed(format!(
                "Failed to create RandNormal distribution: {}",
                e
            ))
        })?;
        let mut rng = scirs2_core::random::Random::seed(self.seed);
        // Move on to a new seed so the next call does not repeat this noise.
        self.seed = self.seed.wrapping_add(1);
        let data: Vec<f32> = (0..size).map(|_| normal.sample(&mut rng) as f32).collect();
        torsh_tensor::Tensor::from_data(data, shape.to_vec(), torsh_core::DeviceType::Cpu).map_err(
            |e| DataError::tensor_creation_failed(format!("Failed to create tensor: {}", e)),
        )
    }
}
/// Seeded noise generator that answers every request with Gaussian samples.
///
/// The type only holds a `u64`, so it is `Send` and `Sync` automatically; no
/// `unsafe impl` is needed.
#[derive(Clone, Debug)]
pub struct GaussianNoise {
    /// Seed handed to the random number generator.
    seed: u64,
}

impl GaussianNoise {
    /// Seed used by [`GaussianNoise::new`].
    const DEFAULT_SEED: u64 = 42;

    /// Creates a generator with the fixed default seed (42).
    pub fn new() -> Self {
        Self::with_seed(Self::DEFAULT_SEED)
    }

    /// Creates a generator with a caller-chosen seed.
    pub fn with_seed(seed: u64) -> Self {
        Self { seed }
    }
}

impl Default for GaussianNoise {
    /// Same as [`GaussianNoise::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl NoiseGenerator for GaussianNoise {
    /// Substitutes Gaussian noise for a Laplace request.
    ///
    /// A Laplace distribution with scale `b` has standard deviation `b * sqrt(2)`,
    /// so zero-mean Gaussian noise with that standard deviation is returned.
    fn generate_laplace_tensor(
        &mut self,
        shape: &[usize],
        scale: f64,
    ) -> Result<torsh_tensor::Tensor> {
        self.generate_gaussian_tensor(shape, 0.0, scale * std::f64::consts::SQRT_2)
    }

    /// Builds a CPU tensor of `shape` filled with Gaussian noise with the given
    /// `mean` and standard deviation `std`.
    ///
    /// Each call advances the stored seed, so consecutive calls return
    /// different noise while the sequence stays reproducible from the initial seed.
    ///
    /// # Errors
    ///
    /// Fails when the normal distribution rejects its parameters or the tensor
    /// cannot be created.
    fn generate_gaussian_tensor(
        &mut self,
        shape: &[usize],
        mean: f64,
        std: f64,
    ) -> Result<torsh_tensor::Tensor> {
        let size: usize = shape.iter().product();
        let normal = RandNormal::new(mean, std).map_err(|e| {
            DataError::tensor_creation_failed(format!(
                "Failed to create RandNormal distribution: {}",
                e
            ))
        })?;
        let mut rng = scirs2_core::random::Random::seed(self.seed);
        // Re-seeding with the same value on every call would return identical
        // noise each time, which could be cancelled by differencing two outputs.
        self.seed = self.seed.wrapping_add(1);
        let data: Vec<f32> = (0..size).map(|_| normal.sample(&mut rng) as f32).collect();
        torsh_tensor::Tensor::from_data(data, shape.to_vec(), torsh_core::DeviceType::Cpu).map_err(
            |e| DataError::tensor_creation_failed(format!("Failed to create tensor: {}", e)),
        )
    }
}
/// Wrapper around a sampler that charges a privacy budget per sampling pass and
/// limits how often each index may be handed out.
pub struct PrivateSampler<S: Sampler> {
/// Sampler that produces the candidate indices.
base_sampler: S,
/// Budget charged once per `private_iter` call.
privacy_budget: PrivacyBudget,
/// Mechanism; only `ReportNoisyMax` triggers index shuffling in `private_iter`.
dp_mechanism: DPMechanism,
/// How many times each index has been returned so far.
sample_access_counts: HashMap<usize, usize>,
/// Indices already returned this many times are filtered out.
max_sample_accesses: usize,
}
impl<S: Sampler> PrivateSampler<S> {
    /// Wraps `base_sampler`; each index may be returned at most 10 times unless
    /// the limit is changed with `with_max_sample_accesses`.
    pub fn new(base_sampler: S, privacy_budget: PrivacyBudget, dp_mechanism: DPMechanism) -> Self {
        let sample_access_counts = HashMap::new();
        Self {
            base_sampler,
            privacy_budget,
            dp_mechanism,
            sample_access_counts,
            max_sample_accesses: 10,
        }
    }

    /// Overrides the per-index access limit.
    pub fn with_max_sample_accesses(self, max_accesses: usize) -> Self {
        Self {
            max_sample_accesses: max_accesses,
            ..self
        }
    }

    /// Runs one sampling pass and returns the indices that are still allowed.
    ///
    /// Every pass costs a fixed `(0.01, 0.0)` of the budget. Indices that have
    /// reached the access limit are dropped, the remainder is shuffled when the
    /// mechanism is `ReportNoisyMax`, and the access counters are updated.
    ///
    /// # Errors
    ///
    /// Fails when the budget cannot cover the sampling cost.
    pub fn private_iter(&mut self) -> Result<Vec<usize>> {
        let sampling_epsilon = 0.01;
        let sampling_delta = 0.0;
        if !self
            .privacy_budget
            .can_answer(sampling_epsilon, sampling_delta)
        {
            return Err(DataError::privacy_budget_exceeded(
                "Insufficient budget for sampling operation",
            ));
        }

        // Keep only indices that have not yet hit the per-index limit.
        let limit = self.max_sample_accesses;
        let counts = &self.sample_access_counts;
        let mut indices: Vec<usize> = self
            .base_sampler
            .iter()
            .filter(|idx| counts.get(idx).copied().unwrap_or(0) < limit)
            .collect();

        if let DPMechanism::ReportNoisyMax { .. } = self.dp_mechanism {
            self.add_sampling_noise(&mut indices)?;
        }

        for idx in indices.iter().copied() {
            *self.sample_access_counts.entry(idx).or_default() += 1;
        }
        self.privacy_budget
            .consume(sampling_epsilon, sampling_delta)?;
        Ok(indices)
    }

    /// Shuffles `indices` in place when the mechanism is `ReportNoisyMax`.
    ///
    /// The number of shuffle rounds is `(1 / epsilon) * len * 0.1`, capped at
    /// `len / 10`; the generator is seeded with the constant 42.
    fn add_sampling_noise(&self, indices: &mut Vec<usize>) -> Result<()> {
        use scirs2_core::rand_prelude::SliceRandom;

        let epsilon = match &self.dp_mechanism {
            DPMechanism::ReportNoisyMax { epsilon } => *epsilon,
            _ => return Ok(()),
        };
        let mut rng = scirs2_core::random::Random::seed(42);
        let noise_level = (1.0 / epsilon * indices.len() as f64 * 0.1) as usize;
        let rounds = noise_level.min(indices.len() / 10);
        for _ in 0..rounds {
            indices.shuffle(&mut rng);
        }
        Ok(())
    }

    /// Remaining `(epsilon, delta)` of the privacy budget.
    pub fn budget_status(&self) -> (f64, f64) {
        self.privacy_budget.remaining()
    }

    /// Restores the full privacy budget and forgets all per-index access counts.
    pub fn reset_privacy(&mut self) {
        self.sample_access_counts.clear();
        self.privacy_budget.reset();
    }
}
impl<S: Sampler> Sampler for PrivateSampler<S> {
    type Iter = SamplerIterator;

    /// Length reported by the wrapped sampler.
    fn len(&self) -> usize {
        self.base_sampler.len()
    }

    /// Always yields an iterator over an empty index list; budgeted sampling is
    /// done through `private_iter`, which needs mutable access.
    fn iter(&self) -> Self::Iter {
        let no_indices = Vec::new();
        SamplerIterator::new(no_indices)
    }
}
/// Builder that collects privacy settings and produces a `PrivateDataset` or `PrivateSampler`.
pub struct PrivacyBuilder {
/// Total epsilon for the budget.
epsilon: f64,
/// Total delta for the budget.
delta: f64,
/// Mechanism handed to the built wrapper.
mechanism: DPMechanism,
/// Composition rule applied to the budget.
composition_type: CompositionType,
/// Optional access cap, used by `build_dataset` only.
max_accesses: Option<usize>,
/// Optional per-index access cap, used by `build_sampler` only.
max_sample_accesses: Option<usize>,
}
impl PrivacyBuilder {
    /// Starts a builder with the given `epsilon`, `delta = 1e-5`, a Laplace
    /// mechanism of sensitivity 1 using the same `epsilon`, basic composition
    /// and no access limits.
    pub fn new(epsilon: f64) -> Self {
        let mechanism = DPMechanism::Laplace {
            sensitivity: 1.0,
            epsilon,
        };
        Self {
            epsilon,
            delta: 1e-5,
            mechanism,
            composition_type: CompositionType::Basic,
            max_accesses: None,
            max_sample_accesses: None,
        }
    }

    /// Sets the total delta of the budget.
    pub fn delta(self, delta: f64) -> Self {
        Self { delta, ..self }
    }

    /// Sets the mechanism handed to the built wrapper.
    pub fn mechanism(self, mechanism: DPMechanism) -> Self {
        Self { mechanism, ..self }
    }

    /// Sets the composition rule of the budget.
    pub fn composition_type(self, composition_type: CompositionType) -> Self {
        Self {
            composition_type,
            ..self
        }
    }

    /// Caps the number of accesses of a built dataset.
    pub fn max_accesses(self, max_accesses: usize) -> Self {
        Self {
            max_accesses: Some(max_accesses),
            ..self
        }
    }

    /// Caps how often a built sampler may return each index.
    pub fn max_sample_accesses(self, max_sample_accesses: usize) -> Self {
        Self {
            max_sample_accesses: Some(max_sample_accesses),
            ..self
        }
    }

    /// Wraps `dataset` in a `PrivateDataset` configured from this builder.
    ///
    /// # Panics
    ///
    /// Panics when `PrivacyBudget::new` rejects the configured epsilon or delta.
    pub fn build_dataset<D: Dataset<Item = torsh_tensor::Tensor>>(
        self,
        dataset: D,
    ) -> PrivateDataset<D> {
        let Self {
            epsilon,
            delta,
            mechanism,
            composition_type,
            max_accesses,
            ..
        } = self;
        let budget = PrivacyBudget::new(epsilon, delta).with_composition(composition_type);
        let wrapped = PrivateDataset::new(dataset, budget, mechanism);
        match max_accesses {
            Some(limit) => wrapped.with_max_accesses(limit),
            None => wrapped,
        }
    }

    /// Wraps `sampler` in a `PrivateSampler` configured from this builder.
    ///
    /// # Panics
    ///
    /// Panics when `PrivacyBudget::new` rejects the configured epsilon or delta.
    pub fn build_sampler<S: Sampler>(self, sampler: S) -> PrivateSampler<S> {
        let Self {
            epsilon,
            delta,
            mechanism,
            composition_type,
            max_sample_accesses,
            ..
        } = self;
        let budget = PrivacyBudget::new(epsilon, delta).with_composition(composition_type);
        let wrapped = PrivateSampler::new(sampler, budget, mechanism);
        match max_sample_accesses {
            Some(limit) => wrapped.with_max_sample_accesses(limit),
            None => wrapped,
        }
    }
}
/// Stand-alone helpers for differential-privacy bookkeeping.
pub mod dp_utils {
    use super::*;

    /// Total `(epsilon, delta)` of `k` queries under basic composition, i.e.
    /// `k` times the per-query cost.
    pub fn basic_composition_cost(
        k: usize,
        epsilon_per_query: f64,
        delta_per_query: f64,
    ) -> (f64, f64) {
        let queries = k as f64;
        (queries * epsilon_per_query, queries * delta_per_query)
    }

    /// Laplace noise scale `sensitivity / epsilon`.
    ///
    /// The inputs are not validated; a zero `epsilon` yields a non-finite scale.
    pub fn laplace_noise_scale(sensitivity: f64, epsilon: f64) -> f64 {
        sensitivity / epsilon
    }

    /// Gaussian noise standard deviation
    /// `sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon`.
    ///
    /// The inputs are not validated; a zero `epsilon` or `delta` yields a
    /// non-finite result.
    pub fn gaussian_noise_scale(sensitivity: f64, epsilon: f64, delta: f64) -> f64 {
        sensitivity * (2.0 * (1.25 / delta).ln()).sqrt() / epsilon
    }

    /// Checks that `epsilon` is strictly positive and `delta` lies in `[0, 1)`.
    ///
    /// # Errors
    ///
    /// Returns an invalid-privacy-parameter error otherwise. NaN is rejected for
    /// both parameters.
    pub fn validate_privacy_parameters(epsilon: f64, delta: f64) -> Result<()> {
        // NaN compares false against everything, so it is tested explicitly.
        if epsilon.is_nan() || epsilon <= 0.0 {
            return Err(DataError::invalid_privacy_parameter(
                "epsilon must be positive",
            ));
        }
        // `contains` is false for NaN, so NaN falls into the error branch.
        if !(0.0..1.0).contains(&delta) {
            return Err(DataError::invalid_privacy_parameter(
                "delta must be in [0, 1)",
            ));
        }
        Ok(())
    }

    /// Estimates the total `(epsilon, delta)` of `num_queries` identical queries
    /// under the given composition rule.
    ///
    /// For [`CompositionType::Advanced`] the estimate is capped by the basic
    /// composition bound, which always holds. This also keeps the result finite
    /// when `delta_per_query` is zero or `num_queries` is zero.
    pub fn estimate_privacy_loss(
        num_queries: usize,
        epsilon_per_query: f64,
        delta_per_query: f64,
        composition_type: &CompositionType,
    ) -> (f64, f64) {
        match composition_type {
            CompositionType::Basic => {
                basic_composition_cost(num_queries, epsilon_per_query, delta_per_query)
            }
            CompositionType::Advanced => {
                let k = num_queries as f64;
                let (basic_epsilon, delta_total) =
                    basic_composition_cost(num_queries, epsilon_per_query, delta_per_query);
                let advanced_epsilon =
                    epsilon_per_query * (2.0 * k * (1.0 / delta_per_query).ln()).sqrt();
                // `f64::min` returns the other operand when one side is NaN, so
                // a degenerate advanced estimate falls back to the basic bound.
                (advanced_epsilon.min(basic_epsilon), delta_total)
            }
            CompositionType::MomentsAccountant => {
                let k = num_queries as f64;
                let epsilon_total = epsilon_per_query * (k * 0.5).sqrt();
                (epsilon_total, k * delta_per_query)
            }
        }
    }
}
// Adapter around `TensorDataset` that exposes a single tensor per item; the unit
// tests in this file use it to build a `PrivateDataset`.
// `dead_code` is allowed presumably because it is only referenced from tests.
#[allow(dead_code)]
struct SingleTensorDataset<T: torsh_core::dtype::TensorElement> {
// Wrapped dataset whose items are collections of tensors.
inner: crate::dataset::TensorDataset<T>,
}
#[allow(dead_code)]
impl<T: torsh_core::dtype::TensorElement> SingleTensorDataset<T> {
// Wraps `inner` without copying it.
fn new(inner: crate::dataset::TensorDataset<T>) -> Self {
Self { inner }
}
}
impl<T: torsh_core::dtype::TensorElement> Dataset for SingleTensorDataset<T> {
    type Item = torsh_tensor::Tensor<T>;

    /// Number of items in the wrapped dataset.
    fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns the first tensor of the wrapped dataset's item at `idx`.
    ///
    /// Fails when the wrapped lookup fails or the item holds no tensor at all.
    fn get(&self, idx: usize) -> torsh_core::error::Result<Self::Item> {
        let mut tensors = self.inner.get(idx)?.into_iter();
        match tensors.next() {
            Some(first) => Ok(first),
            None => Err(torsh_core::TorshError::Other(
                "Dataset contains no tensors".to_string(),
            )),
        }
    }
}
// Unit tests for the budget, the mechanisms, the noise generators, the builder
// and the `dp_utils` helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::dataset::TensorDataset;
use torsh_tensor::creation::ones;
// Spending half of the budget leaves half, and a larger query no longer fits.
#[test]
fn test_privacy_budget() {
let mut budget = PrivacyBudget::new(1.0, 1e-5);
assert!(budget.can_answer(0.5, 0.0));
assert!(budget.consume(0.5, 0.0).is_ok());
let (remaining_eps, remaining_delta) = budget.remaining();
assert!((remaining_eps - 0.5).abs() < 1e-10);
assert!((remaining_delta - 1e-5).abs() < 1e-10);
assert!(!budget.can_answer(0.6, 0.0));
}
// A `PrivateDataset` can be constructed for every mechanism variant.
#[test]
fn test_dp_mechanisms() {
let mechanisms = vec![
DPMechanism::Laplace {
sensitivity: 1.0,
epsilon: 1.0,
},
DPMechanism::Gaussian {
sensitivity: 1.0,
epsilon: 1.0,
delta: 1e-5,
},
DPMechanism::Exponential {
sensitivity: 1.0,
epsilon: 1.0,
},
DPMechanism::ReportNoisyMax { epsilon: 1.0 },
];
for mechanism in mechanisms {
let budget = PrivacyBudget::new(1.0, 1e-5);
let data = ones::<f32>(&[10, 5]).unwrap();
let dataset = SingleTensorDataset::new(TensorDataset::from_tensor(data));
let _private_dataset = PrivateDataset::new(dataset, budget, mechanism);
}
}
// Both generators return tensors of the requested shape.
#[test]
fn test_noise_generators() {
let mut laplace_gen = LaplaceNoise::with_seed(42);
let mut gaussian_gen = GaussianNoise::with_seed(42);
let shape = &[5, 3];
let laplace_noise = laplace_gen.generate_laplace_tensor(shape, 1.0).unwrap();
assert_eq!(laplace_noise.shape().dims(), shape);
let gaussian_noise = gaussian_gen
.generate_gaussian_tensor(shape, 0.0, 1.0)
.unwrap();
assert_eq!(gaussian_noise.shape().dims(), shape);
}
// The builder passes the dataset length through and starts with a full budget.
#[test]
fn test_privacy_builder() {
let data = ones::<f32>(&[10, 5]).unwrap();
let dataset = SingleTensorDataset::new(TensorDataset::from_tensor(data));
let private_dataset = PrivacyBuilder::new(1.0)
.delta(1e-5)
.mechanism(DPMechanism::Laplace {
sensitivity: 1.0,
epsilon: 1.0,
})
.max_accesses(100)
.build_dataset(dataset);
assert_eq!(private_dataset.len(), 10);
let (eps, delta) = private_dataset.budget_status();
assert!((eps - 1.0).abs() < 1e-10);
assert!((delta - 1e-5).abs() < 1e-10);
}
// Parameter validation, Laplace scale and basic composition arithmetic.
#[test]
fn test_dp_utils() {
use dp_utils::*;
assert!(validate_privacy_parameters(1.0, 1e-5).is_ok());
assert!(validate_privacy_parameters(-1.0, 1e-5).is_err());
assert!(validate_privacy_parameters(1.0, 1.0).is_err());
let scale = laplace_noise_scale(1.0, 1.0);
assert!((scale - 1.0).abs() < 1e-10);
let (eps, delta) = basic_composition_cost(10, 0.1, 0.0);
assert!((eps - 1.0).abs() < 1e-10);
assert!((delta - 0.0).abs() < 1e-10);
}
}