ghostflow_nn/
differential_privacy.rs

//! Differential Privacy for Machine Learning
//!
//! Implements privacy-preserving machine learning techniques:
//! - DP-SGD (Differentially Private Stochastic Gradient Descent)
//! - Gradient clipping and noise addition
//! - Privacy budget tracking (epsilon, delta)
//! - Moments accountant for tight privacy bounds
//! - PATE (Private Aggregation of Teacher Ensembles)

use ghostflow_core::Tensor;
use rand::Rng;
use rand_distr::{Distribution, Normal};
use std::collections::VecDeque;

/// Differential privacy configuration
#[derive(Debug, Clone)]
pub struct DPConfig {
    /// Target epsilon (privacy budget)
    pub target_epsilon: f32,
    /// Target delta (failure probability)
    pub target_delta: f32,
    /// Gradient clipping norm
    pub clip_norm: f32,
    /// Noise multiplier
    pub noise_multiplier: f32,
    /// Batch size for sampling
    pub batch_size: usize,
    /// Total number of training examples
    pub num_examples: usize,
}

impl Default for DPConfig {
    fn default() -> Self {
        DPConfig {
            target_epsilon: 1.0,
            target_delta: 1e-5,
            clip_norm: 1.0,
            noise_multiplier: 1.1,
            batch_size: 256,
            num_examples: 60000,
        }
    }
}
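
// Reference point for the default `noise_multiplier` (a sketch, not used by the
// rest of the module): the classic Gaussian-mechanism calibration for a *single*
// release with L2 sensitivity `clip_norm` is
//     sigma >= clip_norm * sqrt(2 * ln(1.25 / delta)) / epsilon
// With the defaults above (epsilon = 1.0, delta = 1e-5) this gives sigma ≈ 4.85,
// far larger than 1.1. DP-SGD can use the smaller multiplier only because each
// step touches a small random sample (q = 256 / 60000 ≈ 0.0043) and the
// accountant composes many such subsampled steps.
#[allow(dead_code)]
fn gaussian_mechanism_sigma(clip_norm: f32, epsilon: f32, delta: f32) -> f32 {
    clip_norm * (2.0 * (1.25 / delta).ln()).sqrt() / epsilon
}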

/// Privacy accountant for tracking privacy budget
pub struct PrivacyAccountant {
    config: DPConfig,
    steps: usize,
    epsilon_spent: f32,
    moments: VecDeque<f32>,
}

impl PrivacyAccountant {
    /// Create new privacy accountant
    pub fn new(config: DPConfig) -> Self {
        PrivacyAccountant {
            config,
            steps: 0,
            epsilon_spent: 0.0,
            moments: VecDeque::new(),
        }
    }

    /// Record a training step
    pub fn step(&mut self) {
        self.steps += 1;
        
        // Per-step privacy loss estimate from the sampling rate and noise scale
        let q = self.config.batch_size as f32 / self.config.num_examples as f32;
        let sigma = self.config.noise_multiplier;
        
        // Simplified per-step moment; a faithful moments accountant tracks
        // log-moments of the privacy loss random variable at several orders
        let moment = q * q / (2.0 * sigma * sigma);
        self.moments.push_back(moment);
        
        // Bound memory: stored moments are kept for inspection only and are not
        // consumed by compute_epsilon below
        if self.moments.len() > 1000 {
            self.moments.pop_front();
        }
        
        // Update epsilon spent
        self.epsilon_spent = self.compute_epsilon();
    }

    /// Compute current epsilon (simplified moments-accountant-style bound)
    fn compute_epsilon(&self) -> f32 {
        if self.steps == 0 {
            return 0.0;
        }
        
        let q = self.config.batch_size as f32 / self.config.num_examples as f32;
        let sigma = self.config.noise_multiplier;
        let steps = self.steps as f32;
        
        // Simplified closed-form bound in the spirit of the moments accountant
        // (Abadi et al., 2016): epsilon grows like q * sqrt(T * ln(1/delta)) / sigma,
        // so it increases monotonically with the number of steps. A real
        // implementation would compose Renyi-DP moments instead.
        let epsilon = q * (steps * (1.0 / self.config.target_delta).ln()).sqrt() / sigma;
        
        epsilon.min(self.config.target_epsilon * 2.0) // Cap at 2x target
    }

    /// Check if privacy budget is exhausted
    pub fn is_budget_exhausted(&self) -> bool {
        self.epsilon_spent >= self.config.target_epsilon
    }
    
    /// Get current privacy parameters
    pub fn get_privacy_spent(&self) -> (f32, f32) {
        (self.epsilon_spent, self.config.target_delta)
    }
    
    /// Get remaining privacy budget
    pub fn get_remaining_budget(&self) -> f32 {
        (self.config.target_epsilon - self.epsilon_spent).max(0.0)
    }
}
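
// A minimal sketch of standalone budget tracking (hypothetical training loop;
// the DPSGDOptimizer below drives the accountant automatically):
//
//     let mut accountant = PrivacyAccountant::new(DPConfig::default());
//     while !accountant.is_budget_exhausted() {
//         // ... run one clipped, noised gradient step ...
//         accountant.step();
//     }
//     let (eps, delta) = accountant.get_privacy_spent();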

/// DP-SGD optimizer wrapper
pub struct DPSGDOptimizer {
    config: DPConfig,
    accountant: PrivacyAccountant,
    rng: rand::rngs::ThreadRng,
}

impl DPSGDOptimizer {
    /// Create new DP-SGD optimizer
    pub fn new(config: DPConfig) -> Self {
        let accountant = PrivacyAccountant::new(config.clone());
        DPSGDOptimizer {
            config,
            accountant,
            rng: rand::thread_rng(),
        }
    }

    /// Clip gradients to bound sensitivity
    pub fn clip_gradients(&self, gradients: &Tensor) -> Result<Tensor, String> {
        let grad_data = gradients.data_f32();
        
        // Compute L2 norm of gradients
        let mut norm_sq = 0.0f32;
        for &g in grad_data.iter() {
            norm_sq += g * g;
        }
        let norm = norm_sq.sqrt();
        
        // Clip if norm exceeds threshold
        let clipped_data = if norm > self.config.clip_norm {
            let scale = self.config.clip_norm / norm;
            grad_data.iter().map(|&g| g * scale).collect()
        } else {
            grad_data
        };
        
        Tensor::from_slice(&clipped_data, gradients.dims())
            .map_err(|e| format!("Failed to create clipped tensor: {:?}", e))
    }

    /// Add calibrated Gaussian noise to gradients
    pub fn add_noise(&mut self, gradients: &Tensor) -> Result<Tensor, String> {
        let grad_data = gradients.data_f32();
        
        // Compute noise scale
        let noise_scale = self.config.clip_norm * self.config.noise_multiplier;
        let normal = Normal::new(0.0, noise_scale as f64)
            .map_err(|e| format!("Failed to create normal distribution: {}", e))?;
        
        // Add Gaussian noise to each gradient
        let noisy_data: Vec<f32> = grad_data.iter().map(|&g| {
            let noise = normal.sample(&mut self.rng) as f32;
            g + noise
        }).collect();
        
        Tensor::from_slice(&noisy_data, gradients.dims())
            .map_err(|e| format!("Failed to create noisy tensor: {:?}", e))
    }

    /// Process gradients with DP-SGD (clip + noise)
    ///
    /// Note: textbook DP-SGD clips each per-example gradient before summing;
    /// this helper applies clipping and noise to whatever tensor it is given.
    pub fn process_gradients(&mut self, gradients: &Tensor) -> Result<Tensor, String> {
        // Step 1: Clip gradients to bound sensitivity
        let clipped = self.clip_gradients(gradients)?;
        
        // Step 2: Add calibrated Gaussian noise
        let noisy = self.add_noise(&clipped)?;
        
        // Step 3: Record the step with the privacy accountant
        self.accountant.step();
        
        Ok(noisy)
    }

    /// Check if training should stop due to privacy budget
    pub fn should_stop(&self) -> bool {
        self.accountant.is_budget_exhausted()
    }
    
    /// Get privacy spent
    pub fn get_privacy_spent(&self) -> (f32, f32) {
        self.accountant.get_privacy_spent()
    }
    
    /// Get remaining budget
    pub fn get_remaining_budget(&self) -> f32 {
        self.accountant.get_remaining_budget()
    }
}
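
// A minimal end-to-end sketch (hypothetical training loop; `batch_gradients`
// stands in for whatever gradient tensor the caller computes per batch):
//
//     let mut dp_sgd = DPSGDOptimizer::new(DPConfig::default());
//     while !dp_sgd.should_stop() {
//         let private_grads = dp_sgd.process_gradients(&batch_gradients)?;
//         // ... apply `private_grads` with the caller's optimizer step ...
//     }
//     let (eps, delta) = dp_sgd.get_privacy_spent();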

/// PATE (Private Aggregation of Teacher Ensembles)
pub struct PATEEnsemble {
    num_teachers: usize,
    num_classes: usize,
    epsilon: f32,
    delta: f32,
}

impl PATEEnsemble {
    /// Create new PATE ensemble
    pub fn new(num_teachers: usize, num_classes: usize, epsilon: f32, delta: f32) -> Self {
        PATEEnsemble {
            num_teachers,
            num_classes,
            epsilon,
            delta,
        }
    }

    /// Aggregate teacher predictions with privacy
    pub fn aggregate_predictions(&self, teacher_votes: &[Vec<usize>]) -> Result<Vec<usize>, String> {
        if teacher_votes.is_empty() {
            return Err("No teacher votes provided".to_string());
        }
        if teacher_votes.len() != self.num_teachers {
            return Err(format!(
                "Expected votes from {} teachers, got {}",
                self.num_teachers,
                teacher_votes.len()
            ));
        }
        
        let num_samples = teacher_votes[0].len();
        let mut aggregated = Vec::with_capacity(num_samples);
        let mut rng = rand::thread_rng();
        
        // For each sample
        for i in 0..num_samples {
            // Count votes for each class
            let mut counts = vec![0usize; self.num_classes];
            for teacher_preds in teacher_votes.iter() {
                if i < teacher_preds.len() {
                    let pred = teacher_preds[i];
                    if pred < self.num_classes {
                        counts[pred] += 1;
                    }
                }
            }
            
            // Add Laplace noise for privacy (noisy-max aggregation)
            let sensitivity = 1.0; // Each teacher adds at most 1 to any single class count
            let scale = sensitivity / self.epsilon;
            
            let mut noisy_counts = Vec::with_capacity(self.num_classes);
            for &count in counts.iter() {
                // Sample from Laplace distribution via inverse CDF
                let u: f32 = rng.gen_range(-0.5..0.5);
                let noise = -scale * u.signum() * (1.0 - 2.0 * u.abs()).ln();
                let noisy_count = count as f32 + noise;
                noisy_counts.push(noisy_count);
            }
            
            // Find class with maximum noisy count
            let pred_class = noisy_counts.iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                .map(|(idx, _)| idx)
                .unwrap_or(0);
            
            aggregated.push(pred_class);
        }
        
        Ok(aggregated)
    }

    /// Compute privacy cost of aggregation
    pub fn compute_privacy_cost(&self, num_queries: usize) -> (f32, f32) {
        // Simplified composition: epsilon scales with sqrt(num_queries) (roughly
        // advanced composition) and delta composes linearly
        let epsilon_total = self.epsilon * (num_queries as f32).sqrt();
        let delta_total = self.delta * num_queries as f32;
        (epsilon_total, delta_total)
    }
}
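
// A minimal usage sketch (hypothetical setup; `teacher_votes` would come from
// teacher models trained on disjoint partitions of the private data, queried
// on unlabeled public samples):
//
//     let pate = PATEEnsemble::new(10, 3, 1.0, 1e-5);
//     let labels = pate.aggregate_predictions(&teacher_votes)?;
//     let (eps, delta) = pate.compute_privacy_cost(labels.len());
//     // `labels` can then be used to train a student model on the public data.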

/// Local differential privacy for data collection
pub struct LocalDP {
    epsilon: f32,
}

impl LocalDP {
    /// Create new local DP mechanism
    pub fn new(epsilon: f32) -> Self {
        LocalDP { epsilon }
    }

    /// Randomized response for binary data
    pub fn randomized_response(&self, value: bool) -> bool {
        let mut rng = rand::thread_rng();
        // Report the true value with probability p = e^eps / (e^eps + 1); flip otherwise
        let p = self.epsilon.exp() / (self.epsilon.exp() + 1.0);
        
        let u: f32 = rng.gen();
        if u < p {
            value
        } else {
            !value
        }
    }

    /// Add Laplace noise to numeric data
    pub fn add_laplace_noise(&self, value: f32, sensitivity: f32) -> f32 {
        let mut rng = rand::thread_rng();
        let scale = sensitivity / self.epsilon;
        
        let u: f32 = rng.gen_range(-0.5..0.5);
        let noise = -scale * u.signum() * (1.0 - 2.0 * u.abs()).ln();
        
        value + noise
    }
    
    /// Privatize a vector of values
    pub fn privatize_vector(&self, values: &[f32], sensitivity: f32) -> Vec<f32> {
        values.iter()
            .map(|&v| self.add_laplace_noise(v, sensitivity))
            .collect()
    }
}
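
// Debiasing note for randomized_response: a true value is reported faithfully
// with probability p = e^eps / (e^eps + 1). If `r` is the observed fraction of
// `true` reports over many users, an unbiased estimate of the true fraction is
// (r - (1 - p)) / (2p - 1). Sketch (hypothetical `reports: Vec<bool>`, eps = 1.0):
//
//     let p = 1.0f32.exp() / (1.0f32.exp() + 1.0);
//     let r = reports.iter().filter(|&&b| b).count() as f32 / reports.len() as f32;
//     let estimate = (r - (1.0 - p)) / (2.0 * p - 1.0);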

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_privacy_accountant() {
        let config = DPConfig::default();
        let mut accountant = PrivacyAccountant::new(config);
        
        // Initially no privacy spent
        assert_eq!(accountant.steps, 0);
        assert_eq!(accountant.epsilon_spent, 0.0);
        
        // After steps, privacy is spent
        for _ in 0..100 {
            accountant.step();
        }
        
        assert!(accountant.epsilon_spent > 0.0);
        assert!(accountant.epsilon_spent <= accountant.config.target_epsilon * 2.0);
    }

    #[test]
    fn test_gradient_clipping() {
        let config = DPConfig {
            clip_norm: 1.0,
            ..Default::default()
        };
        let optimizer = DPSGDOptimizer::new(config);
        
        let gradients = Tensor::from_slice(&[2.0, 3.0, 4.0], &[3]).unwrap();
        let clipped = optimizer.clip_gradients(&gradients).unwrap();
        
        // Check that norm is clipped to 1.0
        let data = clipped.data_f32();
        let norm: f32 = data.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 1e-5);
    }

    #[test]
    fn test_pate_aggregation() {
        let pate = PATEEnsemble::new(10, 3, 1.0, 1e-5);
        
        // 10 teachers, 5 samples, 3 classes
        let teacher_votes = vec![
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 1, 1],
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 0, 2],
            vec![0, 1, 1, 0, 1],
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 0, 1],
            vec![0, 1, 2, 0, 1],
        ];
        
        let result = pate.aggregate_predictions(&teacher_votes).unwrap();
        assert_eq!(result.len(), 5);
    }

    #[test]
    fn test_local_dp() {
        let ldp = LocalDP::new(1.0);
        
        // Test randomized response
        let value = true;
        let _ = ldp.randomized_response(value);
        
        // Test Laplace noise (unbounded in principle, but with scale 1.0 the
        // chance of |noise| >= 10 is e^-10, so this check is effectively safe)
        let noisy = ldp.add_laplace_noise(10.0, 1.0);
        assert!((noisy - 10.0).abs() < 10.0);
        
        // Test vector privatization
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let privatized = ldp.privatize_vector(&values, 1.0);
        assert_eq!(privatized.len(), values.len());
    }
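
    // Illustrative end-to-end sketch for DP-SGD, using only the Tensor API
    // already exercised above (from_slice / data_f32 / dims)
    #[test]
    fn test_dp_sgd_process_gradients() {
        let mut optimizer = DPSGDOptimizer::new(DPConfig::default());
        let gradients = Tensor::from_slice(&[0.5, -1.5, 2.0, 0.0], &[4]).unwrap();
        
        // One call clips, adds noise, and records an accounting step
        let processed = optimizer.process_gradients(&gradients).unwrap();
        assert_eq!(processed.data_f32().len(), 4);
        
        let (eps, _delta) = optimizer.get_privacy_spent();
        assert!(eps > 0.0);
    }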
}