chie_crypto/
differential_privacy.rs

1//! Differential Privacy primitives for privacy-preserving data analysis
2//!
3//! Differential privacy provides mathematical guarantees that the output of a computation
4//! does not reveal too much about any individual input. This module implements:
5//! - Laplace mechanism for numeric queries
6//! - Exponential mechanism for non-numeric queries
7//! - Gaussian mechanism for improved utility
8//! - Privacy budget tracking and composition
9//!
10//! # Example
11//!
12//! ```
13//! use chie_crypto::differential_privacy::*;
14//!
15//! // Create a Laplace mechanism with epsilon = 1.0
16//! let mechanism = LaplaceMechanism::new(1.0).unwrap();
17//!
18//! // Add noise to a sum query with sensitivity 1.0
19//! let true_sum = 1000.0;
20//! let noisy_sum = mechanism.add_noise(true_sum, 1.0).unwrap();
21//!
22//! // The noisy result is approximately 1000 but not exact
23//! assert!((noisy_sum - true_sum).abs() < 100.0); // Usually within 100
24//! ```
25
26use rand::Rng;
27use serde::{Deserialize, Serialize};
28use thiserror::Error;
29
30/// Differential privacy error types
31#[derive(Error, Debug)]
32pub enum DPError {
33    #[error("Invalid epsilon: {0}")]
34    InvalidEpsilon(String),
35    #[error("Invalid delta: {0}")]
36    InvalidDelta(String),
37    #[error("Invalid sensitivity: {0}")]
38    InvalidSensitivity(String),
39    #[error("Privacy budget exceeded")]
40    BudgetExceeded,
41    #[error("Invalid probability: {0}")]
42    InvalidProbability(String),
43}
44
45/// Result type for differential privacy operations
46pub type DPResult<T> = Result<T, DPError>;
47
48/// Laplace mechanism for differential privacy
49///
50/// Adds noise drawn from Laplace distribution with scale parameter determined by
51/// the privacy parameter epsilon and the query's sensitivity.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct LaplaceMechanism {
54    /// Privacy parameter (smaller = more private)
55    epsilon: f64,
56}
57
58impl LaplaceMechanism {
59    /// Create a new Laplace mechanism with given epsilon
60    ///
61    /// # Arguments
62    /// * `epsilon` - Privacy parameter (must be positive)
63    pub fn new(epsilon: f64) -> DPResult<Self> {
64        if epsilon <= 0.0 {
65            return Err(DPError::InvalidEpsilon(
66                "epsilon must be positive".to_string(),
67            ));
68        }
69
70        Ok(Self { epsilon })
71    }
72
73    /// Add Laplace noise to a value
74    ///
75    /// # Arguments
76    /// * `true_value` - The true value to add noise to
77    /// * `sensitivity` - The global sensitivity of the query
78    ///
79    /// # Returns
80    /// The value with added Laplace noise
81    pub fn add_noise(&self, true_value: f64, sensitivity: f64) -> DPResult<f64> {
82        if sensitivity <= 0.0 {
83            return Err(DPError::InvalidSensitivity(
84                "sensitivity must be positive".to_string(),
85            ));
86        }
87
88        let scale = sensitivity / self.epsilon;
89        let noise = sample_laplace(scale);
90
91        Ok(true_value + noise)
92    }
93
94    /// Get the privacy parameter
95    pub fn epsilon(&self) -> f64 {
96        self.epsilon
97    }
98}
99
100/// Gaussian mechanism for differential privacy
101///
102/// Adds noise drawn from Gaussian distribution. Provides better utility than Laplace
103/// for approximate differential privacy (with delta > 0).
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct GaussianMechanism {
106    /// Privacy parameter epsilon
107    epsilon: f64,
108    /// Privacy parameter delta (failure probability)
109    delta: f64,
110}
111
112impl GaussianMechanism {
113    /// Create a new Gaussian mechanism
114    ///
115    /// # Arguments
116    /// * `epsilon` - Privacy parameter (must be positive)
117    /// * `delta` - Failure probability (must be in (0, 1))
118    pub fn new(epsilon: f64, delta: f64) -> DPResult<Self> {
119        if epsilon <= 0.0 {
120            return Err(DPError::InvalidEpsilon(
121                "epsilon must be positive".to_string(),
122            ));
123        }
124
125        if delta <= 0.0 || delta >= 1.0 {
126            return Err(DPError::InvalidDelta("delta must be in (0, 1)".to_string()));
127        }
128
129        Ok(Self { epsilon, delta })
130    }
131
132    /// Add Gaussian noise to a value
133    ///
134    /// # Arguments
135    /// * `true_value` - The true value to add noise to
136    /// * `sensitivity` - The L2 sensitivity of the query
137    ///
138    /// # Returns
139    /// The value with added Gaussian noise
140    pub fn add_noise(&self, true_value: f64, sensitivity: f64) -> DPResult<f64> {
141        if sensitivity <= 0.0 {
142            return Err(DPError::InvalidSensitivity(
143                "sensitivity must be positive".to_string(),
144            ));
145        }
146
147        // Standard deviation for Gaussian noise
148        // sigma = sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon
149        let sigma = sensitivity * (2.0 * (1.25 / self.delta).ln()).sqrt() / self.epsilon;
150
151        let noise = sample_gaussian(sigma);
152
153        Ok(true_value + noise)
154    }
155
156    /// Get the privacy parameters
157    pub fn epsilon(&self) -> f64 {
158        self.epsilon
159    }
160
161    pub fn delta(&self) -> f64 {
162        self.delta
163    }
164}
165
166/// Exponential mechanism for selecting from a set of candidates
167///
168/// Used when the output is non-numeric (e.g., selecting the best option from a set)
169#[derive(Debug, Clone)]
170pub struct ExponentialMechanism {
171    /// Privacy parameter
172    epsilon: f64,
173}
174
175impl ExponentialMechanism {
176    /// Create a new exponential mechanism
177    ///
178    /// # Arguments
179    /// * `epsilon` - Privacy parameter (must be positive)
180    pub fn new(epsilon: f64) -> DPResult<Self> {
181        if epsilon <= 0.0 {
182            return Err(DPError::InvalidEpsilon(
183                "epsilon must be positive".to_string(),
184            ));
185        }
186
187        Ok(Self { epsilon })
188    }
189
190    /// Select an output from candidates based on their utility scores
191    ///
192    /// # Arguments
193    /// * `candidates` - Vector of candidate outputs
194    /// * `utilities` - Utility scores for each candidate
195    /// * `sensitivity` - Sensitivity of the utility function
196    ///
197    /// # Returns
198    /// Index of the selected candidate
199    pub fn select<T>(
200        &self,
201        candidates: &[T],
202        utilities: &[f64],
203        sensitivity: f64,
204    ) -> DPResult<usize> {
205        if candidates.len() != utilities.len() {
206            return Err(DPError::InvalidProbability(
207                "candidates and utilities must have the same length".to_string(),
208            ));
209        }
210
211        if candidates.is_empty() {
212            return Err(DPError::InvalidProbability(
213                "cannot select from empty set".to_string(),
214            ));
215        }
216
217        if sensitivity <= 0.0 {
218            return Err(DPError::InvalidSensitivity(
219                "sensitivity must be positive".to_string(),
220            ));
221        }
222
223        // Compute probabilities proportional to exp(epsilon * utility / (2 * sensitivity))
224        let mut probabilities = Vec::with_capacity(utilities.len());
225        let mut max_utility = utilities[0];
226
227        for &utility in utilities {
228            if utility > max_utility {
229                max_utility = utility;
230            }
231        }
232
233        // Normalize for numerical stability
234        let mut sum = 0.0;
235        for &utility in utilities {
236            let prob = ((self.epsilon * (utility - max_utility)) / (2.0 * sensitivity)).exp();
237            probabilities.push(prob);
238            sum += prob;
239        }
240
241        // Normalize probabilities
242        for prob in &mut probabilities {
243            *prob /= sum;
244        }
245
246        // Sample from categorical distribution
247        let mut rng = rand::thread_rng();
248        let sample: f64 = rng.gen_range(0.0..1.0);
249
250        let mut cumulative = 0.0;
251        for (i, &prob) in probabilities.iter().enumerate() {
252            cumulative += prob;
253            if sample <= cumulative {
254                return Ok(i);
255            }
256        }
257
258        // Fallback (should not happen due to floating point errors)
259        Ok(candidates.len() - 1)
260    }
261
262    pub fn epsilon(&self) -> f64 {
263        self.epsilon
264    }
265}
266
267/// Privacy budget tracker for composition of multiple queries
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct PrivacyBudget {
270    /// Total epsilon budget
271    total_epsilon: f64,
272    /// Total delta budget (for approximate DP)
273    total_delta: f64,
274    /// Consumed epsilon
275    consumed_epsilon: f64,
276    /// Consumed delta
277    consumed_delta: f64,
278}
279
280impl PrivacyBudget {
281    /// Create a new privacy budget
282    ///
283    /// # Arguments
284    /// * `total_epsilon` - Total epsilon budget
285    /// * `total_delta` - Total delta budget (use 0.0 for pure DP)
286    pub fn new(total_epsilon: f64, total_delta: f64) -> DPResult<Self> {
287        if total_epsilon <= 0.0 {
288            return Err(DPError::InvalidEpsilon(
289                "epsilon must be positive".to_string(),
290            ));
291        }
292
293        if !(0.0..1.0).contains(&total_delta) {
294            return Err(DPError::InvalidDelta("delta must be in [0, 1)".to_string()));
295        }
296
297        Ok(Self {
298            total_epsilon,
299            total_delta,
300            consumed_epsilon: 0.0,
301            consumed_delta: 0.0,
302        })
303    }
304
305    /// Check if a query can be executed with the given privacy cost
306    ///
307    /// # Arguments
308    /// * `epsilon` - Epsilon cost of the query
309    /// * `delta` - Delta cost of the query
310    pub fn can_execute(&self, epsilon: f64, delta: f64) -> bool {
311        self.consumed_epsilon + epsilon <= self.total_epsilon
312            && self.consumed_delta + delta <= self.total_delta
313    }
314
315    /// Consume privacy budget for a query
316    ///
317    /// # Arguments
318    /// * `epsilon` - Epsilon cost of the query
319    /// * `delta` - Delta cost of the query
320    pub fn consume(&mut self, epsilon: f64, delta: f64) -> DPResult<()> {
321        if !self.can_execute(epsilon, delta) {
322            return Err(DPError::BudgetExceeded);
323        }
324
325        self.consumed_epsilon += epsilon;
326        self.consumed_delta += delta;
327
328        Ok(())
329    }
330
331    /// Get remaining epsilon budget
332    pub fn remaining_epsilon(&self) -> f64 {
333        self.total_epsilon - self.consumed_epsilon
334    }
335
336    /// Get remaining delta budget
337    pub fn remaining_delta(&self) -> f64 {
338        self.total_delta - self.consumed_delta
339    }
340
341    /// Reset the budget
342    pub fn reset(&mut self) {
343        self.consumed_epsilon = 0.0;
344        self.consumed_delta = 0.0;
345    }
346}
347
348/// Sample from Laplace distribution with given scale parameter
349fn sample_laplace(scale: f64) -> f64 {
350    let mut rng = rand::thread_rng();
351    let u: f64 = rng.gen_range(-0.5..0.5);
352
353    -scale * u.signum() * (1.0 - 2.0 * u.abs()).ln()
354}
355
356/// Sample from Gaussian distribution with given standard deviation
357fn sample_gaussian(sigma: f64) -> f64 {
358    let mut rng = rand::thread_rng();
359
360    // Box-Muller transform
361    let u1: f64 = rng.gen_range(0.0..1.0);
362    let u2: f64 = rng.gen_range(0.0..1.0);
363
364    let r = (-2.0 * u1.ln()).sqrt();
365    let theta = 2.0 * std::f64::consts::PI * u2;
366
367    sigma * r * theta.cos()
368}
369
370#[cfg(test)]
371mod tests {
372    use super::*;
373
374    #[test]
375    fn test_laplace_mechanism_basic() {
376        let mechanism = LaplaceMechanism::new(1.0).unwrap();
377
378        let true_value = 100.0;
379        let sensitivity = 1.0;
380
381        let noisy_value = mechanism.add_noise(true_value, sensitivity).unwrap();
382
383        // With high probability, noise should be within reasonable bounds
384        assert!((noisy_value - true_value).abs() < 50.0);
385    }
386
387    #[test]
388    fn test_laplace_invalid_epsilon() {
389        let result = LaplaceMechanism::new(-1.0);
390        assert!(result.is_err());
391
392        let result = LaplaceMechanism::new(0.0);
393        assert!(result.is_err());
394    }
395
396    #[test]
397    fn test_laplace_invalid_sensitivity() {
398        let mechanism = LaplaceMechanism::new(1.0).unwrap();
399        let result = mechanism.add_noise(100.0, -1.0);
400        assert!(result.is_err());
401
402        let result = mechanism.add_noise(100.0, 0.0);
403        assert!(result.is_err());
404    }
405
406    #[test]
407    fn test_gaussian_mechanism_basic() {
408        let mechanism = GaussianMechanism::new(1.0, 0.01).unwrap();
409
410        let true_value = 500.0;
411        let sensitivity = 2.0;
412
413        let noisy_value = mechanism.add_noise(true_value, sensitivity).unwrap();
414
415        // With high probability, noise should be within reasonable bounds
416        assert!((noisy_value - true_value).abs() < 100.0);
417    }
418
419    #[test]
420    fn test_gaussian_invalid_params() {
421        let result = GaussianMechanism::new(-1.0, 0.01);
422        assert!(result.is_err());
423
424        let result = GaussianMechanism::new(1.0, 0.0);
425        assert!(result.is_err());
426
427        let result = GaussianMechanism::new(1.0, 1.0);
428        assert!(result.is_err());
429    }
430
431    #[test]
432    fn test_exponential_mechanism() {
433        let mechanism = ExponentialMechanism::new(1.0).unwrap();
434
435        let candidates = vec!["A", "B", "C", "D"];
436        let utilities = vec![1.0, 5.0, 2.0, 1.5]; // B has highest utility
437
438        // Run multiple times to check distribution
439        let mut counts = vec![0; candidates.len()];
440
441        for _ in 0..1000 {
442            let idx = mechanism.select(&candidates, &utilities, 1.0).unwrap();
443            counts[idx] += 1;
444        }
445
446        // B should be selected most often
447        assert!(counts[1] > counts[0]);
448        assert!(counts[1] > counts[2]);
449        assert!(counts[1] > counts[3]);
450    }
451
452    #[test]
453    fn test_exponential_mechanism_errors() {
454        let mechanism = ExponentialMechanism::new(1.0).unwrap();
455
456        // Mismatched lengths
457        let candidates = vec!["A", "B"];
458        let utilities = vec![1.0];
459        let result = mechanism.select(&candidates, &utilities, 1.0);
460        assert!(result.is_err());
461
462        // Empty candidates
463        let candidates: Vec<&str> = vec![];
464        let utilities: Vec<f64> = vec![];
465        let result = mechanism.select(&candidates, &utilities, 1.0);
466        assert!(result.is_err());
467    }
468
469    #[test]
470    fn test_privacy_budget_basic() {
471        let mut budget = PrivacyBudget::new(10.0, 0.01).unwrap();
472
473        assert!(budget.can_execute(5.0, 0.005));
474
475        budget.consume(5.0, 0.005).unwrap();
476        assert_eq!(budget.remaining_epsilon(), 5.0);
477        assert!((budget.remaining_delta() - 0.005).abs() < 1e-10);
478
479        assert!(budget.can_execute(5.0, 0.005));
480
481        budget.consume(5.0, 0.005).unwrap();
482        assert!(budget.remaining_epsilon() < 1e-10);
483        assert!(budget.remaining_delta() < 1e-10);
484    }
485
486    #[test]
487    fn test_privacy_budget_exceeded() {
488        let mut budget = PrivacyBudget::new(1.0, 0.01).unwrap();
489
490        budget.consume(0.5, 0.005).unwrap();
491
492        let result = budget.consume(0.6, 0.005);
493        assert!(result.is_err());
494    }
495
496    #[test]
497    fn test_privacy_budget_reset() {
498        let mut budget = PrivacyBudget::new(1.0, 0.01).unwrap();
499
500        budget.consume(0.5, 0.005).unwrap();
501        assert_eq!(budget.remaining_epsilon(), 0.5);
502
503        budget.reset();
504        assert_eq!(budget.remaining_epsilon(), 1.0);
505        assert_eq!(budget.remaining_delta(), 0.01);
506    }
507
508    #[test]
509    fn test_laplace_noise_distribution() {
510        let mechanism = LaplaceMechanism::new(1.0).unwrap();
511
512        let mut samples = Vec::new();
513        for _ in 0..1000 {
514            let noisy = mechanism.add_noise(0.0, 1.0).unwrap();
515            samples.push(noisy);
516        }
517
518        // Mean should be close to 0
519        let mean: f64 = samples.iter().sum::<f64>() / samples.len() as f64;
520        assert!(mean.abs() < 0.2);
521
522        // Most samples should be within a reasonable range
523        let within_range = samples.iter().filter(|&&x| x.abs() < 5.0).count();
524        assert!(within_range > 900); // > 90%
525    }
526
527    #[test]
528    fn test_gaussian_noise_distribution() {
529        let mechanism = GaussianMechanism::new(1.0, 0.01).unwrap();
530
531        let mut samples = Vec::new();
532        for _ in 0..1000 {
533            let noisy = mechanism.add_noise(0.0, 1.0).unwrap();
534            samples.push(noisy);
535        }
536
537        // Mean should be close to 0
538        let mean: f64 = samples.iter().sum::<f64>() / samples.len() as f64;
539        assert!(mean.abs() < 0.5);
540
541        // Most samples should be within a reasonable range
542        let within_range = samples.iter().filter(|&&x| x.abs() < 10.0).count();
543        assert!(within_range > 900); // > 90%
544    }
545
546    #[test]
547    fn test_laplace_sensitivity_impact() {
548        let mechanism = LaplaceMechanism::new(1.0).unwrap();
549
550        // Higher sensitivity should lead to more noise
551        let _noisy1 = mechanism.add_noise(100.0, 1.0).unwrap();
552        let _noisy2 = mechanism.add_noise(100.0, 10.0).unwrap();
553
554        // Can't guarantee individual samples, but test the mechanism exists
555        assert!(mechanism.epsilon() == 1.0);
556    }
557
558    #[test]
559    fn test_exponential_equal_utilities() {
560        let mechanism = ExponentialMechanism::new(1.0).unwrap();
561
562        let candidates = vec!["A", "B", "C"];
563        let utilities = vec![1.0, 1.0, 1.0]; // All equal
564
565        let mut counts = vec![0; candidates.len()];
566
567        for _ in 0..300 {
568            let idx = mechanism.select(&candidates, &utilities, 1.0).unwrap();
569            counts[idx] += 1;
570        }
571
572        // Should be roughly equal (within reasonable variance)
573        for &count in &counts {
574            assert!(count > 50 && count < 150);
575        }
576    }
577
578    #[test]
579    fn test_privacy_budget_composition() {
580        let mut budget = PrivacyBudget::new(3.0, 0.1).unwrap();
581
582        // Sequential composition
583        budget.consume(1.0, 0.03).unwrap();
584        budget.consume(1.0, 0.03).unwrap();
585        budget.consume(1.0, 0.03).unwrap();
586
587        // Budget should be nearly exhausted
588        assert!(budget.remaining_epsilon() < 1e-10);
589        assert!((budget.remaining_delta() - 0.01).abs() < 1e-10);
590
591        // Cannot execute another query
592        assert!(!budget.can_execute(0.1, 0.01));
593    }
594
595    #[test]
596    fn test_budget_serialization() {
597        let budget = PrivacyBudget::new(5.0, 0.05).unwrap();
598
599        let serialized = crate::codec::encode(&budget).unwrap();
600        let deserialized: PrivacyBudget = crate::codec::decode(&serialized).unwrap();
601
602        assert_eq!(deserialized.total_epsilon, 5.0);
603        assert_eq!(deserialized.total_delta, 0.05);
604    }
605}