// diff_priv/noise/laplace/numerical_noiser.rs

1use crate::data_manipulation::anonymizable::{IntervalType, QuasiIdentifierType};
2use crate::noise::laplace::laplace_noiser::LOC;
3use float_next_after::NextAfter;
4use num::abs;
5use num::integer::Roots;
6use rand::distributions::{Distribution, Uniform};
7use rand::thread_rng;
8use std::collections::VecDeque;
9
/// Noiser for numerical QI types.
///
/// Keeps a short sliding history of the values it has been shown and uses
/// the spread of that history to estimate the scale of the noise it draws.
#[derive(Clone, Debug)]
pub struct NumericalNoiser {
    /// Differential privacy parameter.
    eps: f64,
    /// k-anonymity level.
    k: usize,
    /// Most recently observed values (the raw values, not the generated
    /// noises), oldest at the front. Its length is bounded by `window`.
    history_window: VecDeque<f64>,
    /// Maximal value observed in the noiser.
    max: f64,
    /// Minimal value observed in the noiser.
    min: f64,
    /// Count of QIs in the stream.
    qi_amount: f64,
    /// Size of the window of historic values.
    window: usize,
}
21
22impl NumericalNoiser {
23    /// create a new numerical noiser and initialize the first values
24    pub fn initialize(eps: f64, k: usize, qi_amount: f64, interval: &IntervalType) -> Self {
25        let (qi_type, _, _, _) = interval;
26        let value = Self::extract_convert_value(qi_type);
27
28        let window = match k.sqrt() as usize {
29            val if val <= 2 => 2,
30            val if val > 2 => val,
31            _ => panic!("value couldn't be calculated"),
32        };
33
34        Self {
35            eps,
36            k,
37            max: value,
38            min: value,
39            qi_amount,
40            window,
41            ..Default::default()
42        }
43    }
44
45    /// extract the value from a `QuasiIdentifierType` and return the
46    /// (converted to f64) value
47    fn extract_convert_value(interval: &QuasiIdentifierType) -> f64 {
48        match *interval {
49            QuasiIdentifierType::Integer(value) => value as f64,
50            QuasiIdentifierType::Float(value) => value,
51        }
52    }
53
54    /// calculate the noise with an estimate of a scale
55    pub fn generate_noise(&mut self, interval: &IntervalType) -> f64 {
56        let (value, _, _, _) = interval;
57        let value = Self::extract_convert_value(value);
58        let scale = self.estimate_scale(value);
59
60        let between = Uniform::<f64>::from(-0.5..0.5);
61        let mut rng = thread_rng();
62        let mut sign = 1.0;
63        let unif = between.sample(&mut rng);
64        let diff = 0_f64.next_after(1_f64).max(1.0 - 2.0 * abs(unif));
65
66        if unif < 0.0 {
67            sign = -1.0;
68        }
69
70        LOC - (scale * sign * diff.ln())
71    }
72
73    /// return the estimated scale based on the history of previous
74    /// laplace noises
75    fn estimate_scale(&mut self, value: f64) -> f64 {
76        if value < self.min {
77            self.min = value
78        }
79        if value > self.max {
80            self.max = value
81        }
82
83        if self.history_window.len() > self.window {
84            self.history_window.pop_front();
85        }
86        self.history_window.push_back(value);
87        let predicted_sensitivity = self
88            .history_window
89            .iter()
90            .max_by(|a, b| a.partial_cmp(b).unwrap())
91            .unwrap()
92            - self
93                .history_window
94                .iter()
95                .min_by(|a, b| a.partial_cmp(b).unwrap())
96                .unwrap();
97
98        0.5 * self.qi_amount * predicted_sensitivity / (self.k as f64 * self.eps)
99    }
100}
101
102impl Default for NumericalNoiser {
103    fn default() -> Self {
104        Self {
105            eps: 0.0,
106            k: 0,
107            history_window: VecDeque::new(),
108            max: 0.0,
109            min: 0.0,
110            qi_amount: 0.0,
111            window: 0,
112        }
113    }
114}
115
#[cfg(test)]
mod tests {
    use crate::noise::laplace::laplace_noiser::LOC;
    use float_next_after::NextAfter;
    use num::abs;
    use rand::distributions::Uniform;
    use rand::prelude::Distribution;
    use rand::thread_rng;

    /// Number of histogram buckets spanning the sampled range.
    const BUCKET_COUNT: usize = 100;
    /// Number of noise samples drawn by the test.
    const SAMPLE_SIZE: usize = 50000;

    /// Clamp a raw bucket index into `0..BUCKET_COUNT`.
    fn truncate(index: i32) -> usize {
        index.clamp(0, BUCKET_COUNT as i32 - 1) as usize
    }

    /// Map `x` to the bucket it falls into when `[x0, x1)` is split into
    /// `BUCKET_COUNT` equal buckets; out-of-range values land in the first
    /// or last bucket.
    fn x_to_index(x: f64, x0: f64, x1: f64) -> usize {
        let relative = (x - x0) / (x1 - x0);
        truncate((relative * BUCKET_COUNT as f64).floor() as i32)
    }

    /// Histogram samples produced by the same formula `generate_noise` uses
    /// (with a fixed scale of 1) and check the mass in the lower tail.
    #[test]
    fn generate_noise() {
        let scale = 1.0;
        let (x0, x1) = (-8.0, 8.0);

        let between = Uniform::<f64>::from(-0.5..0.5);
        let mut rng = thread_rng();
        let mut buckets = [0; BUCKET_COUNT];

        for _ in 0..SAMPLE_SIZE {
            let unif = between.sample(&mut rng);
            let sign = if unif < 0.0 { -1.0 } else { 1.0 };
            let diff = 0_f64.next_after(1_f64).max(1.0 - 2.0 * abs(unif));

            let x = LOC - (scale * sign * diff.ln());
            buckets[x_to_index(x, x0, x1)] += 1;
        }

        // -1.0 maps to bucket 43, so the slice below covers x < -1.12
        // (bucket width is 0.16), not x < -1.0.
        let index_test = x_to_index(-1.0, x0, x1);
        let samples_below = buckets[..index_test].iter().sum::<i32>();
        let fraction_below_x_test = samples_below as f64 / SAMPLE_SIZE as f64;

        // NOTE(review): assuming LOC is 0, the expected fraction is about
        // 0.163, close to the lower bound — this may fail occasionally; confirm.
        assert!((0.16..=0.17).contains(&fraction_below_x_test));
    }
}
176}