1use crate::data_manipulation::aggregation::truncate_to_domain;
2use num::abs;
3use rand::distributions::{Distribution, Uniform};
4use rand::thread_rng;
5use rand_distr::Normal;
6use serde::Serialize;
7use std::time::SystemTime;
8use uuid::Uuid;
9
/// Value of the sensitive (non quasi-identifying) attribute of a record.
///
/// `Hash` and `Eq` make the value usable as a key in hashed collections, `Debug` allows it to
/// appear in assertion and log output, and `Clone` lets callers keep a copy of a value.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub enum SensitiveAttribute {
    /// Sensitive value represented as text.
    String(String),
    /// Sensitive value represented as a 32-bit integer.
    Integer(i32),
}
15
16pub type IntervalType = (
18 QuasiIdentifierType,
19 QuasiIdentifierType,
20 QuasiIdentifierType,
21 usize,
22);
23
/// Payload of an ordinal quasi identifier: `(rank, max_rank, weight)`.
///
/// `rank` is the current position on the ordinal scale, `max_rank` the highest rank of that
/// scale and `weight` the attribute's weight in the overall record distance.
pub type OrdinalType = (i32, i32, usize);
26
/// Payload of a nominal quasi identifier: `(value, max_value, weight)`.
///
/// `value` is the integer code of the current category, `max_value` the largest category code
/// and `weight` the attribute's weight in the overall record distance.
pub type NominalType = (i32, i32, usize);
29
/// Numeric representation of a single quasi-identifier value or domain bound.
///
/// `PartialEq` is derived so values can be compared directly (for example with `assert_eq!`);
/// `Eq` is not available because of the floating point variant.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum QuasiIdentifierType {
    /// Floating point value.
    Float(f64),
    /// Integer value.
    Integer(i32),
}
35
36#[derive(Debug)]
38pub enum QuasiIdentifierTypes {
39 Interval(IntervalType),
41 Ordinal(OrdinalType),
43 Nominal(NominalType),
45}
46
47impl QuasiIdentifierTypes {
48 pub fn extract_value(self) -> QuasiIdentifierType {
50 match self {
51 QuasiIdentifierTypes::Interval((value, _, _, _)) => value,
52 QuasiIdentifierTypes::Ordinal((value, _, _)) => QuasiIdentifierType::Integer(value),
53 QuasiIdentifierTypes::Nominal((value, _, _)) => QuasiIdentifierType::Integer(value),
54 }
55 }
56
57 pub fn randomize(self) -> QuasiIdentifierTypes {
58 let mut rng = thread_rng();
59 match self {
60 QuasiIdentifierTypes::Interval((value, min, max, weight)) => match (value, min, max) {
61 (
62 QuasiIdentifierType::Float(val_fl),
63 QuasiIdentifierType::Float(min_val),
64 QuasiIdentifierType::Float(max_val),
65 ) => {
66 let normal: Normal<f64> = Normal::new(val_fl, 1.0).unwrap();
67 let e = normal.sample(&mut rng);
68 QuasiIdentifierTypes::Interval((
69 QuasiIdentifierType::Float(truncate_to_domain(e, min_val, max_val)),
70 QuasiIdentifierType::Float(min_val),
71 QuasiIdentifierType::Float(max_val),
72 weight,
73 ))
74 }
75 (
76 QuasiIdentifierType::Integer(val_int),
77 QuasiIdentifierType::Integer(min_val),
78 QuasiIdentifierType::Integer(max_val),
79 ) => {
80 let normal: Normal<f64> = Normal::new(val_int as f64, 1.0).unwrap();
81 let e = normal.sample(&mut rng);
82 QuasiIdentifierTypes::Interval((
83 QuasiIdentifierType::Integer(truncate_to_domain(
84 e as i32, min_val, max_val,
85 )),
86 QuasiIdentifierType::Integer(min_val),
87 QuasiIdentifierType::Integer(max_val),
88 weight,
89 ))
90 }
91 _ => panic!("Wrong combination of type found in randomization of interval"),
92 },
93 QuasiIdentifierTypes::Ordinal((_, max_rank, weight)) => {
94 let between = Uniform::<i32>::from(0..max_rank + 1);
95 let random_ordinal_qi = between.sample(&mut rng);
96 QuasiIdentifierTypes::Ordinal((random_ordinal_qi as i32, max_rank, weight))
97 }
98 QuasiIdentifierTypes::Nominal((_, max_value, weight)) => {
99 let between = Uniform::<i32>::from(0..max_value + 1);
100 let random_nominal_qi = between.sample(&mut rng);
101 QuasiIdentifierTypes::Nominal((random_nominal_qi, max_value, weight))
102 }
103 }
104 }
105}
106
107pub trait Anonymizable: Default + Clone + Serialize + Sync {
110 fn calculate_difference(&self, other: &Self) -> f64 {
112 let mut sum_weight: usize = 0;
113 let diff: f64 = self
114 .quasi_identifiers()
115 .into_iter()
116 .zip(other.quasi_identifiers().into_iter())
117 .map(|(x, y)| match (x, y) {
118 (
119 QuasiIdentifierTypes::Interval(interval_x),
120 QuasiIdentifierTypes::Interval(interval_y),
121 ) => {
122 let (_, _, _, weight) = interval_x;
123 sum_weight += weight;
124 Self::calculate_interval_distance(interval_x, interval_y)
125 }
126 (
127 QuasiIdentifierTypes::Ordinal(ordinal_x),
128 QuasiIdentifierTypes::Ordinal(ordinal_y),
129 ) => {
130 let (_, _, weight) = ordinal_x;
131 sum_weight += weight;
132 Self::calculate_ordinal_distance(ordinal_x, ordinal_y)
133 }
134 (
135 QuasiIdentifierTypes::Nominal(nominal_x),
136 QuasiIdentifierTypes::Nominal(nominal_y),
137 ) => {
138 let (_, _, weight) = nominal_x;
139 sum_weight += weight;
140 Self::calculate_nominal_distance(nominal_x, nominal_y)
141 }
142 _ => {
143 panic!("wrong types provided")
144 }
145 })
146 .sum();
147
148 diff / sum_weight as f64
149 }
150
151 fn calculate_info_loss(&self, other: &Self) -> f64 {
154 let mut distance = 0.0;
155 let self_qi = self.quasi_identifiers();
156 let other_qi = other.quasi_identifiers();
157
158 self_qi
159 .into_iter()
160 .zip(other_qi.into_iter())
161 .for_each(|(x, y)| match (x.extract_value(), y.extract_value()) {
162 (QuasiIdentifierType::Integer(value1), QuasiIdentifierType::Integer(value2)) => {
163 distance += (value1 as f64 - value2 as f64).powi(2)
164 }
165 (QuasiIdentifierType::Float(value1), QuasiIdentifierType::Float(value2)) => {
166 distance += (value1 - value2).powi(2)
167 }
168 _ => {
169 panic!("Incompatible values have been found")
170 }
171 });
172
173 distance.sqrt()
174 }
175
176 fn quasi_identifiers(&self) -> Vec<QuasiIdentifierTypes>;
178
179 fn update_quasi_identifiers(&self, qi: Vec<QuasiIdentifierTypes>) -> Self;
183
184 fn sensitive_value(&self) -> SensitiveAttribute;
186
187 fn extract_string_values(&self, uuid: Uuid, dr: f64) -> Vec<String>;
189
190 fn get_timestamp(&self) -> SystemTime;
192
193 fn suppress(&self) -> Self {
195 let suppressed_qi = self
196 .quasi_identifiers()
197 .into_iter()
198 .map(|x| x.randomize())
199 .collect();
200
201 self.update_quasi_identifiers(suppressed_qi)
202 }
203
204 fn calculate_ordinal_distance(ordinal_x: OrdinalType, ordinal_y: OrdinalType) -> f64 {
207 let (rank1, max_rank, weight) = ordinal_x;
208 let (rank2, _, _) = ordinal_y;
209
210 let x = (rank1 as f64 - 1.0) / (max_rank as f64 - 1.0);
211 let y = (rank2 as f64 - 1.0) / (max_rank as f64 - 1.0);
212
213 (weight as f64)
214 * Self::calculate_interval_distance(
215 (
216 QuasiIdentifierType::Float(x),
217 QuasiIdentifierType::Float(1.0),
218 QuasiIdentifierType::Float(max_rank as f64),
219 weight,
220 ),
221 (
222 QuasiIdentifierType::Float(y),
223 QuasiIdentifierType::Float(1.0),
224 QuasiIdentifierType::Float(max_rank as f64),
225 weight,
226 ),
227 )
228 }
229
230 fn calculate_interval_distance(interval_x: IntervalType, interval_y: IntervalType) -> f64 {
232 let (num1, min, max, weight) = interval_x;
233 let (num2, _, _, _) = interval_y;
234
235 match (num1, min, max, num2) {
236 (
237 QuasiIdentifierType::Float(x),
238 QuasiIdentifierType::Float(min),
239 QuasiIdentifierType::Float(max),
240 QuasiIdentifierType::Float(y),
241 ) => weight as f64 * abs(x - y) / (max - min),
242 (
243 QuasiIdentifierType::Integer(x),
244 QuasiIdentifierType::Integer(min),
245 QuasiIdentifierType::Integer(max),
246 QuasiIdentifierType::Integer(y),
247 ) => weight as f64 * abs(x as f64 - y as f64) / (max as f64 - min as f64),
248 _ => {
249 panic!("wrong type conversion")
250 }
251 }
252 }
253
254 fn calculate_nominal_distance(nominal_x: NominalType, nominal_y: NominalType) -> f64 {
256 let (x, _, weight) = nominal_x;
257 let (y, _, _) = nominal_y;
258
259 match x == y {
260 true => 0.0,
261 false => weight as f64,
262 }
263 }
264}
265
#[cfg(test)]
mod tests {
    use crate::data_manipulation::aggregation::AggregateType;
    use crate::data_manipulation::anonymizable::Anonymizable;
    use crate::data_manipulation::anonymizable::QuasiIdentifierType::{Float, Integer};
    use crate::data_manipulation::anonymizable::QuasiIdentifierTypes::{
        Interval, Nominal, Ordinal,
    };
    use crate::data_manipulation::mueller::MuellerStream;

    /// The first two quasi identifiers of a `MuellerStream` are the age interval and the
    /// gender category.
    #[test]
    fn get_quasi_identifiers() {
        let mueller = MuellerStream {
            age: Some(32),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let mut quasi_identifiers = mueller.quasi_identifiers();

        match quasi_identifiers.remove(0) {
            Interval((Integer(32), Integer(33), Integer(85), 1)) => {}
            other => panic!("unexpected first quasi identifier: {:?}", other),
        }

        match quasi_identifiers.remove(0) {
            Nominal((0, 1, 1)) => {}
            other => panic!("unexpected second quasi identifier: {:?}", other),
        }
    }

    /// Updating a record with the quasi identifiers of another record copies their values.
    #[test]
    fn update_quasi_identifiers() {
        let mueller = MuellerStream {
            age: Some(32),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let centroid = MuellerStream {
            age: Some(50),
            gender: Some("female".to_string()),
            ..MuellerStream::default()
        };

        let anonymized = mueller.update_quasi_identifiers(centroid.quasi_identifiers());

        assert_eq!(anonymized.age, Some(50));
        assert_eq!(anonymized.gender, Some("female".to_string()))
    }

    /// Age gap 13 over a domain of width 52 gives 0.25, the gender mismatch gives 1.0;
    /// (0.25 + 1.0) / 2 = 0.625.
    #[test]
    fn calculate_difference() {
        let mueller = MuellerStream {
            age: Some(37),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let centroid = MuellerStream {
            age: Some(50),
            gender: Some("female".to_string()),
            ..MuellerStream::default()
        };

        let difference = mueller.calculate_difference(&centroid);

        assert_eq!(difference, 0.625)
    }

    /// Identical records have no distance.
    #[test]
    fn calculate_difference_zero() {
        let mueller = MuellerStream {
            age: Some(37),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let centroid = MuellerStream {
            age: Some(37),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let difference = mueller.calculate_difference(&centroid);

        assert_eq!(difference, 0.0)
    }

    /// Records at opposite ends of every domain have the maximal distance of 1.0.
    #[test]
    fn calculate_difference_one() {
        let mueller = MuellerStream {
            age: Some(33),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let centroid = MuellerStream {
            age: Some(85),
            gender: Some("female".to_string()),
            ..MuellerStream::default()
        };

        let difference = mueller.calculate_difference(&centroid);

        assert_eq!(difference, 1.0)
    }

    /// Information loss is the Euclidean distance of the raw values: the ages differ by 17 and
    /// the gender codes by 1, so the expected value is sqrt(17^2 + 1^2) = sqrt(290).
    #[test]
    fn calculate_info_loss() {
        let mueller = MuellerStream {
            age: Some(33),
            gender: Some("male".to_string()),
            ..MuellerStream::default()
        };

        let centroid = MuellerStream {
            age: Some(50),
            gender: Some("female".to_string()),
            ..MuellerStream::default()
        };

        let info_loss = mueller.calculate_info_loss(&centroid);
        let expected = 290.0_f64.sqrt();

        // Compare the absolute deviation; a signed difference would accept any smaller value.
        assert!(
            (info_loss - expected).abs() < 1e-9,
            "expected {}, got {}",
            expected,
            info_loss
        )
    }

    /// The mean of integer intervals (1, 4, 6, 10 -> 5.25) is reported as the integer 5.
    #[test]
    fn aggregation_interval_integer() {
        let agg1 = Interval((Integer(1), Integer(0), Integer(10), 1));
        let agg2 = Interval((Integer(4), Integer(0), Integer(10), 1));
        let agg3 = Interval((Integer(6), Integer(0), Integer(10), 1));
        let agg4 = Interval((Integer(10), Integer(0), Integer(10), 1));

        let aggregation = AggregateType::Mean(vec![agg1, agg2, agg3, agg4]).aggregate();

        match aggregation.extract_value() {
            Integer(value) => assert_eq!(value, 5),
            other => panic!("expected an integer aggregate, got {:?}", other),
        }
    }

    /// The mean of float intervals keeps its fractional part.
    #[test]
    fn aggregation_interval_float() {
        let agg1 = Interval((Float(1.0), Float(0.0), Float(10.0), 1));
        let agg2 = Interval((Float(4.0), Float(0.0), Float(10.0), 1));
        let agg3 = Interval((Float(6.0), Float(0.0), Float(10.0), 1));
        let agg4 = Interval((Float(10.0), Float(0.0), Float(10.0), 1));

        let aggregation = AggregateType::Mean(vec![agg1, agg2, agg3, agg4]).aggregate();

        match aggregation.extract_value() {
            Float(value) => assert_eq!(value, 5.25),
            other => panic!("expected a float aggregate, got {:?}", other),
        }
    }

    /// The mode of ordinal values is the most frequent rank.
    #[test]
    fn aggregation_ordinal() {
        let agg1 = Ordinal((1, 10, 1));
        let agg2 = Ordinal((1, 10, 1));
        let agg3 = Ordinal((2, 10, 1));
        let agg4 = Ordinal((4, 10, 1));

        let aggregation = AggregateType::Mode(vec![agg1, agg2, agg3, agg4]).aggregate();

        match aggregation.extract_value() {
            Integer(value) => assert_eq!(value, 1),
            other => panic!("expected an integer aggregate, got {:?}", other),
        }
    }

    /// The mode of nominal values is the most frequent category.
    #[test]
    fn aggregation_nominal() {
        let agg1 = Nominal((1, 4, 10));
        let agg2 = Nominal((1, 4, 10));
        let agg3 = Nominal((2, 4, 10));
        let agg4 = Nominal((4, 4, 10));

        let aggregation = AggregateType::Mode(vec![agg1, agg2, agg3, agg4]).aggregate();

        match aggregation.extract_value() {
            Integer(value) => assert_eq!(value, 1),
            other => panic!("expected an integer aggregate, got {:?}", other),
        }
    }
}