diff_priv/lib.rs
1//! # DiffPriv
2//! DiffPriv is a differential privacy framework for real-time data streaming written in Rust, supporting k-anonymity,
3//! (c,l)-diversity and ε-differential privacy. The framework is based on the [Preserving Differential Privacy and Utility of Non-stationary Data Streams](https://ieeexplore.ieee.org/document/8637412) paper, with various improvements implemented.
4//!
5//! This library is the result of my master's thesis: Differential privacy in large scale data streaming.
6//! It has been developed during an internship at [STRM Privacy](https://strmprivacy.io/).
7//!
8//! # Using the anonymizer
9//! An example of using the anonymizer can be seen below
10//! ```
11//! use csv::Reader;
12//! use diff_priv::anonymization::microagg_anonymizer::MicroaggAnonymizer;
13//! use diff_priv::noise::laplace::laplace_noiser::LaplaceNoiser;
14//! use diff_priv::test::adult::Adult;
15//! use diff_priv::test::dummy_publisher::DummyPublisher;
16//!
17//! // we initialize our noiser that implements the `Noiser` trait
18//! let noiser = LaplaceNoiser::new(0.1, 3, 0.1);
19//! // we initialize a publisher that implements the `Publisher` trait
20//! let publisher = DummyPublisher::default();
21//! // we create the anonymizer with the desired parameters
22//! // k: 2 | k_max: 10 | c: 2 | l: 7 | diff_thres: 0.1 | delta: 10 | buff_size: 5
23//! let mut anonymizer: MicroaggAnonymizer<LaplaceNoiser, Adult, DummyPublisher> =
24//! MicroaggAnonymizer::new(2, 10, 2, 7, 0.1, 10, 5, publisher, noiser);
25//!
26//! // load CSV file representing an Adult
27//! let mut file = Reader::from_path("datasets/Adult_1_numeric_only_class_50K.csv").unwrap();
28//! for line in file.deserialize() {
29//! let row_result = line;
30//! // when we call `anonymize()` the anonymizer will
31//! // automatically publish to the given backend when the given
32//! // privacy parameter conditions are met
33//! match row_result {
34//! Ok(row) => anonymizer.anonymize(row),
35//! Err(e) => panic!("{}", e)
36//! }
37//! }
38//!
39//! // publish remaining data tuples to the given publisher
40//! // in this case a `DummyPublisher`
41//! anonymizer
42//! .cluster_set
43//! .into_iter()
44//! .for_each(|(_, mut cluster)| {
45//! cluster.publish_all(&mut anonymizer.publisher, &mut anonymizer.analysers)
46//! });
47//! ```
48//! ## Implementing `Anonymizable` trait to anonymize new data
49//! By implementing the `Anonymizable` trait on a data structure, you tell DiffPriv how to anonymize it.
50//! The following QI types are implemented:
51//! ```rust
52//! # use diff_priv::data_manipulation::anonymizable::QuasiIdentifierType;
53//! /// value, min_value, max_value, weight of attribute
54//! pub type IntervalType = (
55//! QuasiIdentifierType,
56//! QuasiIdentifierType,
57//! QuasiIdentifierType,
58//! usize,
59//! );
60//!
61//! /// rank, max_rank, weight of attribute
62//! pub type OrdinalType = (i32, i32, usize);
63//!
64//! /// value, max value, weight of attribute
65//! pub type NominalType = (i32, i32, usize);
66//! ```
67//! An example implementation of the `Anonymizable` trait can be seen below
68//! ```
69//! use std::time::{SystemTime, UNIX_EPOCH};
70//! use serde::{Serialize, Deserialize};
71//! use bimap::BiMap;
72//! use lazy_static::lazy_static;
73//! use uuid::Uuid;
74//!
75//! use diff_priv::data_manipulation::anonymizable::{
76//! Anonymizable, QuasiIdentifierType, QuasiIdentifierTypes, SensitiveAttribute,
77//! };
78//!
79//! lazy_static! {
80//! static ref CLASS_BIMAP: BiMap<&'static str, i32> =
81//! BiMap::from_iter(vec![("<=50K", 0), (">50K", 1),]);
82//! }
83//!
84//! // This is the datastructure that we are going to anonymize
85//! #[derive(Debug, Serialize, Clone, Deserialize)]
86//! pub struct Adult {
87//! timestamp: i32,
88//! age: i32,
89//! capital_gain: i32,
90//! capital_loss: i32,
91//! class: String,
92//! #[serde(skip_deserializing, default = "default_time")]
93//! time_generated: SystemTime,
94//! }
95//!
96//! fn default_time() -> SystemTime {
97//! SystemTime::now()
98//! }
99//!
100//! impl Default for Adult {
101//! fn default() -> Self {
102//! Self {
103//! timestamp: 0,
104//! age: 0,
105//! capital_gain: 0,
106//! capital_loss: 0,
107//! class: "".to_string(),
108//! time_generated: SystemTime::now(),
109//! }
110//! }
111//! }
112//!
113//! impl Adult {
114//! // here we extract an Interval QI from the `age` attribute
115//! fn get_age_qi(&self) -> QuasiIdentifierTypes {
116//! QuasiIdentifierTypes::Interval((
117//! QuasiIdentifierType::Integer(self.age),
118//! QuasiIdentifierType::Integer(1),
119//! QuasiIdentifierType::Integer(100),
120//! 1,
121//! ))
122//! }
123//!
124//! // here we extract an Interval QI from the `capital_gain` attribute
125//! fn get_capital_gain_qi(&self) -> QuasiIdentifierTypes {
126//! QuasiIdentifierTypes::Interval((
127//! QuasiIdentifierType::Integer(self.capital_gain),
128//! QuasiIdentifierType::Integer(0),
129//! QuasiIdentifierType::Integer(100000),
130//! 1,
131//! ))
132//! }
133//!
134//! // here we extract an Interval QI from the `capital_loss` attribute
135//! fn get_capital_loss_qi(&self) -> QuasiIdentifierTypes {
136//! QuasiIdentifierTypes::Interval((
137//! QuasiIdentifierType::Integer(self.capital_loss),
138//! QuasiIdentifierType::Integer(0),
139//! QuasiIdentifierType::Integer(5000),
140//! 1,
141//! ))
142//! }
143//!
144//! }
145//!
146//! // Here we implement the `Anonymizable` trait
147//! impl Anonymizable for Adult {
148//! // We extract the QIs from the datastructure and return a `vec` of QIs
149//! fn quasi_identifiers(&self) -> Vec<QuasiIdentifierTypes> {
150//! let age = self.get_age_qi();
151//! let capital_gain = self.get_capital_gain_qi();
152//! let capital_loss = self.get_capital_loss_qi();
153//!
154//! vec![
155//! age,
156//! capital_gain,
157//! capital_loss,
158//! ]
159//! }
160//!
161//! // We update the data structure's QIs with a `vec` of QIs. The `vec` is expected to hold
162//! // the QIs in the same order that the `quasi_identifiers` function returns them; because
163//! // `pop` removes from the back, the values are popped in reverse order
164//! fn update_quasi_identifiers(&self, mut qi: Vec<QuasiIdentifierTypes>) -> Self {
165//! if let (
166//! QuasiIdentifierType::Integer(capital_loss),
167//! QuasiIdentifierType::Integer(capital_gain),
168//! QuasiIdentifierType::Integer(age),
169//! ) = (
170//! qi.pop().unwrap().extract_value(),
171//! qi.pop().unwrap().extract_value(),
172//! qi.pop().unwrap().extract_value(),
173//! ) {
174//! Self {
175//! timestamp: self.timestamp,
176//! age,
177//! capital_gain,
178//! capital_loss,
179//! class: self.class.to_owned(),
180//! time_generated: self.time_generated,
181//! }
182//! } else {
183//! panic!("Couldn't Adult with QI's")
184//! }
185//! }
186//!
187//! // We extract the sensitive attribute from the data structure
188//! fn sensitive_value(&self) -> SensitiveAttribute {
189//! SensitiveAttribute::String(self.class.to_owned())
190//! }
191//!
192//! // We return a vector of strings containing the String version of the QIs
193//! // Used for printing to CSVs
194//! fn extract_string_values(&self, uuid: Uuid, dr: f64) -> Vec<String> {
195//! vec![
196//! uuid.to_string(),
197//! dr.to_string(),
198//! self.timestamp.to_string(),
199//! self.age.to_string(),
200//! self.capital_gain.to_string(),
201//! self.capital_loss.to_string(),
202//! self.class.to_owned(),
203//! ]
204//! }
205//!
206//! fn get_timestamp(&self) -> SystemTime {
207//! self.time_generated
208//! }
209//! }
210//! ```
211//!
212//! # The `Publisher` trait
213//! To publish an anonymized struct to a desired backend we use the `Publisher` trait.
214//! DiffPriv also supports exporting to an [Apache Kafka topic](publishing::kafka_publisher::KafkaPublisher). This can be seen in the `publishing` directory.
215//! An example publisher for CSVs can be seen here: [CsvPublisher](publishing::csv_publisher::CsvPublisher).
216//! To implement a custom publishing backend one can use the [Publisher](publishing::publisher::Publisher) trait.
217//!
218//! # The `Noiser` trait
219//! DiffPriv supports [Laplace noise](noise::laplace::laplace_noiser::LaplaceNoiser) for ε-differential privacy.
220//! The noiser supports two different kinds of noise: one for [numerical values](noise::laplace::numerical_noiser::NumericalNoiser) and one for [categorical](noise::laplace::categorical_noiser::CategoricalNoiser) values.
221//! To implement a custom implementation of ε-differential privacy noise, one can use the [Noiser](noise::noiser::Noiser) trait.
222
223#[macro_use]
224extern crate serde;
225#[macro_use]
226extern crate log;
227#[macro_use]
228extern crate lazy_static;
229
230extern crate core;
231extern crate pretty_env_logger;
232
233pub mod analysis;
234pub mod anonymization;
235pub mod config;
236pub mod data_manipulation;
237pub mod kafka;
238pub mod noise;
239pub mod publishing;
240pub mod test;
241pub mod vec_set;