diff-priv 0.1.0

k-anonymity, (c,l)-diversity and ε-differential privacy framework
Documentation
use std::time::{SystemTime, UNIX_EPOCH};

use bimap::BiMap;
use uuid::Uuid;

use crate::data_manipulation::anonymizable::{
    Anonymizable, QuasiIdentifierType, QuasiIdentifierTypes, SensitiveAttribute,
};

lazy_static! {
    static ref EDU_BIMAP: BiMap<&'static str, i32> = {
        BiMap::from_iter(vec![
            (" Preschool", 0),
            (" Bachelors", 1),
            (" Some-college", 2),
            (" 11th", 3),
            (" HS-grad", 4),
            (" Prof-school", 5),
            (" Assoc-acdm", 6),
            (" Assoc-voc", 7),
            (" 9th", 8),
            (" 7th-8th", 9),
            (" 12th", 10),
            (" Masters", 11),
            (" 1st-4th", 12),
            (" 10th", 13),
            (" Doctorate", 14),
            (" 5th-6th", 15),
        ])
    };
    static ref MAR_BIMAP: BiMap<&'static str, i32> = {
        BiMap::from_iter(vec![
            (" Married-AF-spouse", 0),
            (" Married-civ-spouse", 1),
            (" Divorced", 2),
            (" Never-married", 3),
            (" Separated", 4),
            (" Widowed", 5),
            (" Married-spouse-absent", 6),
        ])
    };
    static ref WORK_BIMAP: BiMap<&'static str, i32> = {
        BiMap::from_iter(vec![
            (" Never-worked", 0),
            (" Private", 1),
            (" Self-emp-not-inc", 2),
            (" Self-emp-inc", 3),
            (" Federal-gov", 4),
            (" Local-gov", 5),
            (" State-gov", 6),
            (" Without-pay", 7),
        ])
    };
    static ref NAT_BIMAP: BiMap<&'static str, i32> = {
        BiMap::from_iter(vec![
            (" Holand-Netherlands", 0),
            (" United-States", 1),
            (" Cambodia", 2),
            (" England", 3),
            (" Puerto-Rico", 4),
            (" Canada", 5),
            (" Germany", 6),
            (" Outlying-US(Guam-USVI-etc)", 7),
            (" India", 8),
            (" Japan", 9),
            (" Greece", 10),
            (" South", 11),
            (" China", 12),
            (" Cuba", 13),
            (" Iran", 14),
            (" Honduras", 15),
            (" Philippines", 16),
            (" Italy", 17),
            (" Poland", 18),
            (" Jamaica", 19),
            (" Vietnam", 20),
            (" Mexico", 21),
            (" Portugal", 22),
            (" Ireland", 23),
            (" France", 24),
            (" Dominican-Republic", 25),
            (" Laos", 26),
            (" Ecuador", 27),
            (" Taiwan", 28),
            (" Haiti", 29),
            (" Columbia", 30),
            (" Hungary", 31),
            (" Guatemala", 32),
            (" Nicaragua", 33),
            (" Scotland", 34),
            (" Thailand", 35),
            (" Yugoslavia", 36),
            (" El-Salvador", 37),
            (" Trinadad&Tobago", 38),
            (" Peru", 39),
            (" Hong", 40),
        ])
    };
    static ref OCC_BIMAP: BiMap<&'static str, i32> = {
        BiMap::from_iter(vec![
            (" Armed-Forces", 0),
            (" Tech-support", 1),
            (" Craft-repair", 2),
            (" Other-service", 3),
            (" Sales", 4),
            (" Exec-managerial", 5),
            (" Prof-specialty", 6),
            (" Handlers-cleaners", 7),
            (" Machine-op-inspct", 8),
            (" Adm-clerical", 9),
            (" Farming-fishing", 10),
            (" Transport-moving", 11),
            (" Priv-house-serv", 12),
            (" Protective-serv", 13),
        ])
    };
}

#[derive(Debug, Serialize, Clone, Deserialize)]
pub struct AdultLarge {
    timestamp: i32,
    age: i32,
    fnlwgt: i32,
    education_num: i32,
    capital_gain: i32,
    capital_loss: i32,
    hours_per_week: i32,
    education: String,
    marital_status: String,
    workclass: String,
    native_country: String,
    occupation: String,
    class: String,
    #[serde(skip_deserializing, default = "default_time")]
    time_generated: SystemTime,
}

fn default_time() -> SystemTime {
    SystemTime::now()
}

impl Default for AdultLarge {
    fn default() -> Self {
        Self {
            timestamp: 0,
            age: 0,
            fnlwgt: 0,
            education_num: 0,
            capital_gain: 0,
            capital_loss: 0,
            hours_per_week: 0,
            education: "".to_string(),
            marital_status: "".to_string(),
            workclass: "".to_string(),
            native_country: "".to_string(),
            occupation: "".to_string(),
            class: "".to_string(),
            time_generated: SystemTime::now(),
        }
    }
}

impl AdultLarge {
    fn get_age_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.age),
            QuasiIdentifierType::Integer(1),
            QuasiIdentifierType::Integer(100),
            1,
        ))
    }

    fn get_fnlwgt_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.fnlwgt),
            QuasiIdentifierType::Integer(1),
            QuasiIdentifierType::Integer(1500000),
            1,
        ))
    }

    fn get_education_num_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.education_num),
            QuasiIdentifierType::Integer(1),
            QuasiIdentifierType::Integer(20),
            1,
        ))
    }

    fn get_capital_gain_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.capital_gain),
            QuasiIdentifierType::Integer(0),
            QuasiIdentifierType::Integer(100000),
            1,
        ))
    }

    fn get_capital_loss_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.capital_loss),
            QuasiIdentifierType::Integer(0),
            QuasiIdentifierType::Integer(5000),
            1,
        ))
    }

    fn get_hours_per_week_qi(&self) -> QuasiIdentifierTypes {
        QuasiIdentifierTypes::Interval((
            QuasiIdentifierType::Integer(self.hours_per_week),
            QuasiIdentifierType::Integer(1),
            QuasiIdentifierType::Integer(100),
            1,
        ))
    }

    fn get_education_qi(&self) -> QuasiIdentifierTypes {
        let rank = match EDU_BIMAP.get_by_left(self.education.as_str()) {
            None => {
                panic!("Wrong education status found: {}", self.education)
            }
            Some(education) => *education,
        };

        QuasiIdentifierTypes::Nominal((rank, 15, 1))
    }

    fn get_marital_status_qi(&self) -> QuasiIdentifierTypes {
        let rank = match MAR_BIMAP.get_by_left(self.marital_status.as_str()) {
            None => panic!("Wrong marital status found: {}", self.marital_status),
            Some(marital) => *marital,
        };

        QuasiIdentifierTypes::Nominal((rank, 6, 1))
    }

    fn get_workclass_qi(&self) -> QuasiIdentifierTypes {
        let rank = match WORK_BIMAP.get_by_left(self.workclass.as_str()) {
            None => panic!("Wrong workclass status found: {}", self.workclass),
            Some(workclass) => *workclass,
        };

        QuasiIdentifierTypes::Nominal((rank, 7, 1))
    }

    fn get_native_country_qi(&self) -> QuasiIdentifierTypes {
        let rank = match NAT_BIMAP.get_by_left(self.native_country.as_str()) {
            None => panic!("Wrong native country found: {}", self.native_country),
            Some(native) => *native,
        };

        QuasiIdentifierTypes::Nominal((rank, 40, 1))
    }

    fn get_occupation_qi(&self) -> QuasiIdentifierTypes {
        let rank = match OCC_BIMAP.get_by_left(self.occupation.as_str()) {
            None => panic!("Wrong marital status found: {}", self.occupation),
            Some(occupation) => *occupation,
        };

        QuasiIdentifierTypes::Nominal((rank, 13, 1))
    }
}

impl Anonymizable for AdultLarge {
    fn quasi_identifiers(&self) -> Vec<QuasiIdentifierTypes> {
        let age = self.get_age_qi();
        let fnlwgt = self.get_fnlwgt_qi();
        let education_num = self.get_education_num_qi();
        let capital_gain = self.get_capital_gain_qi();
        let capital_loss = self.get_capital_loss_qi();
        let hours_per_week = self.get_hours_per_week_qi();
        let education = self.get_education_qi();
        let marital_status = self.get_marital_status_qi();
        let workclass = self.get_workclass_qi();
        let native_country = self.get_native_country_qi();
        let occupation = self.get_occupation_qi();

        vec![
            age,
            fnlwgt,
            education_num,
            capital_gain,
            capital_loss,
            hours_per_week,
            education,
            marital_status,
            workclass,
            native_country,
            occupation,
        ]
    }

    fn update_quasi_identifiers(&self, mut qi: Vec<QuasiIdentifierTypes>) -> Self {
        if let (
            QuasiIdentifierType::Integer(occupation),
            QuasiIdentifierType::Integer(native_country),
            QuasiIdentifierType::Integer(workclass),
            QuasiIdentifierType::Integer(marital_status),
            QuasiIdentifierType::Integer(education),
            QuasiIdentifierType::Integer(hours_per_week),
            QuasiIdentifierType::Integer(capital_loss),
            QuasiIdentifierType::Integer(capital_gain),
            QuasiIdentifierType::Integer(education_num),
            QuasiIdentifierType::Integer(fnlwgt),
            QuasiIdentifierType::Integer(age),
        ) = (
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
            qi.pop().unwrap().extract_value(),
        ) {
            Self {
                timestamp: self.timestamp,
                age,
                fnlwgt,
                education_num,
                capital_gain,
                capital_loss,
                hours_per_week,
                education: EDU_BIMAP.get_by_right(&education).unwrap().to_string(),
                marital_status: MAR_BIMAP.get_by_right(&marital_status).unwrap().to_string(),
                workclass: WORK_BIMAP.get_by_right(&workclass).unwrap().to_string(),
                native_country: NAT_BIMAP.get_by_right(&native_country).unwrap().to_string(),
                occupation: OCC_BIMAP.get_by_right(&occupation).unwrap().to_string(),
                class: self.class.to_owned(),
                time_generated: self.time_generated,
            }
        } else {
            panic!("Couldn't Adult with QI's")
        }
    }

    fn sensitive_value(&self) -> SensitiveAttribute {
        SensitiveAttribute::String(self.class.to_owned())
    }

    fn extract_string_values(&self, uuid: Uuid, dr: f64) -> Vec<String> {
        vec![
            uuid.to_string(),
            dr.to_string(),
            self.timestamp.to_string(),
            self.age.to_string(),
            self.fnlwgt.to_string(),
            self.education_num.to_string(),
            self.capital_gain.to_string(),
            self.capital_loss.to_string(),
            self.hours_per_week.to_string(),
            self.education.to_owned(),
            self.marital_status.to_owned(),
            self.workclass.to_owned(),
            self.native_country.to_owned(),
            self.occupation.to_owned(),
            self.class.to_owned(),
        ]
    }

    fn get_timestamp(&self) -> SystemTime {
        self.time_generated
    }
}