use crate::data_manipulation::anonymizable::{
IntervalType, NominalType, OrdinalType, QuasiIdentifierType, QuasiIdentifierTypes,
};
use itertools::Itertools;
/// Strategy used to collapse a list of quasi-identifiers into a single value.
///
/// The wrapped list is consumed by `AggregateType::aggregate`.
pub enum AggregateType {
/// Arithmetic mean. `aggregate` panics unless the last element of the list
/// is a `QuasiIdentifierTypes::Interval`.
Mean(Vec<QuasiIdentifierTypes>),
/// Most frequent value. `aggregate` panics unless the last element of the
/// list is a `QuasiIdentifierTypes::Ordinal` or `QuasiIdentifierTypes::Nominal`.
Mode(Vec<QuasiIdentifierTypes>),
}
impl AggregateType {
pub fn aggregate(self) -> QuasiIdentifierTypes {
match self {
AggregateType::Mean(mut list) => match list.pop().unwrap() {
QuasiIdentifierTypes::Interval(interval) => {
Self::aggregate_interval(interval, list)
}
_ => panic!("Wrong QI type found during aggregation for Mean"),
},
AggregateType::Mode(mut list) => {
match list.pop().unwrap() {
QuasiIdentifierTypes::Ordinal(ordinal) => {
Self::aggregate_ordinal(ordinal, list)
}
QuasiIdentifierTypes::Nominal(nominal) => {
Self::aggregate_nominal(nominal, list)
}
_ => panic!("Wrong QI type for calculating mode"),
}
}
}
}
fn aggregate_interval(
interval: IntervalType,
list: Vec<QuasiIdentifierTypes>,
) -> QuasiIdentifierTypes {
let size = list.len() + 1;
match interval {
(
QuasiIdentifierType::Float(value),
QuasiIdentifierType::Float(min),
QuasiIdentifierType::Float(max),
weight,
) => {
let sum: f64 = list
.into_iter()
.map(|x| match x.extract_value() {
QuasiIdentifierType::Float(temp) => temp,
_ => panic!("Wrong type found for Mean aggregation"),
})
.sum();
QuasiIdentifierTypes::Interval((
QuasiIdentifierType::Float((value + sum) / size as f64),
QuasiIdentifierType::Float(min),
QuasiIdentifierType::Float(max),
weight,
))
}
(
QuasiIdentifierType::Integer(value),
QuasiIdentifierType::Integer(min),
QuasiIdentifierType::Integer(max),
weight,
) => {
let sum: i32 = list
.into_iter()
.map(|x| match x.extract_value() {
QuasiIdentifierType::Integer(temp) => temp,
_ => panic!("Wrong type found for Mean aggregation"),
})
.sum();
QuasiIdentifierTypes::Interval((
QuasiIdentifierType::Integer((value + sum) / size as i32),
QuasiIdentifierType::Integer(min),
QuasiIdentifierType::Integer(max),
weight,
))
}
_ => panic!("Wrong interval type set found during aggregation"),
}
}
fn aggregate_ordinal(
ordinal: OrdinalType,
list: Vec<QuasiIdentifierTypes>,
) -> QuasiIdentifierTypes {
let (rank, max_rank, weight) = ordinal;
let mut mode_list = Vec::new();
list.into_iter().for_each(|x| match x {
QuasiIdentifierTypes::Ordinal((temp, _, _)) => mode_list.push(temp),
_ => panic!("Wrong QI type"),
});
mode_list.push(rank);
let mode = Self::get_mode(mode_list);
QuasiIdentifierTypes::Ordinal((mode, max_rank, weight))
}
fn aggregate_nominal(
nominal: NominalType,
list: Vec<QuasiIdentifierTypes>,
) -> QuasiIdentifierTypes {
let (value, max_value, weight) = nominal;
let mut mode_list = Vec::new();
list.into_iter().for_each(|x| match x {
QuasiIdentifierTypes::Nominal((temp, _, _)) => mode_list.push(temp),
_ => panic!("Wrong QI type"),
});
mode_list.push(value);
let mode = Self::get_mode(mode_list);
QuasiIdentifierTypes::Nominal((mode, max_value, weight))
}
fn get_mode(mode_list: Vec<i32>) -> i32 {
let mut mode_grouped: Vec<(i32, Vec<i32>)> = Vec::new();
for (key, group) in &mode_list.into_iter().sorted().group_by(|&x| x) {
mode_grouped.push((key, group.collect()))
}
let (mode, _) = mode_grouped
.into_iter()
.map(|(key, group)| (key, group.len()))
.max_by_key(|(_, group)| *group)
.unwrap();
mode
}
}
/// Clamps `value` to the domain delimited by `min` and `max`.
///
/// The lower bound is tested first: `min` is returned when `value <= min`,
/// otherwise `max` is returned when `value >= max`. A value for which neither
/// comparison holds (strictly inside the domain, or not comparable to the
/// bounds) is handed back unchanged.
pub fn truncate_to_domain<T: PartialOrd>(value: T, min: T, max: T) -> T {
    if value <= min {
        min
    } else if value >= max {
        max
    } else {
        value
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An integer strictly inside the domain is returned unchanged.
    #[test]
    fn truncating_to_domain_integer() {
        let min = 0;
        let value = 5;
        let max = 10;
        assert_eq!(truncate_to_domain(value, min, max), 5)
    }

    /// An integer above the domain is truncated to the upper bound.
    #[test]
    fn truncate_to_domain_integer_max() {
        let min = 0;
        let value = 11;
        let max = 10;
        assert_eq!(truncate_to_domain(value, min, max), 10)
    }

    /// An integer below the domain is truncated to the lower bound.
    #[test]
    fn truncate_to_domain_integer_min() {
        let min = 0;
        let value = -1;
        let max = 10;
        assert_eq!(truncate_to_domain(value, min, max), 0)
    }

    /// A float strictly inside the domain is returned unchanged.
    #[test]
    fn truncate_to_domain_float() {
        let min: f64 = 0.0;
        let value: f64 = 5.0;
        let max: f64 = 10.0;
        // Compare the absolute difference: without `abs` any result below
        // the expected value would satisfy the assertion.
        assert!((truncate_to_domain(value, min, max) - 5.0).abs() <= f64::EPSILON)
    }

    /// A float above the domain is truncated to the upper bound.
    #[test]
    fn truncate_to_domain_float_max() {
        let min: f64 = 0.0;
        let value: f64 = 10.0;
        let max: f64 = 5.0;
        assert!((truncate_to_domain(value, min, max) - 5.0).abs() <= f64::EPSILON)
    }

    /// A float below the domain is truncated to the lower bound.
    #[test]
    fn truncate_to_domain_float_min() {
        let min: f64 = 0.0;
        let value: f64 = -2.5;
        let max: f64 = 5.0;
        assert!((truncate_to_domain(value, min, max) - 0.0).abs() <= f64::EPSILON)
    }
}