use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// Fingerprint version string that `Manifest::new` stores in `Manifest::version`.
pub const FINGERPRINT_VERSION: &str = "1.0.0";
/// Fingerprint format identifier that `Manifest::new` stores in `Manifest::format`.
pub const FINGERPRINT_FORMAT: &str = "dsf";
/// Manifest record for a fingerprint: version/format identification, creation
/// time, source and privacy metadata, per-file checksums and an optional
/// signature.
///
/// Fields are serialized in declaration order, so reordering them changes the
/// emitted document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Fingerprint version; `Manifest::new` fills this with `FINGERPRINT_VERSION`.
    pub version: String,
    /// Fingerprint format identifier; `Manifest::new` fills this with `FINGERPRINT_FORMAT`.
    pub format: String,
    /// UTC timestamp taken when the manifest value was constructed.
    pub created_at: DateTime<Utc>,
    /// Description of the data the fingerprint was derived from.
    pub source: SourceMetadata,
    /// Privacy parameters associated with the fingerprint.
    pub privacy: PrivacyMetadata,
    /// Checksums keyed by file name (see `Manifest::add_checksum`).
    pub checksums: HashMap<String, String>,
    /// Optional signature. Omitted from serialized output when `None`; an
    /// absent key deserializes to `None`. `default` is spelled out to match
    /// the other optional fields in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature: Option<SignatureMetadata>,
}
impl Manifest {
pub fn new(source: SourceMetadata, privacy: PrivacyMetadata) -> Self {
Self {
version: FINGERPRINT_VERSION.to_string(),
format: FINGERPRINT_FORMAT.to_string(),
created_at: Utc::now(),
source,
privacy,
checksums: HashMap::new(),
signature: None,
}
}
pub fn add_checksum(&mut self, file: impl Into<String>, checksum: impl Into<String>) {
self.checksums.insert(file.into(), checksum.into());
}
pub fn verify_checksums(&self) -> bool {
let required = ["schema.yaml", "statistics.yaml"];
required.iter().all(|f| self.checksums.contains_key(*f))
}
}
/// Description of the data a fingerprint was derived from.
///
/// Fields are serialized in declaration order. Every optional field uses the
/// same `default` + `skip_serializing_if` pair, so an absent key and an empty
/// value round-trip identically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceMetadata {
    /// Free-form description of the source.
    pub description: String,
    /// Number of tables; `SourceMetadata::new` sets this to `tables.len()`.
    pub table_count: usize,
    /// Total row count as supplied by the caller.
    pub total_rows: u64,
    /// Names of the tables.
    pub tables: Vec<String>,
    /// Optional date range. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub date_range: Option<DateRange>,
    /// Optional industry label. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub industry: Option<String>,
    /// Optional country code. Omitted from output when `None`.
    /// NOTE(review): the expected code format is not enforced here — confirm with producers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub country_code: Option<String>,
    /// Additional key/value metadata. Omitted from output when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,
}
impl SourceMetadata {
pub fn new(description: impl Into<String>, tables: Vec<String>, total_rows: u64) -> Self {
Self {
description: description.into(),
table_count: tables.len(),
total_rows,
tables,
date_range: None,
industry: None,
country_code: None,
metadata: HashMap::new(),
}
}
}
/// Start and end of a date range, both stored as strings.
///
/// NOTE(review): no date format or inclusive/exclusive convention is enforced
/// or visible here — confirm with the code that produces these values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateRange {
/// Start of the range.
pub start: String,
/// End of the range.
pub end: String,
}
/// Privacy parameters recorded alongside a fingerprint.
///
/// `PrivacyMetadata::from_level` fills these from a preset table, while
/// `PrivacyMetadata::custom` takes `epsilon` and `k_anonymity` from the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrivacyMetadata {
/// Privacy level these parameters are associated with.
pub level: PrivacyLevel,
/// Epsilon value. NOTE(review): presumably the differential-privacy budget — confirm.
pub epsilon: f64,
/// k-anonymity threshold.
pub k_anonymity: u32,
/// Outlier percentile; the presets use values from 85.0 to 99.0.
pub outlier_percentile: f64,
/// Names of suppressed fields. Defaults to empty when absent and is omitted
/// from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub suppressed_fields: Vec<String>,
/// Minimum occurrence count; both constructors set it to the same number as `k_anonymity`.
pub min_occurrence: u32,
/// Optional delta value; never set by the constructors in this file.
/// NOTE(review): presumably the delta of (epsilon, delta)-differential privacy — confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub delta: Option<f64>,
/// Optional name of a composition method; never set by the constructors in this file.
/// NOTE(review): accepted values are not visible here — confirm with consumers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub composition_method: Option<String>,
}
impl PrivacyMetadata {
    /// Builds the preset privacy parameters for `level`.
    ///
    /// Epsilon and k-anonymity are taken from `PrivacyLevel::epsilon` and
    /// `PrivacyLevel::k_anonymity` so the presets are defined in one place.
    /// The remaining presets are:
    ///
    /// | level               | outlier percentile | min occurrence |
    /// |---------------------|--------------------|----------------|
    /// | `Minimal`           | 99.0               | 3              |
    /// | `Standard`/`Custom` | 95.0               | 5              |
    /// | `High`              | 90.0               | 10             |
    /// | `Maximum`           | 85.0               | 20             |
    ///
    /// No fields are suppressed, and `delta` and `composition_method` are unset.
    pub fn from_level(level: PrivacyLevel) -> Self {
        let epsilon = level.epsilon();
        let k_anonymity = level.k_anonymity();
        let (outlier_percentile, min_occurrence) = match level {
            PrivacyLevel::Minimal => (99.0, 3),
            PrivacyLevel::Standard | PrivacyLevel::Custom => (95.0, 5),
            PrivacyLevel::High => (90.0, 10),
            PrivacyLevel::Maximum => (85.0, 20),
        };
        Self {
            level,
            epsilon,
            k_anonymity,
            outlier_percentile,
            suppressed_fields: Vec::new(),
            min_occurrence,
            delta: None,
            composition_method: None,
        }
    }

    /// Builds privacy parameters from a caller-chosen `epsilon` and `k_anonymity`.
    ///
    /// The result is tagged `PrivacyLevel::Custom` so that hand-picked
    /// parameters can be told apart from the `Standard` preset. The outlier
    /// percentile is 95.0, `min_occurrence` equals `k_anonymity`, no fields
    /// are suppressed, and `delta` and `composition_method` are unset.
    pub fn custom(epsilon: f64, k_anonymity: u32) -> Self {
        Self {
            // Previously tagged as `Standard`, which mislabelled custom parameters.
            level: PrivacyLevel::Custom,
            epsilon,
            k_anonymity,
            outlier_percentile: 95.0,
            suppressed_fields: Vec::new(),
            min_occurrence: k_anonymity,
            delta: None,
            composition_method: None,
        }
    }
}
/// Named privacy presets.
///
/// Serialized in `snake_case` (for example `minimal`); `Standard` is the
/// `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum PrivacyLevel {
/// Preset with epsilon 5.0 and k-anonymity 3.
Minimal,
/// Default preset with epsilon 1.0 and k-anonymity 5.
#[default]
Standard,
/// Preset with epsilon 0.5 and k-anonymity 10.
High,
/// Preset with epsilon 0.1 and k-anonymity 20.
Maximum,
/// Marker for caller-chosen parameters; `epsilon()` and `k_anonymity()`
/// return the same values as `Standard` for it.
Custom,
}
impl PrivacyLevel {
pub fn epsilon(&self) -> f64 {
match self {
Self::Minimal => 5.0,
Self::Standard => 1.0,
Self::High => 0.5,
Self::Maximum => 0.1,
Self::Custom => 1.0,
}
}
pub fn k_anonymity(&self) -> u32 {
match self {
Self::Minimal => 3,
Self::Standard => 5,
Self::High => 10,
Self::Maximum => 20,
Self::Custom => 5,
}
}
}
/// Signature information that can be attached to a `Manifest`.
///
/// NOTE(review): nothing in this file creates or verifies signatures; the
/// accepted algorithm names and the signature encoding must be confirmed with
/// the signing code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignatureMetadata {
/// Name of the signature algorithm.
pub algorithm: String,
/// Identifier of the key used for signing.
pub key_id: String,
/// The signature value, stored as a string.
pub signature: String,
/// UTC timestamp of when the signature was made.
pub signed_at: DateTime<Utc>,
}