1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
//! Root fingerprint structure.
use serde::{Deserialize, Serialize};
use super::{
AnomalyFingerprint, BankingFingerprint, BehavioralPriors, CorrelationFingerprint,
IntegrityFingerprint, Manifest, PrivacyAudit, RulesFingerprint, SchemaFingerprint,
StatisticsFingerprint,
};
/// The root fingerprint structure containing all extracted components.
///
/// A fingerprint captures the statistical properties of a dataset without
/// storing any individual records, enabling privacy-preserving synthetic
/// data generation.
///
/// `manifest`, `schema`, `statistics` and `privacy_audit` are always present.
/// Every other component is an `Option`: it is `None` when that component was
/// not extracted, and in that case the field is left out of the serialized
/// output (see the `skip_serializing_if` attribute on each optional field).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fingerprint {
/// Metadata about the fingerprint (version, source, privacy config).
///
/// Required component.
pub manifest: Manifest,
/// Schema information (tables, columns, types, relationships).
///
/// Required component.
pub schema: SchemaFingerprint,
/// Statistical distributions for numeric and categorical columns.
///
/// Required component.
pub statistics: StatisticsFingerprint,
/// Correlation matrices and copulas for preserving relationships.
///
/// Optional; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub correlations: Option<CorrelationFingerprint>,
/// Referential integrity (foreign keys, cardinalities).
///
/// Optional; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub integrity: Option<IntegrityFingerprint>,
/// Business rules (balance constraints, approval thresholds).
///
/// Optional; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub rules: Option<RulesFingerprint>,
/// Anomaly patterns (rates, type distribution, temporal patterns).
///
/// Optional; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub anomalies: Option<AnomalyFingerprint>,
/// Banking/AML patterns (customer/account/typology distributions).
///
/// Optional; omitted from serialized output when `None`. The explicit
/// `default` makes input that lacks this key deserialize to `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub banking: Option<BankingFingerprint>,
/// Behavioral priors mined from corpus GL data (SP2).
///
/// Optional; omitted from serialized output when `None`. The explicit
/// `default` makes input that lacks this key deserialize to `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub behavioral: Option<BehavioralPriors>,
/// Privacy audit trail documenting all privacy decisions.
///
/// Required component.
pub privacy_audit: PrivacyAudit,
}
impl Fingerprint {
/// Create a new fingerprint with required components.
pub fn new(
manifest: Manifest,
schema: SchemaFingerprint,
statistics: StatisticsFingerprint,
privacy_audit: PrivacyAudit,
) -> Self {
Self {
manifest,
schema,
statistics,
correlations: None,
integrity: None,
rules: None,
anomalies: None,
banking: None,
behavioral: None,
privacy_audit,
}
}
/// Add banking fingerprint.
pub fn with_banking(mut self, banking: BankingFingerprint) -> Self {
self.banking = Some(banking);
self
}
/// Check if the fingerprint has banking data.
pub fn has_banking(&self) -> bool {
self.banking.is_some()
}
/// Add correlation fingerprint.
pub fn with_correlations(mut self, correlations: CorrelationFingerprint) -> Self {
self.correlations = Some(correlations);
self
}
/// Add integrity fingerprint.
pub fn with_integrity(mut self, integrity: IntegrityFingerprint) -> Self {
self.integrity = Some(integrity);
self
}
/// Add rules fingerprint.
pub fn with_rules(mut self, rules: RulesFingerprint) -> Self {
self.rules = Some(rules);
self
}
/// Add anomaly fingerprint.
pub fn with_anomalies(mut self, anomalies: AnomalyFingerprint) -> Self {
self.anomalies = Some(anomalies);
self
}
/// Get the fingerprint version.
pub fn version(&self) -> &str {
&self.manifest.version
}
/// Check if the fingerprint has correlation data.
pub fn has_correlations(&self) -> bool {
self.correlations.is_some()
}
/// Check if the fingerprint has integrity constraints.
pub fn has_integrity(&self) -> bool {
self.integrity.is_some()
}
/// Check if the fingerprint has business rules.
pub fn has_rules(&self) -> bool {
self.rules.is_some()
}
/// Check if the fingerprint has anomaly patterns.
pub fn has_anomalies(&self) -> bool {
self.anomalies.is_some()
}
/// Get total epsilon spent on privacy.
pub fn epsilon_spent(&self) -> f64 {
self.privacy_audit.total_epsilon_spent
}
}