1use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct LinkageConfig {
16 pub max_reidentification_rate: f64,
19 pub min_k_anonymity: usize,
22}
23
24impl Default for LinkageConfig {
25 fn default() -> Self {
26 Self {
27 max_reidentification_rate: 0.05,
28 min_k_anonymity: 5,
29 }
30 }
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct LinkageResults {
36 pub re_identification_rate: f64,
38 pub k_anonymity_achieved: usize,
40 pub unique_qi_combos_original: usize,
42 pub unique_qi_combos_synthetic: usize,
44 pub overlapping_combos: usize,
46 pub uniquely_linked: usize,
48 pub total_synthetic: usize,
50 pub passes: bool,
52}
53
54pub struct LinkageAttack {
59 config: LinkageConfig,
60}
61
62impl LinkageAttack {
63 pub fn new(config: LinkageConfig) -> Self {
65 Self { config }
66 }
67
68 pub fn with_defaults() -> Self {
70 Self::new(LinkageConfig::default())
71 }
72
73 pub fn evaluate(
93 &self,
94 original_qis: &[Vec<String>],
95 synthetic_qis: &[Vec<String>],
96 ) -> LinkageResults {
97 if original_qis.is_empty() || synthetic_qis.is_empty() {
98 return LinkageResults {
99 re_identification_rate: 0.0,
100 k_anonymity_achieved: usize::MAX,
101 unique_qi_combos_original: 0,
102 unique_qi_combos_synthetic: 0,
103 overlapping_combos: 0,
104 uniquely_linked: 0,
105 total_synthetic: synthetic_qis.len(),
106 passes: true,
107 };
108 }
109
110 let mut original_freq: HashMap<Vec<String>, usize> = HashMap::new();
112 for qi in original_qis {
113 *original_freq.entry(qi.clone()).or_insert(0) += 1;
114 }
115
116 let mut synthetic_freq: HashMap<Vec<String>, usize> = HashMap::new();
118 for qi in synthetic_qis {
119 *synthetic_freq.entry(qi.clone()).or_insert(0) += 1;
120 }
121
122 let overlapping_combos = synthetic_freq
124 .keys()
125 .filter(|qi| original_freq.contains_key(*qi))
126 .count();
127
128 let mut uniquely_linked = 0usize;
132 for qi in synthetic_qis {
133 if let Some(&orig_count) = original_freq.get(qi) {
134 if orig_count == 1 {
135 uniquely_linked += 1;
136 }
137 }
138 }
139
140 let re_identification_rate = if synthetic_qis.is_empty() {
141 0.0
142 } else {
143 uniquely_linked as f64 / synthetic_qis.len() as f64
144 };
145
146 let k_anonymity_achieved = original_freq.values().copied().min().unwrap_or(0);
148
149 let passes = re_identification_rate <= self.config.max_reidentification_rate
150 && k_anonymity_achieved >= self.config.min_k_anonymity;
151
152 LinkageResults {
153 re_identification_rate,
154 k_anonymity_achieved,
155 unique_qi_combos_original: original_freq.len(),
156 unique_qi_combos_synthetic: synthetic_freq.len(),
157 overlapping_combos,
158 uniquely_linked,
159 total_synthetic: synthetic_qis.len(),
160 passes,
161 }
162 }
163}
164
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds one owned quasi-identifier tuple from string literals.
    fn make_qi(fields: &[&str]) -> Vec<String> {
        let mut tuple = Vec::with_capacity(fields.len());
        for field in fields {
            tuple.push(String::from(*field));
        }
        tuple
    }

    /// Builds a dataset, one quasi-identifier tuple per row, in row order.
    fn make_rows(rows: &[&[&str]]) -> Vec<Vec<String>> {
        rows.iter().map(|fields| make_qi(fields)).collect()
    }

    /// Every original combination occurs five times, so nothing links
    /// uniquely and the default thresholds are met.
    #[test]
    fn test_k_anonymized_data_low_reidentification() {
        let buckets: [&[&str]; 3] = [
            &["30-39", "100", "M"],
            &["40-49", "200", "F"],
            &["50-59", "300", "M"],
        ];

        let mut original = Vec::new();
        for _ in 0..5 {
            original.extend(make_rows(&buckets));
        }
        let synthetic = make_rows(&buckets);

        let results = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(results.re_identification_rate, 0.0);
        assert_eq!(results.k_anonymity_achieved, 5);
        assert!(results.passes);
    }

    /// Every original record is unique, so each matching synthetic record
    /// is re-identified and the evaluation fails.
    #[test]
    fn test_unique_records_high_reidentification() {
        let original = make_rows(&[
            &["25", "10001", "M"],
            &["32", "10002", "F"],
            &["45", "10003", "M"],
            &["58", "10004", "F"],
        ]);
        let synthetic = make_rows(&[&["25", "10001", "M"], &["32", "10002", "F"]]);

        let results = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert!((results.re_identification_rate - 1.0).abs() < 1e-10);
        assert_eq!(results.k_anonymity_achieved, 1);
        assert!(!results.passes);
    }

    /// Disjoint datasets produce no overlap and no linked records.
    #[test]
    fn test_no_overlap() {
        let original = make_rows(&[&["A", "1"], &["B", "2"]]);
        let synthetic = make_rows(&[&["C", "3"], &["D", "4"]]);

        let results = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(results.re_identification_rate, 0.0);
        assert_eq!(results.overlapping_combos, 0);
        assert_eq!(results.uniquely_linked, 0);
    }

    /// Two empty datasets pass with a zero rate.
    #[test]
    fn test_empty_datasets() {
        let results = LinkageAttack::with_defaults().evaluate(&[], &[]);

        assert!(results.passes);
        assert_eq!(results.re_identification_rate, 0.0);
    }

    /// The default configuration survives a JSON round trip.
    #[test]
    fn test_linkage_config_serde() {
        let encoded = serde_json::to_string(&LinkageConfig::default()).unwrap();
        let decoded: LinkageConfig = serde_json::from_str(&encoded).unwrap();

        assert!((decoded.max_reidentification_rate - 0.05).abs() < 1e-10);
        assert_eq!(decoded.min_k_anonymity, 5);
    }

    /// A populated result survives a JSON round trip.
    #[test]
    fn test_linkage_results_serde() {
        let sample = LinkageResults {
            re_identification_rate: 0.02,
            k_anonymity_achieved: 10,
            unique_qi_combos_original: 50,
            unique_qi_combos_synthetic: 45,
            overlapping_combos: 30,
            uniquely_linked: 1,
            total_synthetic: 100,
            passes: true,
        };

        let encoded = serde_json::to_string(&sample).unwrap();
        let decoded: LinkageResults = serde_json::from_str(&encoded).unwrap();

        assert!((decoded.re_identification_rate - 0.02).abs() < 1e-10);
        assert_eq!(decoded.k_anonymity_achieved, 10);
    }

    /// Original groups of size one, two and three: only the synthetic record
    /// matching the size-one group counts as uniquely linked.
    #[test]
    fn test_partial_overlap() {
        let original = make_rows(&[
            &["A", "1"],
            &["B", "2"],
            &["B", "2"],
            &["C", "3"],
            &["C", "3"],
            &["C", "3"],
        ]);
        let synthetic = make_rows(&[&["A", "1"], &["B", "2"], &["C", "3"]]);

        let results = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(results.uniquely_linked, 1);
        assert!((results.re_identification_rate - 1.0 / 3.0).abs() < 1e-10);
        assert_eq!(results.k_anonymity_achieved, 1);
    }
}