1use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct LinkageConfig {
16 pub max_reidentification_rate: f64,
19 pub min_k_anonymity: usize,
22}
23
24impl Default for LinkageConfig {
25 fn default() -> Self {
26 Self {
27 max_reidentification_rate: 0.05,
28 min_k_anonymity: 5,
29 }
30 }
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct LinkageResults {
36 pub re_identification_rate: f64,
38 pub k_anonymity_achieved: usize,
40 pub unique_qi_combos_original: usize,
42 pub unique_qi_combos_synthetic: usize,
44 pub overlapping_combos: usize,
46 pub uniquely_linked: usize,
48 pub total_synthetic: usize,
50 pub passes: bool,
52}
53
54pub struct LinkageAttack {
59 config: LinkageConfig,
60}
61
62impl LinkageAttack {
63 pub fn new(config: LinkageConfig) -> Self {
65 Self { config }
66 }
67
68 pub fn with_defaults() -> Self {
70 Self::new(LinkageConfig::default())
71 }
72
73 pub fn evaluate(
93 &self,
94 original_qis: &[Vec<String>],
95 synthetic_qis: &[Vec<String>],
96 ) -> LinkageResults {
97 if original_qis.is_empty() || synthetic_qis.is_empty() {
98 return LinkageResults {
99 re_identification_rate: 0.0,
100 k_anonymity_achieved: usize::MAX,
101 unique_qi_combos_original: 0,
102 unique_qi_combos_synthetic: 0,
103 overlapping_combos: 0,
104 uniquely_linked: 0,
105 total_synthetic: synthetic_qis.len(),
106 passes: true,
107 };
108 }
109
110 let mut original_freq: HashMap<Vec<String>, usize> = HashMap::new();
112 for qi in original_qis {
113 *original_freq.entry(qi.clone()).or_insert(0) += 1;
114 }
115
116 let mut synthetic_freq: HashMap<Vec<String>, usize> = HashMap::new();
118 for qi in synthetic_qis {
119 *synthetic_freq.entry(qi.clone()).or_insert(0) += 1;
120 }
121
122 let overlapping_combos = synthetic_freq
124 .keys()
125 .filter(|qi| original_freq.contains_key(*qi))
126 .count();
127
128 let mut uniquely_linked = 0usize;
132 for qi in synthetic_qis {
133 if let Some(&orig_count) = original_freq.get(qi) {
134 if orig_count == 1 {
135 uniquely_linked += 1;
136 }
137 }
138 }
139
140 let re_identification_rate = if synthetic_qis.is_empty() {
141 0.0
142 } else {
143 uniquely_linked as f64 / synthetic_qis.len() as f64
144 };
145
146 let k_anonymity_achieved = original_freq.values().copied().min().unwrap_or(0);
148
149 let passes = re_identification_rate <= self.config.max_reidentification_rate
150 && k_anonymity_achieved >= self.config.min_k_anonymity;
151
152 LinkageResults {
153 re_identification_rate,
154 k_anonymity_achieved,
155 unique_qi_combos_original: original_freq.len(),
156 unique_qi_combos_synthetic: synthetic_freq.len(),
157 overlapping_combos,
158 uniquely_linked,
159 total_synthetic: synthetic_qis.len(),
160 passes,
161 }
162 }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one quasi-identifier record from string slices.
    fn make_qi(fields: &[&str]) -> Vec<String> {
        fields.iter().map(|&field| field.to_owned()).collect()
    }

    /// Every original combination occurs five times, so no synthetic record
    /// links to a unique original and k-anonymity is 5.
    #[test]
    fn test_k_anonymized_data_low_reidentification() {
        let original: Vec<Vec<String>> = (0..5)
            .flat_map(|_| {
                vec![
                    make_qi(&["30-39", "100", "M"]),
                    make_qi(&["40-49", "200", "F"]),
                    make_qi(&["50-59", "300", "M"]),
                ]
            })
            .collect();

        let synthetic = vec![
            make_qi(&["30-39", "100", "M"]),
            make_qi(&["40-49", "200", "F"]),
            make_qi(&["50-59", "300", "M"]),
        ];

        let outcome = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(outcome.re_identification_rate, 0.0);
        assert_eq!(outcome.k_anonymity_achieved, 5);
        assert!(outcome.passes);
    }

    /// Every original record is unique and both synthetic records copy one,
    /// so the re-identification rate is 100%.
    #[test]
    fn test_unique_records_high_reidentification() {
        let original = vec![
            make_qi(&["25", "10001", "M"]),
            make_qi(&["32", "10002", "F"]),
            make_qi(&["45", "10003", "M"]),
            make_qi(&["58", "10004", "F"]),
        ];

        let synthetic = vec![
            make_qi(&["25", "10001", "M"]),
            make_qi(&["32", "10002", "F"]),
        ];

        let outcome = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert!((outcome.re_identification_rate - 1.0).abs() < 1e-10);
        assert_eq!(outcome.k_anonymity_achieved, 1);
        assert!(!outcome.passes);
    }

    /// Disjoint combinations produce no overlap and no links.
    #[test]
    fn test_no_overlap() {
        let original = vec![make_qi(&["A", "1"]), make_qi(&["B", "2"])];
        let synthetic = vec![make_qi(&["C", "3"]), make_qi(&["D", "4"])];

        let outcome = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(outcome.re_identification_rate, 0.0);
        assert_eq!(outcome.overlapping_combos, 0);
        assert_eq!(outcome.uniquely_linked, 0);
    }

    /// Empty inputs pass trivially with a zero rate.
    #[test]
    fn test_empty_datasets() {
        let outcome = LinkageAttack::with_defaults().evaluate(&[], &[]);

        assert!(outcome.passes);
        assert_eq!(outcome.re_identification_rate, 0.0);
    }

    /// The default config survives a JSON round trip.
    #[test]
    fn test_linkage_config_serde() {
        let encoded = serde_json::to_string(&LinkageConfig::default()).unwrap();
        let decoded: LinkageConfig = serde_json::from_str(&encoded).unwrap();

        assert!((decoded.max_reidentification_rate - 0.05).abs() < 1e-10);
        assert_eq!(decoded.min_k_anonymity, 5);
    }

    /// A results value survives a JSON round trip.
    #[test]
    fn test_linkage_results_serde() {
        let sample = LinkageResults {
            re_identification_rate: 0.02,
            k_anonymity_achieved: 10,
            unique_qi_combos_original: 50,
            unique_qi_combos_synthetic: 45,
            overlapping_combos: 30,
            uniquely_linked: 1,
            total_synthetic: 100,
            passes: true,
        };

        let encoded = serde_json::to_string(&sample).unwrap();
        let decoded: LinkageResults = serde_json::from_str(&encoded).unwrap();

        assert!((decoded.re_identification_rate - 0.02).abs() < 1e-10);
        assert_eq!(decoded.k_anonymity_achieved, 10);
    }

    /// Only the synthetic record matching the single unique original
    /// combination counts as a link.
    #[test]
    fn test_partial_overlap() {
        let original = vec![
            // Occurs once: unique in the original data.
            make_qi(&["A", "1"]),
            // Occurs twice.
            make_qi(&["B", "2"]),
            make_qi(&["B", "2"]),
            // Occurs three times.
            make_qi(&["C", "3"]),
            make_qi(&["C", "3"]),
            make_qi(&["C", "3"]),
        ];

        let synthetic = vec![
            make_qi(&["A", "1"]),
            make_qi(&["B", "2"]),
            make_qi(&["C", "3"]),
        ];

        let outcome = LinkageAttack::with_defaults().evaluate(&original, &synthetic);

        assert_eq!(outcome.uniquely_linked, 1);
        assert!((outcome.re_identification_rate - 1.0 / 3.0).abs() < 1e-10);
        assert_eq!(outcome.k_anonymity_achieved, 1);
    }
}