// datasynth_eval/privacy/metrics.rs

//! Privacy metrics: NIST SP 800-226 alignment and SynQP quality-privacy matrix.
//!
//! Provides a structured self-assessment against NIST standards for synthetic data
//! and a quality-privacy evaluation quadrant (SynQP) for high-level classification.
5
6use serde::{Deserialize, Serialize};
7
8/// NIST SP 800-226 alignment self-assessment report.
9///
10/// Maps DataSynth's privacy controls to NIST criteria for evaluating
11/// de-identification and synthetic data methodologies.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct NistAlignmentReport {
14    /// Whether differential privacy is applied.
15    pub differential_privacy_applied: bool,
16    /// Epsilon value used (if applicable).
17    pub epsilon: Option<f64>,
18    /// Delta value used (if applicable).
19    pub delta: Option<f64>,
20    /// The composition method used.
21    pub composition_method: Option<String>,
22    /// Whether k-anonymity is enforced.
23    pub k_anonymity_enforced: bool,
24    /// The k-anonymity level achieved.
25    pub k_anonymity_level: Option<usize>,
26    /// Whether membership inference was tested.
27    pub membership_inference_tested: bool,
28    /// MIA AUC-ROC result (if tested).
29    pub mia_auc_roc: Option<f64>,
30    /// Whether linkage attack was tested.
31    pub linkage_attack_tested: bool,
32    /// Re-identification rate (if tested).
33    pub re_identification_rate: Option<f64>,
34    /// Overall NIST alignment score (0.0-1.0).
35    /// Based on how many criteria are met.
36    pub alignment_score: f64,
37    /// Individual criterion assessments.
38    pub criteria: Vec<NistCriterion>,
39    /// Whether the overall assessment passes.
40    pub passes: bool,
41}
42
43/// A single NIST criterion assessment.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct NistCriterion {
46    /// Criterion identifier (e.g., "DP-1", "KA-1", "MIA-1").
47    pub id: String,
48    /// Human-readable description.
49    pub description: String,
50    /// Whether this criterion is met.
51    pub met: bool,
52    /// Evidence or rationale.
53    pub evidence: String,
54}
55
56impl NistAlignmentReport {
57    /// Build a NIST alignment report from privacy evaluation results.
58    pub fn build(
59        dp_applied: bool,
60        epsilon: Option<f64>,
61        delta: Option<f64>,
62        composition_method: Option<String>,
63        k_anonymity_enforced: bool,
64        k_anonymity_level: Option<usize>,
65        mia_auc_roc: Option<f64>,
66        re_identification_rate: Option<f64>,
67    ) -> Self {
68        let mut criteria = Vec::new();
69
70        // DP criteria
71        criteria.push(NistCriterion {
72            id: "DP-1".to_string(),
73            description: "Differential privacy mechanism applied".to_string(),
74            met: dp_applied,
75            evidence: if dp_applied {
76                format!(
77                    "DP applied with epsilon={}, delta={}, method={}",
78                    epsilon.map_or("N/A".to_string(), |e| format!("{e:.4}")),
79                    delta.map_or("N/A".to_string(), |d| format!("{d:.2e}")),
80                    composition_method.as_deref().unwrap_or("naive"),
81                )
82            } else {
83                "No differential privacy mechanism applied".to_string()
84            },
85        });
86
87        criteria.push(NistCriterion {
88            id: "DP-2".to_string(),
89            description: "Epsilon within reasonable bounds (< 10.0)".to_string(),
90            met: epsilon.is_some_and(|e| e < 10.0),
91            evidence: epsilon.map_or("No epsilon specified".to_string(), |e| {
92                format!("Epsilon = {e:.4}")
93            }),
94        });
95
96        // K-anonymity criteria
97        criteria.push(NistCriterion {
98            id: "KA-1".to_string(),
99            description: "K-anonymity enforced with k >= 5".to_string(),
100            met: k_anonymity_enforced && k_anonymity_level.is_some_and(|k| k >= 5),
101            evidence: if k_anonymity_enforced {
102                format!(
103                    "K-anonymity enforced, k = {}",
104                    k_anonymity_level.map_or("unknown".to_string(), |k| k.to_string())
105                )
106            } else {
107                "K-anonymity not enforced".to_string()
108            },
109        });
110
111        // MIA criteria
112        let mia_tested = mia_auc_roc.is_some();
113        criteria.push(NistCriterion {
114            id: "MIA-1".to_string(),
115            description: "Membership inference attack tested".to_string(),
116            met: mia_tested,
117            evidence: if mia_tested {
118                format!("MIA AUC-ROC = {:.4}", mia_auc_roc.unwrap_or(0.0))
119            } else {
120                "MIA not tested".to_string()
121            },
122        });
123
124        criteria.push(NistCriterion {
125            id: "MIA-2".to_string(),
126            description: "MIA AUC-ROC < 0.6 (near-random)".to_string(),
127            met: mia_auc_roc.is_some_and(|auc| auc < 0.6),
128            evidence: mia_auc_roc.map_or("MIA not tested".to_string(), |auc| {
129                format!("AUC-ROC = {auc:.4}")
130            }),
131        });
132
133        // Linkage criteria
134        let linkage_tested = re_identification_rate.is_some();
135        criteria.push(NistCriterion {
136            id: "LA-1".to_string(),
137            description: "Linkage attack tested".to_string(),
138            met: linkage_tested,
139            evidence: if linkage_tested {
140                format!(
141                    "Re-identification rate = {:.4}",
142                    re_identification_rate.unwrap_or(0.0)
143                )
144            } else {
145                "Linkage attack not tested".to_string()
146            },
147        });
148
149        criteria.push(NistCriterion {
150            id: "LA-2".to_string(),
151            description: "Re-identification rate < 5%".to_string(),
152            met: re_identification_rate.is_some_and(|r| r < 0.05),
153            evidence: re_identification_rate.map_or("Not tested".to_string(), |r| {
154                format!("Re-identification rate = {:.2}%", r * 100.0)
155            }),
156        });
157
158        let met_count = criteria.iter().filter(|c| c.met).count();
159        let alignment_score = if criteria.is_empty() {
160            0.0
161        } else {
162            met_count as f64 / criteria.len() as f64
163        };
164
165        // Pass if at least 5 out of 7 criteria are met
166        let passes = met_count >= 5;
167
168        Self {
169            differential_privacy_applied: dp_applied,
170            epsilon,
171            delta,
172            composition_method,
173            k_anonymity_enforced,
174            k_anonymity_level,
175            membership_inference_tested: mia_tested,
176            mia_auc_roc,
177            linkage_attack_tested: linkage_tested,
178            re_identification_rate,
179            alignment_score,
180            criteria,
181            passes,
182        }
183    }
184}
185
186/// Quality-Privacy evaluation quadrant (SynQP).
187///
188/// Classifies synthetic data output into one of four quadrants based on
189/// how well it balances data quality (utility) with privacy protection.
190#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
191pub enum SynQPQuadrant {
192    /// High quality, high privacy — the ideal outcome.
193    HighQHighP,
194    /// High quality, low privacy — useful but risky.
195    HighQLowP,
196    /// Low quality, high privacy — safe but less useful.
197    LowQHighP,
198    /// Low quality, low privacy — worst outcome.
199    LowQLowP,
200}
201
202impl std::fmt::Display for SynQPQuadrant {
203    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
204        match self {
205            Self::HighQHighP => write!(f, "High Quality / High Privacy (Ideal)"),
206            Self::HighQLowP => write!(f, "High Quality / Low Privacy (Risky)"),
207            Self::LowQHighP => write!(f, "Low Quality / High Privacy (Conservative)"),
208            Self::LowQLowP => write!(f, "Low Quality / Low Privacy (Poor)"),
209        }
210    }
211}
212
213/// SynQP matrix evaluation result.
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct SynQPMatrix {
216    /// Quality score (0.0 - 1.0). Derived from data evaluation metrics.
217    pub quality_score: f64,
218    /// Privacy score (0.0 - 1.0). Derived from privacy evaluation metrics.
219    pub privacy_score: f64,
220    /// The quadrant classification.
221    pub quadrant: SynQPQuadrant,
222    /// Quality threshold for high/low classification.
223    pub quality_threshold: f64,
224    /// Privacy threshold for high/low classification.
225    pub privacy_threshold: f64,
226}
227
228impl SynQPMatrix {
229    /// Compute the SynQP matrix from quality and privacy scores.
230    ///
231    /// # Arguments
232    /// * `quality_score` - Overall data quality score (0.0-1.0, higher = better quality)
233    /// * `privacy_score` - Overall privacy score (0.0-1.0, higher = better privacy)
234    /// * `quality_threshold` - Threshold for high vs low quality (default: 0.7)
235    /// * `privacy_threshold` - Threshold for high vs low privacy (default: 0.7)
236    pub fn evaluate(
237        quality_score: f64,
238        privacy_score: f64,
239        quality_threshold: f64,
240        privacy_threshold: f64,
241    ) -> Self {
242        let quadrant = match (
243            quality_score >= quality_threshold,
244            privacy_score >= privacy_threshold,
245        ) {
246            (true, true) => SynQPQuadrant::HighQHighP,
247            (true, false) => SynQPQuadrant::HighQLowP,
248            (false, true) => SynQPQuadrant::LowQHighP,
249            (false, false) => SynQPQuadrant::LowQLowP,
250        };
251
252        Self {
253            quality_score,
254            privacy_score,
255            quadrant,
256            quality_threshold,
257            privacy_threshold,
258        }
259    }
260
261    /// Evaluate with default thresholds (0.7 for both).
262    pub fn evaluate_default(quality_score: f64, privacy_score: f64) -> Self {
263        Self::evaluate(quality_score, privacy_score, 0.7, 0.7)
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of criteria flagged as met in `report`.
    fn met_count(report: &NistAlignmentReport) -> usize {
        report.criteria.iter().filter(|c| c.met).count()
    }

    #[test]
    fn test_nist_report_all_criteria_met() {
        let report = NistAlignmentReport::build(
            true,                         // DP applied
            Some(1.0),                    // epsilon
            Some(1e-5),                   // delta
            Some("renyi_dp".to_string()), // composition method
            true,                         // k-anonymity enforced
            Some(10),                     // k
            Some(0.52),                   // MIA AUC-ROC
            Some(0.01),                   // re-identification rate
        );

        assert!(report.passes);
        assert!(report.alignment_score > 0.9);
        assert_eq!(report.criteria.len(), 7);
        assert_eq!(met_count(&report), report.criteria.len());
    }

    #[test]
    fn test_nist_report_no_privacy() {
        let report = NistAlignmentReport::build(
            false, // no DP
            None,  // no epsilon
            None,  // no delta
            None,  // no composition method
            false, // no k-anonymity
            None,  // no k level
            None,  // no MIA
            None,  // no linkage
        );

        assert!(!report.passes);
        assert_eq!(report.alignment_score, 0.0);
        assert_eq!(met_count(&report), 0);
    }

    #[test]
    fn test_nist_report_partial() {
        let report = NistAlignmentReport::build(
            true,
            Some(5.0),
            Some(1e-5),
            Some("naive".to_string()),
            true,
            Some(3),    // k = 3 is below the k >= 5 threshold
            Some(0.55), // passes MIA
            Some(0.03), // passes linkage
        );

        // Met: DP-1, DP-2 (5 < 10), MIA-1, MIA-2 (0.55 < 0.6), LA-1,
        // LA-2 (0.03 < 0.05). Not met: KA-1 (3 < 5).
        assert_eq!(met_count(&report), 6); // 6 out of 7
        assert!(report.passes);
    }

    #[test]
    fn test_nist_report_serde() {
        let original = NistAlignmentReport::build(
            true,
            Some(1.0),
            Some(1e-5),
            None,
            true,
            Some(10),
            Some(0.5),
            Some(0.01),
        );

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: NistAlignmentReport = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.criteria.len(), 7);
        assert!(decoded.passes);
    }

    #[test]
    fn test_synqp_high_quality_high_privacy() {
        let quadrant = SynQPMatrix::evaluate_default(0.85, 0.90).quadrant;
        assert_eq!(quadrant, SynQPQuadrant::HighQHighP);
    }

    #[test]
    fn test_synqp_high_quality_low_privacy() {
        let quadrant = SynQPMatrix::evaluate_default(0.85, 0.40).quadrant;
        assert_eq!(quadrant, SynQPQuadrant::HighQLowP);
    }

    #[test]
    fn test_synqp_low_quality_high_privacy() {
        let quadrant = SynQPMatrix::evaluate_default(0.30, 0.90).quadrant;
        assert_eq!(quadrant, SynQPQuadrant::LowQHighP);
    }

    #[test]
    fn test_synqp_low_quality_low_privacy() {
        let quadrant = SynQPMatrix::evaluate_default(0.30, 0.40).quadrant;
        assert_eq!(quadrant, SynQPQuadrant::LowQLowP);
    }

    #[test]
    fn test_synqp_custom_thresholds() {
        // Thresholds of 0.3 put mid-range scores of 0.5 on the "high" side.
        let quadrant = SynQPMatrix::evaluate(0.5, 0.5, 0.3, 0.3).quadrant;
        assert_eq!(quadrant, SynQPQuadrant::HighQHighP);
    }

    #[test]
    fn test_synqp_display() {
        assert_eq!(
            SynQPQuadrant::HighQHighP.to_string(),
            "High Quality / High Privacy (Ideal)"
        );
        assert_eq!(
            SynQPQuadrant::LowQLowP.to_string(),
            "Low Quality / Low Privacy (Poor)"
        );
    }

    #[test]
    fn test_synqp_serde() {
        let original = SynQPMatrix::evaluate_default(0.8, 0.9);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SynQPMatrix = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.quadrant, SynQPQuadrant::HighQHighP);
        assert!((decoded.quality_score - 0.8).abs() < 1e-10);
    }
}
393}