datasynth_eval/privacy/
metrics.rs

//! Privacy metrics: NIST SP 800-226 alignment and SynQP quality-privacy matrix.
//!
//! Provides a structured self-assessment against NIST guidance for synthetic data
//! and a quality-privacy evaluation quadrant (SynQP) for high-level classification.
5
6use serde::{Deserialize, Serialize};
7
8/// NIST SP 800-226 alignment self-assessment report.
9///
10/// Maps DataSynth's privacy controls to NIST criteria for evaluating
11/// de-identification and synthetic data methodologies.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct NistAlignmentReport {
14    /// Whether differential privacy is applied.
15    pub differential_privacy_applied: bool,
16    /// Epsilon value used (if applicable).
17    pub epsilon: Option<f64>,
18    /// Delta value used (if applicable).
19    pub delta: Option<f64>,
20    /// The composition method used.
21    pub composition_method: Option<String>,
22    /// Whether k-anonymity is enforced.
23    pub k_anonymity_enforced: bool,
24    /// The k-anonymity level achieved.
25    pub k_anonymity_level: Option<usize>,
26    /// Whether membership inference was tested.
27    pub membership_inference_tested: bool,
28    /// MIA AUC-ROC result (if tested).
29    pub mia_auc_roc: Option<f64>,
30    /// Whether linkage attack was tested.
31    pub linkage_attack_tested: bool,
32    /// Re-identification rate (if tested).
33    pub re_identification_rate: Option<f64>,
34    /// Overall NIST alignment score (0.0-1.0).
35    /// Based on how many criteria are met.
36    pub alignment_score: f64,
37    /// Individual criterion assessments.
38    pub criteria: Vec<NistCriterion>,
39    /// Whether the overall assessment passes.
40    pub passes: bool,
41}
42
43/// A single NIST criterion assessment.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct NistCriterion {
46    /// Criterion identifier (e.g., "DP-1", "KA-1", "MIA-1").
47    pub id: String,
48    /// Human-readable description.
49    pub description: String,
50    /// Whether this criterion is met.
51    pub met: bool,
52    /// Evidence or rationale.
53    pub evidence: String,
54}
55
56impl NistAlignmentReport {
57    /// Build a NIST alignment report from privacy evaluation results.
58    pub fn build(
59        dp_applied: bool,
60        epsilon: Option<f64>,
61        delta: Option<f64>,
62        composition_method: Option<String>,
63        k_anonymity_enforced: bool,
64        k_anonymity_level: Option<usize>,
65        mia_auc_roc: Option<f64>,
66        re_identification_rate: Option<f64>,
67    ) -> Self {
68        let mut criteria = Vec::new();
69
70        // DP criteria
71        criteria.push(NistCriterion {
72            id: "DP-1".to_string(),
73            description: "Differential privacy mechanism applied".to_string(),
74            met: dp_applied,
75            evidence: if dp_applied {
76                format!(
77                    "DP applied with epsilon={}, delta={}, method={}",
78                    epsilon.map_or("N/A".to_string(), |e| format!("{:.4}", e)),
79                    delta.map_or("N/A".to_string(), |d| format!("{:.2e}", d)),
80                    composition_method.as_deref().unwrap_or("naive"),
81                )
82            } else {
83                "No differential privacy mechanism applied".to_string()
84            },
85        });
86
87        criteria.push(NistCriterion {
88            id: "DP-2".to_string(),
89            description: "Epsilon within reasonable bounds (< 10.0)".to_string(),
90            met: epsilon.is_some_and(|e| e < 10.0),
91            evidence: epsilon.map_or("No epsilon specified".to_string(), |e| {
92                format!("Epsilon = {:.4}", e)
93            }),
94        });
95
96        // K-anonymity criteria
97        criteria.push(NistCriterion {
98            id: "KA-1".to_string(),
99            description: "K-anonymity enforced with k >= 5".to_string(),
100            met: k_anonymity_enforced && k_anonymity_level.is_some_and(|k| k >= 5),
101            evidence: if k_anonymity_enforced {
102                format!(
103                    "K-anonymity enforced, k = {}",
104                    k_anonymity_level.map_or("unknown".to_string(), |k| k.to_string())
105                )
106            } else {
107                "K-anonymity not enforced".to_string()
108            },
109        });
110
111        // MIA criteria
112        let mia_tested = mia_auc_roc.is_some();
113        criteria.push(NistCriterion {
114            id: "MIA-1".to_string(),
115            description: "Membership inference attack tested".to_string(),
116            met: mia_tested,
117            evidence: if mia_tested {
118                format!("MIA AUC-ROC = {:.4}", mia_auc_roc.unwrap_or(0.0))
119            } else {
120                "MIA not tested".to_string()
121            },
122        });
123
124        criteria.push(NistCriterion {
125            id: "MIA-2".to_string(),
126            description: "MIA AUC-ROC < 0.6 (near-random)".to_string(),
127            met: mia_auc_roc.is_some_and(|auc| auc < 0.6),
128            evidence: mia_auc_roc.map_or("MIA not tested".to_string(), |auc| {
129                format!("AUC-ROC = {:.4}", auc)
130            }),
131        });
132
133        // Linkage criteria
134        let linkage_tested = re_identification_rate.is_some();
135        criteria.push(NistCriterion {
136            id: "LA-1".to_string(),
137            description: "Linkage attack tested".to_string(),
138            met: linkage_tested,
139            evidence: if linkage_tested {
140                format!(
141                    "Re-identification rate = {:.4}",
142                    re_identification_rate.unwrap_or(0.0)
143                )
144            } else {
145                "Linkage attack not tested".to_string()
146            },
147        });
148
149        criteria.push(NistCriterion {
150            id: "LA-2".to_string(),
151            description: "Re-identification rate < 5%".to_string(),
152            met: re_identification_rate.is_some_and(|r| r < 0.05),
153            evidence: re_identification_rate.map_or("Not tested".to_string(), |r| {
154                format!("Re-identification rate = {:.2}%", r * 100.0)
155            }),
156        });
157
158        let met_count = criteria.iter().filter(|c| c.met).count();
159        let alignment_score = if criteria.is_empty() {
160            0.0
161        } else {
162            met_count as f64 / criteria.len() as f64
163        };
164
165        // Pass if at least 5 out of 7 criteria are met
166        let passes = met_count >= 5;
167
168        Self {
169            differential_privacy_applied: dp_applied,
170            epsilon,
171            delta,
172            composition_method,
173            k_anonymity_enforced,
174            k_anonymity_level,
175            membership_inference_tested: mia_tested,
176            mia_auc_roc,
177            linkage_attack_tested: linkage_tested,
178            re_identification_rate,
179            alignment_score,
180            criteria,
181            passes,
182        }
183    }
184}
185
186/// Quality-Privacy evaluation quadrant (SynQP).
187///
188/// Classifies synthetic data output into one of four quadrants based on
189/// how well it balances data quality (utility) with privacy protection.
190#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
191pub enum SynQPQuadrant {
192    /// High quality, high privacy — the ideal outcome.
193    HighQHighP,
194    /// High quality, low privacy — useful but risky.
195    HighQLowP,
196    /// Low quality, high privacy — safe but less useful.
197    LowQHighP,
198    /// Low quality, low privacy — worst outcome.
199    LowQLowP,
200}
201
202impl std::fmt::Display for SynQPQuadrant {
203    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
204        match self {
205            Self::HighQHighP => write!(f, "High Quality / High Privacy (Ideal)"),
206            Self::HighQLowP => write!(f, "High Quality / Low Privacy (Risky)"),
207            Self::LowQHighP => write!(f, "Low Quality / High Privacy (Conservative)"),
208            Self::LowQLowP => write!(f, "Low Quality / Low Privacy (Poor)"),
209        }
210    }
211}
212
213/// SynQP matrix evaluation result.
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct SynQPMatrix {
216    /// Quality score (0.0 - 1.0). Derived from data evaluation metrics.
217    pub quality_score: f64,
218    /// Privacy score (0.0 - 1.0). Derived from privacy evaluation metrics.
219    pub privacy_score: f64,
220    /// The quadrant classification.
221    pub quadrant: SynQPQuadrant,
222    /// Quality threshold for high/low classification.
223    pub quality_threshold: f64,
224    /// Privacy threshold for high/low classification.
225    pub privacy_threshold: f64,
226}
227
228impl SynQPMatrix {
229    /// Compute the SynQP matrix from quality and privacy scores.
230    ///
231    /// # Arguments
232    /// * `quality_score` - Overall data quality score (0.0-1.0, higher = better quality)
233    /// * `privacy_score` - Overall privacy score (0.0-1.0, higher = better privacy)
234    /// * `quality_threshold` - Threshold for high vs low quality (default: 0.7)
235    /// * `privacy_threshold` - Threshold for high vs low privacy (default: 0.7)
236    pub fn evaluate(
237        quality_score: f64,
238        privacy_score: f64,
239        quality_threshold: f64,
240        privacy_threshold: f64,
241    ) -> Self {
242        let quadrant = match (
243            quality_score >= quality_threshold,
244            privacy_score >= privacy_threshold,
245        ) {
246            (true, true) => SynQPQuadrant::HighQHighP,
247            (true, false) => SynQPQuadrant::HighQLowP,
248            (false, true) => SynQPQuadrant::LowQHighP,
249            (false, false) => SynQPQuadrant::LowQLowP,
250        };
251
252        Self {
253            quality_score,
254            privacy_score,
255            quadrant,
256            quality_threshold,
257            privacy_threshold,
258        }
259    }
260
261    /// Evaluate with default thresholds (0.7 for both).
262    pub fn evaluate_default(quality_score: f64, privacy_score: f64) -> Self {
263        Self::evaluate(quality_score, privacy_score, 0.7, 0.7)
264    }
265}
266
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Number of criteria in `report` that are marked as met.
    fn met_count(report: &NistAlignmentReport) -> usize {
        report.criteria.iter().filter(|criterion| criterion.met).count()
    }

    #[test]
    fn test_nist_report_all_criteria_met() {
        let method = Some("renyi_dp".to_string());
        let report = NistAlignmentReport::build(
            true,
            Some(1.0),
            Some(1e-5),
            method,
            true,
            Some(10),
            Some(0.52),
            Some(0.01),
        );

        assert!(report.passes);
        assert!(report.alignment_score > 0.9);
        assert_eq!(report.criteria.len(), 7);
        // Every one of the seven criteria must be met.
        assert_eq!(met_count(&report), report.criteria.len());
    }

    #[test]
    fn test_nist_report_no_privacy() {
        // No DP, no k-anonymity, no MIA result, no linkage result.
        let report =
            NistAlignmentReport::build(false, None, None, None, false, None, None, None);

        assert!(!report.passes);
        assert_eq!(report.alignment_score, 0.0);
        assert_eq!(met_count(&report), 0);
    }

    #[test]
    fn test_nist_report_partial() {
        let k_below_threshold = Some(3); // k=3, which is < 5 threshold
        let passing_auc = Some(0.55); // passes MIA
        let passing_rate = Some(0.03); // passes linkage
        let report = NistAlignmentReport::build(
            true,
            Some(5.0),
            Some(1e-5),
            Some("naive".to_string()),
            true,
            k_below_threshold,
            passing_auc,
            passing_rate,
        );

        // DP-1: met, DP-2: met (5<10), KA-1: NOT met (3<5),
        // MIA-1: met, MIA-2: met (0.55<0.6), LA-1: met, LA-2: met (0.03<0.05)
        assert_eq!(met_count(&report), 6); // 6 out of 7
        assert!(report.passes);
    }

    #[test]
    fn test_nist_report_serde() {
        let original = NistAlignmentReport::build(
            true,
            Some(1.0),
            Some(1e-5),
            None,
            true,
            Some(10),
            Some(0.5),
            Some(0.01),
        );
        // Round-trip through JSON.
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: NistAlignmentReport = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.criteria.len(), 7);
        assert!(decoded.passes);
    }

    #[test]
    fn test_synqp_high_quality_high_privacy() {
        let result = SynQPMatrix::evaluate_default(0.85, 0.90);
        assert_eq!(result.quadrant, SynQPQuadrant::HighQHighP);
    }

    #[test]
    fn test_synqp_high_quality_low_privacy() {
        let result = SynQPMatrix::evaluate_default(0.85, 0.40);
        assert_eq!(result.quadrant, SynQPQuadrant::HighQLowP);
    }

    #[test]
    fn test_synqp_low_quality_high_privacy() {
        let result = SynQPMatrix::evaluate_default(0.30, 0.90);
        assert_eq!(result.quadrant, SynQPQuadrant::LowQHighP);
    }

    #[test]
    fn test_synqp_low_quality_low_privacy() {
        let result = SynQPMatrix::evaluate_default(0.30, 0.40);
        assert_eq!(result.quadrant, SynQPQuadrant::LowQLowP);
    }

    #[test]
    fn test_synqp_custom_thresholds() {
        // With low thresholds, everything is "high"
        let result = SynQPMatrix::evaluate(0.5, 0.5, 0.3, 0.3);
        assert_eq!(result.quadrant, SynQPQuadrant::HighQHighP);
    }

    #[test]
    fn test_synqp_display() {
        let ideal = SynQPQuadrant::HighQHighP.to_string();
        assert_eq!(ideal, "High Quality / High Privacy (Ideal)");

        let poor = SynQPQuadrant::LowQLowP.to_string();
        assert_eq!(poor, "Low Quality / Low Privacy (Poor)");
    }

    #[test]
    fn test_synqp_serde() {
        let original = SynQPMatrix::evaluate_default(0.8, 0.9);
        // Round-trip through JSON.
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SynQPMatrix = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.quadrant, SynQPQuadrant::HighQHighP);
        assert!((decoded.quality_score - 0.8).abs() < 1e-10);
    }
}
datasynth_eval/privacy/metrics.rs

datasynth_eval/privacy/
metrics.rs