// datasynth_eval/calibration/objective.rs

//! C3 Piece 1 — calibration objective.
//!
//! The objective is the scalar function L(synth, ref) the calibration
//! loop minimises. The initial cut supports the three headline scalars
//! the BF report already exposes (`composite_bf_score`,
//! `composite_bf_median`, `composite_bf_volume_corrected`).
//! Per-submetric weighting is deferred — the BF report's per-entity
//! shape uses typed fields rather than a generic map, so a weighted
//! sub-metric objective needs a small typed dispatch table that's
//! out of scope for the first cut.

use crate::behavioral_fidelity::report::BehavioralFidelityReport;

/// What we're minimising.
///
/// Each variant names one headline scalar; [`ObjectiveMetric::BfComposite`]
/// is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ObjectiveMetric {
    /// Sajja BF composite mean (the default headline metric).
    /// Lower is better — 0 means synth matches reference within
    /// the noise floor.
    #[default]
    BfComposite,
    /// BF composite median — robust to a small number of very
    /// high-DR outlier sub-metrics. Useful when one sub-metric is
    /// wildly off and skews the mean.
    BfCompositeMedian,
    /// Volume-corrected BF composite (excludes degenerate +
    /// volume-bounded metrics). Use when the loop should ignore
    /// engine-volume-dependent gaps so calibration focuses on the
    /// structural-fidelity sub-metrics.
    BfCompositeVolumeCorrected,
}

impl ObjectiveMetric {
    /// Display-friendly identifier for logs + history persistence.
    ///
    /// Returns a fixed `snake_case` string per variant. The match is
    /// exhaustive on purpose (no `_` arm) so adding a variant forces a
    /// name to be chosen here.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::BfComposite => "bf_composite",
            Self::BfCompositeMedian => "bf_composite_median",
            Self::BfCompositeVolumeCorrected => "bf_composite_volume_corrected",
        }
    }
}

44/// One iterable target: which scalar drives the loop + optional
45/// convergence threshold.
46#[derive(Debug, Clone, Default)]
47pub struct CalibrationObjective {
48    /// Which scalar drives the loop.
49    pub metric: ObjectiveMetric,
50    /// Optional convergence target — stop when the multi-seed mean
51    /// loss is ≤ this. `None` (default) lets the loop run to
52    /// `max_iterations` / patience exhaustion.
53    pub target: Option<f64>,
54}
55
56impl CalibrationObjective {
57    /// Default — minimise the BF composite mean, no explicit target.
58    pub fn bf_composite() -> Self {
59        Self {
60            metric: ObjectiveMetric::BfComposite,
61            target: None,
62        }
63    }
64
65    /// Pick a specific metric. Identical to constructing the struct
66    /// directly; provided as a fluent builder for callers that read
67    /// nicer with a chained form.
68    pub fn with_metric(mut self, m: ObjectiveMetric) -> Self {
69        self.metric = m;
70        self
71    }
72
73    /// Set a convergence target. Stops the loop when E_seed[L] ≤ `t`.
74    pub fn with_target(mut self, t: f64) -> Self {
75        self.target = Some(t);
76        self
77    }
78
79    /// Compute the scalar loss for one report.
80    ///
81    /// All three scalars are always present in the
82    /// [`BehavioralFidelityReport`] (the v5.x writer fills them via
83    /// `#[serde(default)]`-zero on older fixtures), so this never
84    /// returns `None`. The return is `Option` to allow a future
85    /// per-submetric variant to signal a missing-path error without
86    /// a breaking signature change.
87    pub fn evaluate(&self, report: &BehavioralFidelityReport) -> Option<f64> {
88        Some(match self.metric {
89            ObjectiveMetric::BfComposite => report.composite_bf_score,
90            ObjectiveMetric::BfCompositeMedian => report.composite_bf_median,
91            ObjectiveMetric::BfCompositeVolumeCorrected => report.composite_bf_volume_corrected,
92        })
93    }
94
95    /// Aggregate multiple BF reports into a single (mean, std) pair —
96    /// the multi-seed harness the C3 loop relies on for noise-floor
97    /// rejection (T3 methodology: single-shard composite CV ≈ 25 %,
98    /// so a step must beat the prior best by > ~σ to be credited).
99    ///
100    /// Returns `None` only when `reports` is empty.
101    pub fn aggregate(&self, reports: &[BehavioralFidelityReport]) -> Option<(f64, f64)> {
102        if reports.is_empty() {
103            return None;
104        }
105        let vals: Vec<f64> = reports.iter().filter_map(|r| self.evaluate(r)).collect();
106        if vals.is_empty() {
107            return None;
108        }
109        let n = vals.len() as f64;
110        let mean = vals.iter().sum::<f64>() / n;
111        let variance = vals.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n;
112        Some((mean, variance.sqrt()))
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use crate::behavioral_fidelity::report::{
        BaselineValues, CorpusSummary, EntityMetrics, GateResult, PerMetric,
    };
    use chrono::Utc;
    use std::collections::BTreeMap;

    /// A zeroed, non-degenerate, non-volume-bounded sub-metric.
    fn empty_per_metric() -> PerMetric {
        PerMetric {
            raw: 0.0,
            baseline: 0.0,
            dr: 0.0,
            is_degenerate_baseline: false,
            is_volume_bounded: false,
        }
    }

    /// Entity metrics with every sub-metric zeroed and all maps empty.
    fn empty_entity_metrics() -> EntityMetrics {
        EntityMetrics {
            entity_column: "test".into(),
            p1_ietd: empty_per_metric(),
            p1_autocorr: empty_per_metric(),
            p2_active_lifetime: empty_per_metric(),
            p2_burst_len_by_threshold: BTreeMap::new(),
            p2_je_line_burst: empty_per_metric(),
            p3_fanout_by_attr: BTreeMap::new(),
            p3_clustering: empty_per_metric(),
            p3_triangle_log_ratio: empty_per_metric(),
            p4_rule_results: vec![],
            p4_mean_gap: empty_per_metric(),
        }
    }

    /// Placeholder corpus summary shared by the reference and
    /// synthetic sides of the fixture report.
    fn empty_corpus() -> CorpusSummary {
        CorpusSummary {
            path: "/dev/null".into(),
            n_rows: 0,
            n_entities_primary: 0,
            n_entities_secondary: 0,
            period_start: None,
            period_end: None,
        }
    }

    /// Build a minimal report whose three headline scalars are the
    /// given values; every other field is a neutral placeholder.
    fn make_report(composite: f64, median: f64, vc: f64) -> BehavioralFidelityReport {
        BehavioralFidelityReport {
            profile: "test".into(),
            generator_id: "test".into(),
            generator_version: "v5.x".into(),
            seed: 0,
            generated_at: Utc::now(),
            reference_corpus: empty_corpus(),
            synthetic: empty_corpus(),
            noise_floor: BaselineValues {
                p1_ietd_w1_days: 0.0,
                p1_autocorr_gap: 0.0,
                p2_active_lifetime_w1: 0.0,
                p2_burst_len_by_threshold: BTreeMap::new(),
                p2_je_line_burst_w1: 0.0,
                p3_fanout_by_attr: BTreeMap::new(),
                p3_clustering_gap: 0.0,
                p3_triangle_log_ratio: 0.0,
                p4_mean_gap: 0.0,
            },
            per_entity: {
                let mut m = BTreeMap::new();
                m.insert("test".to_string(), empty_entity_metrics());
                m
            },
            composite_bf_score: composite,
            composite_bf_median: median,
            n_metrics_aggregated: 1,
            n_metrics_excluded_degenerate: 0,
            composite_bf_volume_corrected: vc,
            n_metrics_excluded_volume: 0,
            intraday_structural: None,
            gates: GateResult {
                fail_if_dr_above: 100.0,
                fail_if_composite_above: 100.0,
                passed: true,
                failures: vec![],
            },
        }
    }

    #[test]
    fn bf_composite_default() {
        let obj = CalibrationObjective::default();
        assert_eq!(obj.metric, ObjectiveMetric::BfComposite);
        assert_eq!(obj.target, None);
        let report = make_report(42.0, 17.0, 36.0);
        assert_eq!(obj.evaluate(&report), Some(42.0));
    }

    #[test]
    fn bf_composite_median_picks_median_field() {
        let obj = CalibrationObjective::default().with_metric(ObjectiveMetric::BfCompositeMedian);
        let report = make_report(42.0, 17.0, 36.0);
        assert_eq!(obj.evaluate(&report), Some(17.0));
    }

    #[test]
    fn bf_composite_volume_corrected_picks_vc_field() {
        let obj = CalibrationObjective::default()
            .with_metric(ObjectiveMetric::BfCompositeVolumeCorrected);
        let report = make_report(42.0, 17.0, 36.0);
        assert_eq!(obj.evaluate(&report), Some(36.0));
    }

    #[test]
    fn target_round_trips() {
        let obj = CalibrationObjective::bf_composite().with_target(25.0);
        assert_eq!(obj.target, Some(25.0));
    }

    #[test]
    fn aggregate_returns_mean_and_std() {
        let obj = CalibrationObjective::bf_composite();
        let reports = vec![
            make_report(40.0, 0.0, 0.0),
            make_report(42.0, 0.0, 0.0),
            make_report(44.0, 0.0, 0.0),
        ];
        let (mean, std) = obj.aggregate(&reports).expect("non-empty");
        assert!((mean - 42.0).abs() < 1e-9, "mean = {mean}");
        // Population std of {40, 42, 44}: sqrt(((-2)² + 0 + 2²) / 3) = sqrt(8/3) ≈ 1.6330
        assert!((std - (8.0_f64 / 3.0).sqrt()).abs() < 1e-9, "std = {std}");
    }

    #[test]
    fn aggregate_single_report_has_zero_std() {
        let obj = CalibrationObjective::bf_composite();
        let reports = vec![make_report(42.0, 0.0, 0.0)];
        let (mean, std) = obj.aggregate(&reports).expect("non-empty");
        assert!((mean - 42.0).abs() < 1e-9, "mean = {mean}");
        // One sample deviates from its own mean by zero.
        assert!(std.abs() < 1e-9, "std = {std}");
    }

    #[test]
    fn aggregate_uses_selected_metric() {
        let obj = CalibrationObjective::default().with_metric(ObjectiveMetric::BfCompositeMedian);
        let reports = vec![make_report(100.0, 10.0, 0.0), make_report(200.0, 20.0, 0.0)];
        let (mean, std) = obj.aggregate(&reports).expect("non-empty");
        // Median field only: mean of {10, 20} = 15, population std = 5.
        assert!((mean - 15.0).abs() < 1e-9, "mean = {mean}");
        assert!((std - 5.0).abs() < 1e-9, "std = {std}");
    }

    #[test]
    fn aggregate_empty_input_is_none() {
        let obj = CalibrationObjective::bf_composite();
        assert_eq!(obj.aggregate(&[]), None);
    }
}
// datasynth_eval/calibration/objective.rs