1use std::collections::BTreeMap;
4use std::path::Path;
5
6use chrono::{DateTime, NaiveDate, Utc};
7use serde::{Deserialize, Serialize};
8
9use super::error::{BehavioralFidelityError, BehavioralFidelityResult};
10use super::intraday::IntradayMetrics;
11
/// Summary descriptor of one corpus referenced by a [`BehavioralFidelityReport`].
///
/// The report stores two of these: `reference_corpus` and `synthetic`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CorpusSummary {
    /// Location of the corpus, kept as a plain string.
    /// NOTE(review): presumably a file-system path — confirm against the producer.
    pub path: String,
    /// Number of rows in the corpus.
    pub n_rows: usize,
    /// Count of "primary" entities.
    /// NOTE(review): which column counts as primary is not visible in this file.
    pub n_entities_primary: usize,
    /// Count of "secondary" entities.
    /// NOTE(review): which column counts as secondary is not visible in this file.
    pub n_entities_secondary: usize,
    /// First date of the covered period, if one is recorded.
    pub period_start: Option<NaiveDate>,
    /// Last date of the covered period, if one is recorded.
    pub period_end: Option<NaiveDate>,
}
21
/// Baseline value for every metric family, one scalar (or map of scalars) each.
///
/// [`BehavioralFidelityReport`] stores an instance of this as `noise_floor`.
/// The fields mirror the metric fields of [`EntityMetrics`] one-to-one.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BaselineValues {
    /// Baseline for the P1 IETD metric.
    /// NOTE(review): the name suggests a W1 distance measured in days — confirm.
    pub p1_ietd_w1_days: f64,
    /// Baseline for the P1 autocorrelation-gap metric.
    pub p1_autocorr_gap: f64,
    /// Baseline for the P2 active-lifetime metric.
    pub p2_active_lifetime_w1: f64,
    /// Baseline for the P2 burst-length metric, one value per integer threshold.
    /// NOTE(review): the CSV writer labels the per-entity counterpart
    /// `P2_BurstLen_W1_{t}d`, so the key is presumably a threshold in days — confirm.
    pub p2_burst_len_by_threshold: BTreeMap<i64, f64>,
    /// Baseline for the P2 JE-line burst metric.
    pub p2_je_line_burst_w1: f64,
    /// Baseline for the P3 fan-out metric, one value per attribute name.
    pub p3_fanout_by_attr: BTreeMap<String, f64>,
    /// Baseline for the P3 clustering-gap metric.
    pub p3_clustering_gap: f64,
    /// Baseline for the P3 triangle log-ratio metric.
    pub p3_triangle_log_ratio: f64,
    /// Baseline for the P4 mean-gap metric.
    pub p4_mean_gap: f64,
}
34
/// One metric measurement together with its baseline and the derived DR value.
///
/// The CSV export writes exactly these five fields per row, and the Markdown
/// report tabulates the `dr` field.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PerMetric {
    /// The measured value of the metric.
    pub raw: f64,
    /// The baseline value the measurement is compared against.
    pub baseline: f64,
    /// The "DR" value shown in the reports and compared against the gate threshold.
    /// NOTE(review): presumably `raw` relative to `baseline`; the computation is
    /// not part of this file — confirm.
    pub dr: f64,
    /// Flags a metric whose baseline is considered degenerate.
    /// Defaults to `false` when the key is absent during deserialization.
    #[serde(default)]
    pub is_degenerate_baseline: bool,
    /// Flags a metric considered volume-bounded.
    /// Defaults to `false` when the key is absent during deserialization.
    #[serde(default)]
    pub is_volume_bounded: bool,
}
55
/// All metric results for a single entity column.
///
/// Stored in [`BehavioralFidelityReport::per_entity`]; each [`PerMetric`] field
/// becomes one CSV row and one Markdown table cell (map-valued fields are
/// averaged for the Markdown table and expanded to one row per key in the CSV).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EntityMetrics {
    /// Name of the entity column these metrics describe.
    pub entity_column: String,
    /// P1 IETD metric (CSV label `P1_IETD_W1_days`).
    pub p1_ietd: PerMetric,
    /// P1 autocorrelation-gap metric (CSV label `P1_AutocorrGap`).
    pub p1_autocorr: PerMetric,
    /// P2 active-lifetime metric (CSV label `P2_ActiveLifetime_W1`).
    pub p2_active_lifetime: PerMetric,
    /// P2 burst-length metric per integer threshold
    /// (CSV label `P2_BurstLen_W1_{threshold}d`).
    pub p2_burst_len_by_threshold: BTreeMap<i64, PerMetric>,
    /// P2 JE-line burst metric (CSV label `P2_JELineBurst_W1`).
    pub p2_je_line_burst: PerMetric,
    /// P3 fan-out metric per attribute name (CSV label `P3_Fanout_W1_{attr}`).
    pub p3_fanout_by_attr: BTreeMap<String, PerMetric>,
    /// P3 clustering-gap metric (CSV label `P3_ClusteringGap`).
    pub p3_clustering: PerMetric,
    /// P3 triangle log-ratio metric (CSV label `P3_TriangleLogRatio`).
    pub p3_triangle_log_ratio: PerMetric,
    /// Individual P4 velocity-rule results. Serialized with the report, but not
    /// written by the Markdown or CSV exporters in this file.
    pub p4_rule_results: Vec<super::velocity_rules::RuleResult>,
    /// P4 mean-gap metric (CSV label `P4_MeanGap`).
    pub p4_mean_gap: PerMetric,
}
70
/// Outcome of the pass/fail gate together with the thresholds it was run with.
///
/// Rendered in the "Gate result" section of the Markdown report.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GateResult {
    /// Threshold applied to individual DR values
    /// (shown as "Threshold (any DR)" in the Markdown report).
    pub fail_if_dr_above: f64,
    /// Threshold applied to the composite score
    /// (shown as "Threshold (composite)" in the Markdown report).
    pub fail_if_composite_above: f64,
    /// Whether the gate passed.
    pub passed: bool,
    /// Human-readable failure descriptions; listed in the Markdown report only
    /// when non-empty.
    pub failures: Vec<String>,
}
78
/// Top-level behavioral-fidelity report.
///
/// Bundles run metadata, corpus summaries, the noise-floor baselines, the
/// per-entity metrics, composite scores and the gate outcome. It can be
/// exported through `write_json`, `write_markdown` and `write_csv`.
///
/// Fields marked `#[serde(default)]` fall back to their type's default
/// (`0.0` / `0`) when the key is missing during deserialization.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BehavioralFidelityReport {
    /// Name of the profile the report was produced for.
    pub profile: String,
    /// Identifier of the generator under evaluation.
    pub generator_id: String,
    /// Version string of the generator under evaluation.
    pub generator_version: String,
    /// Seed recorded for the run.
    pub seed: u64,
    /// UTC timestamp of report creation (printed as RFC 3339 in Markdown).
    pub generated_at: DateTime<Utc>,
    /// Summary of the reference corpus.
    pub reference_corpus: CorpusSummary,
    /// Summary of the synthetic corpus.
    pub synthetic: CorpusSummary,
    /// Baseline ("noise floor") value for every metric family.
    pub noise_floor: BaselineValues,
    /// Metrics per entity, keyed by a name that the Markdown and CSV exports
    /// print as the entity column. `BTreeMap` keeps the output order stable.
    pub per_entity: BTreeMap<String, EntityMetrics>,
    /// Composite score labelled "mean" in the Markdown report, which also states
    /// that 1.0 corresponds to the noise floor and lower is better.
    pub composite_bf_score: f64,
    /// Composite score labelled "median" in the Markdown report.
    #[serde(default)]
    pub composite_bf_median: f64,
    /// Number of metrics the composite score was computed over.
    #[serde(default)]
    pub n_metrics_aggregated: usize,
    /// Number of metrics excluded because of a degenerate baseline.
    #[serde(default)]
    pub n_metrics_excluded_degenerate: usize,
    /// Composite score labelled "volume-corrected" in the Markdown report,
    /// i.e. excluding metrics flagged `is_volume_bounded`.
    #[serde(default)]
    pub composite_bf_volume_corrected: f64,
    /// Number of metrics excluded as volume-bounded.
    #[serde(default)]
    pub n_metrics_excluded_volume: usize,
    /// Optional intraday metrics; when present they are rendered in the
    /// "Synthetic-only intraday metrics (info)" Markdown section.
    pub intraday_structural: Option<IntradayMetrics>,
    /// Gate thresholds and outcome.
    pub gates: GateResult,
}
118
119impl BehavioralFidelityReport {
120 pub fn write_json(&self, path: &Path) -> BehavioralFidelityResult<()> {
121 let f = std::fs::File::create(path)?;
122 serde_json::to_writer_pretty(f, self)?;
123 Ok(())
124 }
125}
126
127impl BehavioralFidelityReport {
128 pub fn write_markdown(&self, path: &Path) -> BehavioralFidelityResult<()> {
129 use std::fmt::Write;
130 let mut buf = String::new();
131
132 writeln!(buf, "# Behavioral-Fidelity Report").ok();
133 writeln!(buf).ok();
134 writeln!(buf, "- **Profile:** `{}`", self.profile).ok();
135 writeln!(
136 buf,
137 "- **Generator:** `{}` ({})",
138 self.generator_id, self.generator_version
139 )
140 .ok();
141 writeln!(buf, "- **Seed:** {}", self.seed).ok();
142 writeln!(
143 buf,
144 "- **Generated at:** {}",
145 self.generated_at.to_rfc3339()
146 )
147 .ok();
148 writeln!(
149 buf,
150 "- **Composite BF score (mean):** **{:.3}** (over {} metrics; {} excluded for degenerate baseline; 1.0 = noise floor; lower is better)",
151 self.composite_bf_score,
152 self.n_metrics_aggregated,
153 self.n_metrics_excluded_degenerate,
154 )
155 .ok();
156 writeln!(
157 buf,
158 "- **Composite BF score (median):** **{:.3}** (robust to outliers; compare with mean to gauge skew)",
159 self.composite_bf_median,
160 )
161 .ok();
162 writeln!(
163 buf,
164 "- **Composite BF score (volume-corrected, exc. is_volume_bounded):** **{:.3}** (over {} metrics; {} excluded as volume-bounded)",
165 self.composite_bf_volume_corrected,
166 self.n_metrics_aggregated.saturating_sub(self.n_metrics_excluded_volume),
167 self.n_metrics_excluded_volume,
168 )
169 .ok();
170 writeln!(buf).ok();
171 writeln!(buf, "## Per-entity DR table").ok();
172 writeln!(buf).ok();
173 writeln!(buf, "| Entity column | P1 IETD | P1 ACorr | P2 Lifetime | P2 BurstLen avg | P2 JE-line | P3 Fanout avg | P3 Clust | P3 Δlog | P4 mean |").ok();
174 writeln!(buf, "|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|").ok();
175 for (name, m) in &self.per_entity {
176 let p2_burst_avg = avg_dr(&m.p2_burst_len_by_threshold);
177 let p3_fanout_avg = avg_dr_str(&m.p3_fanout_by_attr);
178 writeln!(
179 buf,
180 "| `{}` | {:.2} | {:.2} | {:.2} | {:.2} | {:.2} | {:.2} | {:.2} | {:.2} | {:.2} |",
181 name,
182 m.p1_ietd.dr,
183 m.p1_autocorr.dr,
184 m.p2_active_lifetime.dr,
185 p2_burst_avg,
186 m.p2_je_line_burst.dr,
187 p3_fanout_avg,
188 m.p3_clustering.dr,
189 m.p3_triangle_log_ratio.dr,
190 m.p4_mean_gap.dr
191 )
192 .ok();
193 }
194 writeln!(buf).ok();
195 writeln!(buf, "## Gate result").ok();
196 writeln!(buf).ok();
197 writeln!(
198 buf,
199 "- **Passed:** {}",
200 if self.gates.passed { "yes" } else { "no" }
201 )
202 .ok();
203 writeln!(
204 buf,
205 "- **Threshold (any DR):** {:.2}",
206 self.gates.fail_if_dr_above
207 )
208 .ok();
209 writeln!(
210 buf,
211 "- **Threshold (composite):** {:.2}",
212 self.gates.fail_if_composite_above
213 )
214 .ok();
215 if !self.gates.failures.is_empty() {
216 writeln!(buf, "- **Failures:**").ok();
217 for f in &self.gates.failures {
218 writeln!(buf, " - {}", f).ok();
219 }
220 }
221 if let Some(intra) = &self.intraday_structural {
222 writeln!(buf).ok();
223 writeln!(buf, "## Synthetic-only intraday metrics (info)").ok();
224 writeln!(buf).ok();
225 writeln!(buf, "- IETD median (s): {:.2}", intra.p1_intra_w1_seconds).ok();
226 writeln!(buf, "- Lag-1 autocorr (s): {:.3}", intra.p1_intra_autocorr).ok();
227 writeln!(buf, "- Off-hours rate: {:.3}", intra.off_hours_rate).ok();
228 }
229 std::fs::write(path, buf)?;
230 Ok(())
231 }
232
233 pub fn write_csv(&self, path: &Path) -> BehavioralFidelityResult<()> {
234 let mut wtr = csv::Writer::from_path(path)
235 .map_err(|e| BehavioralFidelityError::Io(std::io::Error::other(e.to_string())))?;
236 wtr.write_record([
237 "entity_column",
238 "metric",
239 "raw",
240 "baseline",
241 "dr",
242 "is_degenerate_baseline",
243 "is_volume_bounded",
244 ])
245 .map_err(|e| BehavioralFidelityError::Io(std::io::Error::other(e.to_string())))?;
246 for (name, m) in &self.per_entity {
247 write_metric_row(&mut wtr, name, "P1_IETD_W1_days", &m.p1_ietd)?;
248 write_metric_row(&mut wtr, name, "P1_AutocorrGap", &m.p1_autocorr)?;
249 write_metric_row(
250 &mut wtr,
251 name,
252 "P2_ActiveLifetime_W1",
253 &m.p2_active_lifetime,
254 )?;
255 for (t, v) in &m.p2_burst_len_by_threshold {
256 write_metric_row(&mut wtr, name, &format!("P2_BurstLen_W1_{}d", t), v)?;
257 }
258 write_metric_row(&mut wtr, name, "P2_JELineBurst_W1", &m.p2_je_line_burst)?;
259 for (attr, v) in &m.p3_fanout_by_attr {
260 write_metric_row(&mut wtr, name, &format!("P3_Fanout_W1_{}", attr), v)?;
261 }
262 write_metric_row(&mut wtr, name, "P3_ClusteringGap", &m.p3_clustering)?;
263 write_metric_row(
264 &mut wtr,
265 name,
266 "P3_TriangleLogRatio",
267 &m.p3_triangle_log_ratio,
268 )?;
269 write_metric_row(&mut wtr, name, "P4_MeanGap", &m.p4_mean_gap)?;
270 }
271 wtr.flush()
272 .map_err(|e| BehavioralFidelityError::Io(std::io::Error::other(e.to_string())))?;
273 Ok(())
274 }
275}
276
277fn write_metric_row(
278 wtr: &mut csv::Writer<std::fs::File>,
279 entity: &str,
280 metric: &str,
281 pm: &PerMetric,
282) -> BehavioralFidelityResult<()> {
283 wtr.write_record([
284 entity,
285 metric,
286 &format!("{:.6}", pm.raw),
287 &format!("{:.6}", pm.baseline),
288 &format!("{:.6}", pm.dr),
289 if pm.is_degenerate_baseline {
290 "true"
291 } else {
292 "false"
293 },
294 if pm.is_volume_bounded {
295 "true"
296 } else {
297 "false"
298 },
299 ])
300 .map_err(|e| BehavioralFidelityError::Io(std::io::Error::other(e.to_string())))?;
301 Ok(())
302}
303
304fn avg_dr(map: &BTreeMap<i64, PerMetric>) -> f64 {
305 if map.is_empty() {
306 return 0.0;
307 }
308 map.values().map(|p| p.dr).sum::<f64>() / map.len() as f64
309}
310
311fn avg_dr_str(map: &BTreeMap<String, PerMetric>) -> f64 {
312 if map.is_empty() {
313 return 0.0;
314 }
315 map.values().map(|p| p.dr).sum::<f64>() / map.len() as f64
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// `BaselineValues` must serialize its `BTreeMap` keys in sorted order and
    /// must come back unchanged after a JSON roundtrip.
    #[test]
    fn json_roundtrip_preserves_btreemap_ordering() {
        let mut by_attr = BTreeMap::new();
        by_attr.insert("CostCenter".to_string(), 1.0);
        by_attr.insert("GLAccount".to_string(), 2.0);
        let baseline = BaselineValues {
            p1_ietd_w1_days: 1.0,
            p1_autocorr_gap: 0.0,
            p2_active_lifetime_w1: 1.0,
            p2_burst_len_by_threshold: BTreeMap::new(),
            p2_je_line_burst_w1: 1.0,
            p3_fanout_by_attr: by_attr,
            p3_clustering_gap: 0.0,
            p3_triangle_log_ratio: 0.0,
            p4_mean_gap: 0.0,
        };

        let json = serde_json::to_string(&baseline).expect("serialize");
        let key_a = json.find("CostCenter").expect("CostCenter present");
        let key_g = json.find("GLAccount").expect("GLAccount present");
        assert!(key_a < key_g, "BTreeMap should produce ordered JSON keys");

        // Compare the decoded value instead of discarding it; otherwise a
        // lossy roundtrip would go unnoticed.
        let decoded: BaselineValues = serde_json::from_str(&json).expect("roundtrip");
        assert_eq!(decoded, baseline, "roundtrip should reproduce the original value");
    }
}