1pub mod burst;
12pub mod degradation;
13pub mod entity_profile;
14pub mod error;
15pub mod fanout;
16pub mod ietd;
17pub mod intraday;
18pub mod loader;
19pub mod math;
20pub mod report;
21pub mod types;
22pub mod velocity_rules;
23
24pub use entity_profile::{gl_source_tp, reference_corpus_aliases, synthetic_aliases};
25pub use error::{BehavioralFidelityError, BehavioralFidelityResult};
26pub use report::BehavioralFidelityReport;
27pub use types::{BehavioralFidelityConfig, EntityProfile, GateThresholds, Record, RuleSet};
28
29use std::collections::BTreeMap;
30use std::path::Path;
31
32use chrono::Utc;
33
34use crate::behavioral_fidelity::report::{
35 BaselineValues, CorpusSummary, EntityMetrics, GateResult, PerMetric,
36};
37
/// Version of this crate, captured at compile time from Cargo's `CARGO_PKG_VERSION`
/// environment variable; `compute_report` writes it to the report's `generator_version`.
const SELF_VERSION: &str = env!("CARGO_PKG_VERSION");
39
40fn per_metric(name: &str, raw: f64, baseline: f64, dr: f64) -> PerMetric {
44 PerMetric {
45 raw,
46 baseline,
47 dr,
48 is_degenerate_baseline: degradation::is_degenerate_baseline(baseline),
49 is_volume_bounded: degradation::is_volume_bounded(name),
50 }
51}
52
53pub fn compute_report(
54 cfg: &BehavioralFidelityConfig,
55 real: &[Record],
56 syn: &[Record],
57) -> BehavioralFidelityResult<BehavioralFidelityReport> {
58 let real_capped =
64 degradation::subsample_to_je_cap(real, degradation::NOISE_FLOOR_JE_CAP, cfg.seed);
65 let real: &[Record] = &real_capped;
66 let (real_a, real_b) = degradation::split_5050(real, cfg.seed);
67
68 let mut per_entity = BTreeMap::new();
69
70 let em_primary = compute_entity_metrics(
72 &cfg.profile,
73 real,
74 syn,
75 &real_a,
76 &real_b,
77 &cfg.profile.primary_entity,
78 )?;
79 per_entity.insert(cfg.profile.primary_entity.clone(), em_primary);
80
81 if let Some(sec) = &cfg.profile.secondary_entity {
83 let em_sec = compute_entity_metrics(&cfg.profile, real, syn, &real_a, &real_b, sec)?;
84 per_entity.insert(sec.clone(), em_sec);
85 }
86
87 let (rule_results, mean_gap) =
89 velocity_rules::evaluate_rule_set(&cfg.rule_set, real, syn, |r| {
90 project_entity(r, &cfg.profile.primary_entity)
91 });
92 let (_, mean_gap_baseline) =
93 velocity_rules::evaluate_rule_set(&cfg.rule_set, &real_a, &real_b, |r| {
94 project_entity(r, &cfg.profile.primary_entity)
95 });
96 if let Some(em) = per_entity.get_mut(&cfg.profile.primary_entity) {
97 em.p4_rule_results = rule_results;
98 em.p4_mean_gap = per_metric(
99 "P4_MeanGap",
100 mean_gap,
101 mean_gap_baseline,
102 degradation::degradation_ratio(mean_gap, mean_gap_baseline),
103 );
104 }
105
106 let intraday =
107 intraday::compute_intraday(syn, |r| project_entity(r, &cfg.profile.primary_entity));
108
109 let noise_floor = collect_baseline_values(&per_entity, &cfg.profile);
110 let (
111 composite_bf_score,
112 composite_bf_median,
113 composite_bf_volume_corrected,
114 n_metrics_aggregated,
115 n_metrics_excluded_degenerate,
116 n_metrics_excluded_volume,
117 ) = compute_composite_bf(&per_entity);
118
119 let gates = build_gate_result(&cfg.fail_thresholds, &per_entity, composite_bf_score);
120
121 Ok(BehavioralFidelityReport {
122 profile: cfg.profile.name.clone(),
123 generator_id: "datasynth".to_string(),
124 generator_version: SELF_VERSION.to_string(),
125 seed: cfg.seed,
126 generated_at: Utc::now(),
127 reference_corpus: summary(real, &cfg.profile),
128 synthetic: summary(syn, &cfg.profile),
129 noise_floor,
130 per_entity,
131 composite_bf_score,
132 composite_bf_median,
133 composite_bf_volume_corrected,
134 n_metrics_aggregated,
135 n_metrics_excluded_degenerate,
136 n_metrics_excluded_volume,
137 intraday_structural: intraday,
138 gates,
139 })
140}
141
142pub fn compute_report_from_paths(
143 cfg: &BehavioralFidelityConfig,
144 real_path: &Path,
145 syn_path: &Path,
146) -> BehavioralFidelityResult<BehavioralFidelityReport> {
147 let real = load_any(real_path)?;
148 let syn = load_any(syn_path)?;
149 compute_report(cfg, &real, &syn)
150}
151
152fn load_any(p: &Path) -> BehavioralFidelityResult<Vec<Record>> {
153 if p.is_dir() {
154 for entry in std::fs::read_dir(p)? {
155 let path = entry?.path();
156 if let Some(ext) = path.extension() {
157 if ext.eq_ignore_ascii_case("parquet") {
158 return loader::load_parquet_records(&path);
159 }
160 if ext.eq_ignore_ascii_case("csv") {
161 return loader::load_csv_records(&path);
162 }
163 }
164 }
165 return Err(BehavioralFidelityError::Io(std::io::Error::other(
166 "no .parquet or .csv in dir",
167 )));
168 }
169 match p.extension().and_then(|s| s.to_str()) {
170 Some("parquet") => loader::load_parquet_records(p),
171 Some("csv") => loader::load_csv_records(p),
172 _ => Err(BehavioralFidelityError::Io(std::io::Error::other(
173 "unknown extension",
174 ))),
175 }
176}
177
178fn compute_entity_metrics(
179 profile: &EntityProfile,
180 real: &[Record],
181 syn: &[Record],
182 real_a: &[Record],
183 real_b: &[Record],
184 entity_col: &str,
185) -> BehavioralFidelityResult<EntityMetrics> {
186 let project = |r: &Record| project_entity(r, entity_col);
187
188 let p1 = ietd::compute_p1(real, syn, project, |r| r.entry_date);
190 let p1_bl = ietd::compute_p1(real_a, real_b, project, |r| r.entry_date);
191 let p1_ietd = per_metric(
192 "P1_IETD_W1_days",
193 p1.ietd_w1_days,
194 p1_bl.ietd_w1_days,
195 degradation::degradation_ratio(p1.ietd_w1_days, p1_bl.ietd_w1_days),
196 );
197 let p1_autocorr = per_metric(
198 "P1_AutocorrGap",
199 p1.autocorr_gap,
200 p1_bl.autocorr_gap,
201 degradation::degradation_ratio(p1.autocorr_gap, p1_bl.autocorr_gap),
202 );
203
204 let p2_al_raw = burst::active_lifetime_w1(real, syn, project, |r| r.entry_date);
206 let p2_al_bl = burst::active_lifetime_w1(real_a, real_b, project, |r| r.entry_date);
207 let p2_active_lifetime = per_metric(
208 "P2_ActiveLifetime_W1",
209 p2_al_raw,
210 p2_al_bl,
211 degradation::degradation_ratio(p2_al_raw, p2_al_bl),
212 );
213
214 let mut p2_burst_len_by_threshold = BTreeMap::new();
216 for t in &profile.burst_thresholds {
217 let raw = burst::burst_length_w1(real, syn, project, |r| r.entry_date, *t);
218 let bl = burst::burst_length_w1(real_a, real_b, project, |r| r.entry_date, *t);
219 let name = format!("P2_BurstLen_W1_{}d", t);
220 p2_burst_len_by_threshold.insert(
221 *t,
222 per_metric(&name, raw, bl, degradation::degradation_ratio(raw, bl)),
223 );
224 }
225
226 let p2_jl_raw = burst::je_line_burst_w1(real, syn);
228 let p2_jl_bl = burst::je_line_burst_w1(real_a, real_b);
229 let p2_je_line_burst = per_metric(
230 "P2_JELineBurst_W1",
231 p2_jl_raw,
232 p2_jl_bl,
233 degradation::degradation_ratio(p2_jl_raw, p2_jl_bl),
234 );
235
236 let mut p3_fanout_by_attr = BTreeMap::new();
238 for attr in &profile.attributes_for_p3 {
239 let attr_proj = make_attr_projector(attr);
240 let raw = fanout::fanout_w1(real, syn, project, attr_proj);
241 let bl = fanout::fanout_w1(real_a, real_b, project, attr_proj);
242 let name = format!("P3_Fanout_W1_{}", attr);
243 p3_fanout_by_attr.insert(
244 attr.clone(),
245 per_metric(&name, raw, bl, degradation::degradation_ratio(raw, bl)),
246 );
247 }
248
249 let canonical_attr = profile
251 .attributes_for_p3
252 .first()
253 .map(|a| make_attr_projector(a))
254 .unwrap_or(fanout::gl_account_of);
255 let cc_real = fanout::clustering_coefficient(real, project, canonical_attr);
256 let cc_syn = fanout::clustering_coefficient(syn, project, canonical_attr);
257 let cc_a = fanout::clustering_coefficient(real_a, project, canonical_attr);
258 let cc_b = fanout::clustering_coefficient(real_b, project, canonical_attr);
259 let cc_gap_real_syn = (cc_real - cc_syn).abs();
260 let cc_gap_bl = (cc_a - cc_b).abs();
261 let p3_clustering = per_metric(
262 "P3_ClusteringGap",
263 cc_gap_real_syn,
264 cc_gap_bl,
265 degradation::degradation_ratio(cc_gap_real_syn, cc_gap_bl),
266 );
267
268 let t_real = fanout::triangle_count(real, project, canonical_attr);
269 let t_syn = fanout::triangle_count(syn, project, canonical_attr);
270 let t_a = fanout::triangle_count(real_a, project, canonical_attr);
271 let t_b = fanout::triangle_count(real_b, project, canonical_attr);
272 let tr_raw = fanout::triangle_log_ratio_gap(t_real, t_syn);
273 let tr_bl = fanout::triangle_log_ratio_gap(t_a, t_b);
274 let p3_triangle_log_ratio = per_metric(
275 "P3_TriangleLogRatio",
276 tr_raw,
277 tr_bl,
278 degradation::degradation_ratio(tr_raw, tr_bl),
279 );
280
281 Ok(EntityMetrics {
282 entity_column: entity_col.to_string(),
283 p1_ietd,
284 p1_autocorr,
285 p2_active_lifetime,
286 p2_burst_len_by_threshold,
287 p2_je_line_burst,
288 p3_fanout_by_attr,
289 p3_clustering,
290 p3_triangle_log_ratio,
291 p4_rule_results: Vec::new(),
292 p4_mean_gap: per_metric("P4_MeanGap", 0.0, 0.0, 0.0),
293 })
294}
295
296fn project_entity(r: &Record, col: &str) -> Option<String> {
297 match col {
298 "Source" => Some(r.source.clone()),
299 "TradingPartner" => r.trading_partner.clone(),
300 "GLAccount" => Some(r.gl_account.clone()),
301 "CostCenter" => r.cost_center.clone(),
302 "ProfitCenter" => r.profit_center.clone(),
303 _ => None,
304 }
305}
306
307fn make_attr_projector(attr: &str) -> fn(&Record) -> Option<String> {
308 match attr {
309 "GLAccount" => fanout::gl_account_of,
310 "CostCenter" => fanout::cost_center_of,
311 "ProfitCenter" => fanout::profit_center_of,
312 "TradingPartner" => fanout::trading_partner_attr_of,
313 _ => fanout::gl_account_of,
314 }
315}
316
317fn summary(records: &[Record], profile: &EntityProfile) -> CorpusSummary {
318 let entities_p: std::collections::HashSet<String> = records
319 .iter()
320 .filter_map(|r| project_entity(r, &profile.primary_entity))
321 .collect();
322 let entities_s: std::collections::HashSet<String> = profile
323 .secondary_entity
324 .as_ref()
325 .map(|c| {
326 records
327 .iter()
328 .filter_map(|r| project_entity(r, c))
329 .collect()
330 })
331 .unwrap_or_default();
332 let mut period_start = None;
333 let mut period_end = None;
334 for r in records {
335 period_start =
336 Some(period_start.map_or(r.entry_date, |d: chrono::NaiveDate| d.min(r.entry_date)));
337 period_end =
338 Some(period_end.map_or(r.entry_date, |d: chrono::NaiveDate| d.max(r.entry_date)));
339 }
340 CorpusSummary {
341 path: "(in-memory)".to_string(),
342 n_rows: records.len(),
343 n_entities_primary: entities_p.len(),
344 n_entities_secondary: entities_s.len(),
345 period_start,
346 period_end,
347 }
348}
349
350fn collect_baseline_values(
351 per_entity: &BTreeMap<String, EntityMetrics>,
352 profile: &EntityProfile,
353) -> BaselineValues {
354 let primary = per_entity.get(&profile.primary_entity).cloned();
355 let mut p2_burst_len = BTreeMap::new();
356 let mut p3_fanout = BTreeMap::new();
357 let mut bv = BaselineValues {
358 p1_ietd_w1_days: 0.0,
359 p1_autocorr_gap: 0.0,
360 p2_active_lifetime_w1: 0.0,
361 p2_burst_len_by_threshold: BTreeMap::new(),
362 p2_je_line_burst_w1: 0.0,
363 p3_fanout_by_attr: BTreeMap::new(),
364 p3_clustering_gap: 0.0,
365 p3_triangle_log_ratio: 0.0,
366 p4_mean_gap: 0.0,
367 };
368 if let Some(p) = primary {
369 bv.p1_ietd_w1_days = p.p1_ietd.baseline;
370 bv.p1_autocorr_gap = p.p1_autocorr.baseline;
371 bv.p2_active_lifetime_w1 = p.p2_active_lifetime.baseline;
372 for (t, pm) in &p.p2_burst_len_by_threshold {
373 p2_burst_len.insert(*t, pm.baseline);
374 }
375 bv.p2_burst_len_by_threshold = p2_burst_len;
376 bv.p2_je_line_burst_w1 = p.p2_je_line_burst.baseline;
377 for (a, pm) in &p.p3_fanout_by_attr {
378 p3_fanout.insert(a.clone(), pm.baseline);
379 }
380 bv.p3_fanout_by_attr = p3_fanout;
381 bv.p3_clustering_gap = p.p3_clustering.baseline;
382 bv.p3_triangle_log_ratio = p.p3_triangle_log_ratio.baseline;
383 bv.p4_mean_gap = p.p4_mean_gap.baseline;
384 }
385 bv
386}
387
388fn compute_composite_bf(
404 per_entity: &BTreeMap<String, EntityMetrics>,
405) -> (f64, f64, f64, usize, usize, usize) {
406 let mut included: Vec<f64> = Vec::new();
407 let mut vol_corrected: Vec<f64> = Vec::new();
408 let mut n_excluded_degen: usize = 0;
409 let mut n_excluded_volume: usize = 0;
410
411 let mut push = |pm: &PerMetric| {
412 if pm.is_degenerate_baseline {
413 n_excluded_degen += 1;
414 } else {
415 included.push(pm.dr);
416 if pm.is_volume_bounded {
417 n_excluded_volume += 1;
418 } else {
419 vol_corrected.push(pm.dr);
420 }
421 }
422 };
423
424 for em in per_entity.values() {
425 push(&em.p1_ietd);
426 push(&em.p1_autocorr);
427 push(&em.p2_active_lifetime);
428 for pm in em.p2_burst_len_by_threshold.values() {
429 push(pm);
430 }
431 push(&em.p2_je_line_burst);
432 for pm in em.p3_fanout_by_attr.values() {
433 push(pm);
434 }
435 push(&em.p3_clustering);
436 push(&em.p3_triangle_log_ratio);
437 push(&em.p4_mean_gap);
438 }
439
440 let n_aggregated = included.len();
441 if included.is_empty() {
442 return (0.0, 0.0, 0.0, 0, n_excluded_degen, n_excluded_volume);
443 }
444
445 let mean = included.iter().sum::<f64>() / included.len() as f64;
446
447 let median = {
448 let mut sorted = included.clone();
449 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
450 let mid = sorted.len() / 2;
451 if sorted.len().is_multiple_of(2) {
452 (sorted[mid - 1] + sorted[mid]) / 2.0
453 } else {
454 sorted[mid]
455 }
456 };
457
458 let vol_corrected_mean = if vol_corrected.is_empty() {
459 0.0
460 } else {
461 vol_corrected.iter().sum::<f64>() / vol_corrected.len() as f64
462 };
463
464 (
465 mean,
466 median,
467 vol_corrected_mean,
468 n_aggregated,
469 n_excluded_degen,
470 n_excluded_volume,
471 )
472}
473
474fn build_gate_result(
475 thresholds: &GateThresholds,
476 per_entity: &BTreeMap<String, EntityMetrics>,
477 composite: f64,
478) -> GateResult {
479 let mut failures = Vec::new();
480 for (name, em) in per_entity {
481 let metric_checks: Vec<(&str, f64)> = vec![
482 ("P1_IETD", em.p1_ietd.dr),
483 ("P1_Autocorr", em.p1_autocorr.dr),
484 ("P2_ActiveLifetime", em.p2_active_lifetime.dr),
485 ("P2_JELineBurst", em.p2_je_line_burst.dr),
486 ("P3_Clustering", em.p3_clustering.dr),
487 ("P3_TriangleLogRatio", em.p3_triangle_log_ratio.dr),
488 ("P4_MeanGap", em.p4_mean_gap.dr),
489 ];
490 for (mname, dr) in metric_checks {
491 if dr > thresholds.fail_if_dr_above {
492 failures.push(format!(
493 "{}/{} DR={:.3} > {:.2}",
494 name, mname, dr, thresholds.fail_if_dr_above
495 ));
496 }
497 }
498 for (t, pm) in &em.p2_burst_len_by_threshold {
499 if pm.dr > thresholds.fail_if_dr_above {
500 failures.push(format!(
501 "{}/P2_BurstLen_{}d DR={:.3} > {:.2}",
502 name, t, pm.dr, thresholds.fail_if_dr_above
503 ));
504 }
505 }
506 for (attr, pm) in &em.p3_fanout_by_attr {
507 if pm.dr > thresholds.fail_if_dr_above {
508 failures.push(format!(
509 "{}/P3_Fanout_{} DR={:.3} > {:.2}",
510 name, attr, pm.dr, thresholds.fail_if_dr_above
511 ));
512 }
513 }
514 }
515 if composite > thresholds.fail_if_composite_above {
516 failures.push(format!(
517 "Composite BF={:.3} > {:.2}",
518 composite, thresholds.fail_if_composite_above
519 ));
520 }
521 GateResult {
522 fail_if_dr_above: thresholds.fail_if_dr_above,
523 fail_if_composite_above: thresholds.fail_if_composite_above,
524 passed: failures.is_empty(),
525 failures,
526 }
527}
528
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    /// Builds one record per element of `days`, all for `source` and dated in
    /// January 2022 (`days` are day-of-month values). JE numbers are
    /// `{je_prefix}-{index:03}`; every other field is a fixed placeholder.
    fn make_records(source: &str, days: &[u32], je_prefix: &str) -> Vec<Record> {
        days.iter()
            .enumerate()
            .map(|(i, &d)| Record {
                source: source.into(),
                gl_account: "1100".into(),
                cost_center: Some("CC1".into()),
                profit_center: Some("PC1".into()),
                trading_partner: Some("TP1".into()),
                je_number: format!("{je_prefix}-{i:03}"),
                je_line_number: "001".into(),
                effective_date: NaiveDate::from_ymd_opt(2022, 1, d).unwrap(),
                entry_date: NaiveDate::from_ymd_opt(2022, 1, d).unwrap(),
                created_at: None,
                functional_amount: 100.0,
                header_text: String::new(),
                line_text: String::new(),
            })
            .collect()
    }

    /// Comparing a corpus with itself must give a composite below 1.0 and produce
    /// entries for both the `Source` and `TradingPartner` entity columns.
    #[test]
    fn compute_report_identical_produces_low_composite() {
        let mut real = make_records("SRC_A", &[3, 4, 5, 6, 7, 10, 11, 12, 13, 14], "JA");
        real.extend(make_records(
            "SRC_B",
            &[3, 5, 7, 10, 12, 14, 17, 19, 21, 24],
            "JB",
        ));

        let cfg = BehavioralFidelityConfig::gl_default();
        // The same slice is passed as both the reference and the synthetic corpus.
        let report = compute_report(&cfg, &real, &real)
            .expect("compute_report should succeed on identical inputs");

        assert!(
            report.composite_bf_score < 1.0,
            "identical data composite should be well below 1.0, got {}",
            report.composite_bf_score
        );
        assert!(
            report.per_entity.contains_key("Source"),
            "primary entity 'Source' must be present"
        );
        assert!(
            report.per_entity.contains_key("TradingPartner"),
            "secondary entity 'TradingPartner' must be present"
        );
    }

    /// With identical reference and synthetic data no gate may fail.
    #[test]
    fn compute_report_gates_pass_on_identical() {
        let real = make_records("SRC_A", &[3, 4, 5, 6, 7, 10, 11, 12, 13, 14], "JA");
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report should succeed");
        assert!(
            report.gates.passed,
            "gates should pass on identical data; failures: {:?}",
            report.gates.failures
        );
    }

    /// The corpus summaries report the row count and the number of distinct
    /// primary entities (two sources, three rows each).
    #[test]
    fn compute_report_summary_counts_entities() {
        let mut real = make_records("SRC_A", &[3, 4, 5], "JA");
        real.extend(make_records("SRC_B", &[6, 7, 8], "JB"));
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report");
        assert_eq!(report.reference_corpus.n_rows, 6);
        assert_eq!(report.reference_corpus.n_entities_primary, 2);
        assert_eq!(report.synthetic.n_rows, 6);
    }

    /// `noise_floor` must carry the baseline of the primary entity's P1 IETD metric.
    #[test]
    fn noise_floor_baseline_populated_from_primary() {
        let real = make_records("SRC_A", &[3, 4, 5, 6, 7, 10, 11, 12], "JA");
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report");
        let em_primary = report
            .per_entity
            .get("Source")
            .expect("Source entity present");
        assert!(
            (report.noise_floor.p1_ietd_w1_days - em_primary.p1_ietd.baseline).abs() < 1e-9,
            "noise_floor.p1_ietd_w1_days must match primary baseline"
        );
    }

    /// The report carries the generator id, a non-empty version and the seed
    /// (42 for the default GL config, as asserted here).
    #[test]
    fn compute_report_version_and_seed_set() {
        let real = make_records("SRC_A", &[3, 4, 5], "JA");
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report");
        assert_eq!(report.generator_id, "datasynth");
        assert!(!report.generator_version.is_empty());
        assert_eq!(report.seed, 42);
    }

    /// The primary entity receives one P4 result per rule of the default rule set
    /// (10 rules, per the assertion message).
    #[test]
    fn per_entity_has_p4_rule_results_for_primary() {
        let real = make_records("SRC_A", &[3, 4, 5, 6, 7, 10, 11, 12], "JA");
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report");
        let em = report
            .per_entity
            .get("Source")
            .expect("Source entity present");
        assert_eq!(
            em.p4_rule_results.len(),
            10,
            "canonical rule set has 10 rules"
        );
    }

    /// Builds a single-entity (`"Source"`) metrics map for composite tests.
    ///
    /// * Each value in `healthy_drs` becomes a non-degenerate fan-out metric
    ///   (`attr_{i}`) with that DR.
    /// * `degenerate_count` burst-length metrics are added with a degenerate baseline
    ///   and DR `DEGENERATE_BASELINE_CAP`.
    /// * The seven fixed metrics are healthy with DR `0.0`.
    fn make_per_entity_with_metrics(
        healthy_drs: &[f64],
        degenerate_count: usize,
    ) -> BTreeMap<String, EntityMetrics> {
        use crate::behavioral_fidelity::report::PerMetric;

        let healthy_pm = |dr: f64| PerMetric {
            raw: dr,
            baseline: 1.0,
            dr,
            is_degenerate_baseline: false,
            is_volume_bounded: false,
        };
        let degenerate_pm = || PerMetric {
            raw: 1.0,
            baseline: 0.0,
            dr: degradation::DEGENERATE_BASELINE_CAP,
            is_degenerate_baseline: true,
            is_volume_bounded: false,
        };

        let mut p3_fanout = BTreeMap::new();
        for (i, &dr) in healthy_drs.iter().enumerate() {
            p3_fanout.insert(format!("attr_{i}"), healthy_pm(dr));
        }
        let mut p2_burst = BTreeMap::new();
        for i in 0..degenerate_count {
            p2_burst.insert(i as i64, degenerate_pm());
        }

        let em = EntityMetrics {
            entity_column: "Source".into(),
            p1_ietd: healthy_pm(0.0),
            p1_autocorr: healthy_pm(0.0),
            p2_active_lifetime: healthy_pm(0.0),
            p2_burst_len_by_threshold: p2_burst,
            p2_je_line_burst: healthy_pm(0.0),
            p3_fanout_by_attr: p3_fanout,
            p3_clustering: healthy_pm(0.0),
            p3_triangle_log_ratio: healthy_pm(0.0),
            p4_rule_results: vec![],
            p4_mean_gap: healthy_pm(0.0),
        };
        let mut map = BTreeMap::new();
        map.insert("Source".to_string(), em);
        map
    }

    /// A degenerate-baseline metric is counted as excluded and must not inflate
    /// the composite mean.
    #[test]
    fn composite_excludes_degenerate_baseline_metrics() {
        let per_entity = make_per_entity_with_metrics(&[10.0, 10.0, 10.0, 10.0, 10.0], 1);
        let (composite, _median, _vol, n_agg, n_excl, _n_vol) = compute_composite_bf(&per_entity);

        assert_eq!(n_excl, 1, "exactly 1 degenerate metric should be excluded");
        assert!(n_agg >= 1, "at least one healthy metric must be aggregated");
        assert!(
            composite < 100.0 / (n_agg + n_excl) as f64 + 1e-6,
            "composite {composite} should be far below the old degenerate-dominated value"
        );
    }

    /// When every metric is degenerate nothing is aggregated and the composite is 0.0.
    #[test]
    fn composite_returns_zero_when_all_metrics_degenerate() {
        use crate::behavioral_fidelity::report::PerMetric;
        let degen = PerMetric {
            raw: 1.0,
            baseline: 0.0,
            dr: degradation::DEGENERATE_BASELINE_CAP,
            is_degenerate_baseline: true,
            is_volume_bounded: false,
        };
        let em = EntityMetrics {
            entity_column: "Source".into(),
            p1_ietd: degen.clone(),
            p1_autocorr: degen.clone(),
            p2_active_lifetime: degen.clone(),
            p2_burst_len_by_threshold: BTreeMap::new(),
            p2_je_line_burst: degen.clone(),
            p3_fanout_by_attr: BTreeMap::new(),
            p3_clustering: degen.clone(),
            p3_triangle_log_ratio: degen.clone(),
            p4_rule_results: vec![],
            p4_mean_gap: degen,
        };
        let mut per_entity = BTreeMap::new();
        per_entity.insert("Source".to_string(), em);

        let (composite, _median, _vol, n_agg, n_excl, _n_vol) = compute_composite_bf(&per_entity);
        assert_eq!(composite, 0.0, "all-degenerate composite should be 0.0");
        assert_eq!(n_agg, 0);
        assert_eq!(n_excl, 7, "7 fixed metrics, all degenerate");
    }

    /// For a right-skewed set of DRs the mean exceeds the median; 12 metrics are
    /// aggregated (7 fixed at DR 0.0 plus the 5 fan-out values).
    #[test]
    fn compute_composite_bf_returns_mean_and_median() {
        let per_entity = make_per_entity_with_metrics(&[1.0, 5.0, 10.0, 20.0, 100.0], 0);
        let (mean, median, _vol, n, excl, _n_vol) = compute_composite_bf(&per_entity);
        assert_eq!(excl, 0);
        assert_eq!(n, 12, "7 fixed + 5 fanout metrics");
        assert!(
            mean > median,
            "mean ({mean:.3}) should exceed median ({median:.3}) for right-skewed distribution"
        );
        assert!(mean > 5.0, "mean dragged up by outlier 100");
        assert!(median < mean, "median robust to outlier");
    }

    /// A single extreme DR (1000) pulls the mean above 50 while the median stays
    /// below 10.
    #[test]
    fn compute_composite_bf_median_robust_to_outlier() {
        let per_entity = make_per_entity_with_metrics(&[5.0, 10.0, 12.0, 15.0, 1000.0], 0);
        let (mean, median, _vol, n, excl, _n_vol) = compute_composite_bf(&per_entity);
        assert_eq!(excl, 0);
        assert_eq!(n, 12);
        assert!(
            mean > 50.0,
            "mean should be pulled up by outlier 1000; got {mean:.3}"
        );
        assert!(
            median < 10.0,
            "median should be robust to outlier; got {median:.3}"
        );
    }

    /// The aggregated and degenerate-excluded counters on the report together cover
    /// at least the seven fixed metrics.
    #[test]
    fn n_metrics_aggregated_and_excluded_on_report() {
        let real = make_records("SRC_A", &[3, 4, 5, 6, 7, 10, 11, 12, 13, 14], "JA");
        let cfg = BehavioralFidelityConfig::gl_default();
        let report = compute_report(&cfg, &real, &real).expect("compute_report");
        assert!(
            report.n_metrics_aggregated + report.n_metrics_excluded_degenerate > 0,
            "total metric count must be positive"
        );
        let total = report.n_metrics_aggregated + report.n_metrics_excluded_degenerate;
        assert!(total >= 7, "at least 7 fixed metrics per entity");
    }

    /// Pins which metric names `degradation::is_volume_bounded` flags: P1 IETD,
    /// P3 fan-out and P2 burst length are flagged; clustering gap, P4 mean gap and
    /// P1 autocorr gap are not.
    #[test]
    fn is_volume_bounded_flags_p1_ietd() {
        assert!(
            degradation::is_volume_bounded("P1_IETD_W1_days"),
            "P1_IETD_W1_days must be flagged as volume-bounded"
        );
        assert!(
            degradation::is_volume_bounded("P3_Fanout_W1_GLAccount"),
            "P3_Fanout_W1_GLAccount must be flagged as volume-bounded"
        );
        assert!(
            degradation::is_volume_bounded("P3_Fanout_W1_CostCenter"),
            "P3_Fanout_W1_CostCenter must be flagged as volume-bounded"
        );
        assert!(
            degradation::is_volume_bounded("P2_BurstLen_W1_7d"),
            "P2_BurstLen_W1_7d must be flagged as volume-bounded"
        );
        assert!(
            !degradation::is_volume_bounded("P3_ClusteringGap"),
            "P3_ClusteringGap must NOT be volume-bounded"
        );
        assert!(
            !degradation::is_volume_bounded("P4_MeanGap"),
            "P4_MeanGap must NOT be volume-bounded"
        );
        assert!(
            !degradation::is_volume_bounded("P1_AutocorrGap"),
            "P1_AutocorrGap must NOT be volume-bounded"
        );
    }

    /// A volume-bounded metric contributes to the headline mean but is left out of
    /// the volume-corrected mean, and is counted in the volume-excluded counter.
    #[test]
    fn compute_composite_bf_volume_corrected_excludes_volume_bounded() {
        use crate::behavioral_fidelity::report::PerMetric;

        let vol_bounded_pm = PerMetric {
            raw: 50.0,
            baseline: 1.0,
            dr: 50.0,
            is_degenerate_baseline: false,
            is_volume_bounded: true,
        };
        let healthy_pm = PerMetric {
            raw: 10.0,
            baseline: 1.0,
            dr: 10.0,
            is_degenerate_baseline: false,
            is_volume_bounded: false,
        };

        let em = EntityMetrics {
            entity_column: "Source".into(),
            p1_ietd: vol_bounded_pm.clone(),
            p1_autocorr: healthy_pm.clone(),
            p2_active_lifetime: healthy_pm.clone(),
            p2_burst_len_by_threshold: BTreeMap::new(),
            p2_je_line_burst: healthy_pm.clone(),
            p3_fanout_by_attr: BTreeMap::new(),
            p3_clustering: healthy_pm.clone(),
            p3_triangle_log_ratio: healthy_pm.clone(),
            p4_rule_results: vec![],
            p4_mean_gap: healthy_pm.clone(),
        };
        let mut per_entity = BTreeMap::new();
        per_entity.insert("Source".to_string(), em);

        let (mean, _median, vol_corrected, n_agg, n_excl_degen, n_excl_vol) =
            compute_composite_bf(&per_entity);

        assert_eq!(n_agg, 7, "all 7 metrics are non-degenerate");
        assert_eq!(n_excl_degen, 0, "no degenerate metrics");
        assert_eq!(n_excl_vol, 1, "exactly 1 volume-bounded metric (p1_ietd)");

        // One metric at DR 50 plus six at DR 10.
        let expected_mean = (50.0 + 6.0 * 10.0) / 7.0;
        assert!(
            (mean - expected_mean).abs() < 1e-9,
            "mean={mean:.6} expected={expected_mean:.6}"
        );

        assert!(
            (vol_corrected - 10.0).abs() < 1e-9,
            "vol_corrected={vol_corrected:.6} expected=10.0"
        );
        assert!(
            vol_corrected < mean,
            "volume-corrected ({vol_corrected:.3}) must be below headline mean ({mean:.3}) when vb metric has high DR"
        );
    }
}