Skip to main content

datasynth_eval/behavioral_fidelity/
ietd.rs

1//! P1 — Inter-event time distribution + within-entity autocorrelation.
2
3use std::collections::HashMap;
4
5use chrono::NaiveDate;
6
7use super::math::{pearson_lag1_correlation, wasserstein_1};
8use super::types::Record;
9
/// Result of P1 on one (real, synthetic) pair for one entity column.
///
/// Produced by [`compute_p1`]; all fields are plain `f64` summaries.
#[derive(Debug, Clone, PartialEq)]
pub struct P1Outcome {
    /// Value returned by `wasserstein_1` for the pooled within-entity
    /// inter-event times of the real vs. synthetic records. The inputs are
    /// whole-day gaps, so the distance is expressed in days.
    pub ietd_w1_days: f64,
    /// Mean, over entities of the real data, of the lag-1 correlation of each
    /// entity's inter-event times (`pearson_lag1_correlation`). It is `0.0`
    /// when no entity yields a correlation.
    pub autocorr_real: f64,
    /// Same statistic as `autocorr_real`, computed on the synthetic data.
    pub autocorr_syn: f64,
    /// Absolute difference `|autocorr_real - autocorr_syn|`.
    pub autocorr_gap: f64,
}
18
19/// Compute pooled IETD W₁ and lag-1 within-entity autocorrelation gap.
20///
21/// `entity_of` projects each Record to its entity identifier; `date_of`
22/// projects to its day-resolution timestamp. The pooled IETD is the union
23/// of within-entity inter-event time sequences.
24pub fn compute_p1<F, G>(real: &[Record], syn: &[Record], entity_of: F, date_of: G) -> P1Outcome
25where
26    F: Fn(&Record) -> Option<String> + Copy,
27    G: Fn(&Record) -> NaiveDate + Copy,
28{
29    let iets_real = pooled_iets(real, entity_of, date_of);
30    let iets_syn = pooled_iets(syn, entity_of, date_of);
31    let w1 = wasserstein_1(&iets_real, &iets_syn);
32
33    let auto_real = pooled_autocorr(real, entity_of, date_of);
34    let auto_syn = pooled_autocorr(syn, entity_of, date_of);
35    P1Outcome {
36        ietd_w1_days: w1,
37        autocorr_real: auto_real,
38        autocorr_syn: auto_syn,
39        autocorr_gap: (auto_real - auto_syn).abs(),
40    }
41}
42
43fn group_by_entity<F>(records: &[Record], entity_of: F) -> HashMap<String, Vec<&Record>>
44where
45    F: Fn(&Record) -> Option<String> + Copy,
46{
47    let mut by: HashMap<String, Vec<&Record>> = HashMap::new();
48    for r in records {
49        if let Some(e) = entity_of(r) {
50            by.entry(e).or_default().push(r);
51        }
52    }
53    by
54}
55
56fn pooled_iets<F, G>(records: &[Record], entity_of: F, date_of: G) -> Vec<f64>
57where
58    F: Fn(&Record) -> Option<String> + Copy,
59    G: Fn(&Record) -> NaiveDate + Copy,
60{
61    let mut out = Vec::new();
62    for (_e, mut rows) in group_by_entity(records, entity_of) {
63        if rows.len() < 2 {
64            continue;
65        }
66        rows.sort_by_key(|r| date_of(r));
67        for w in rows.windows(2) {
68            let d = (date_of(w[1]) - date_of(w[0])).num_days() as f64;
69            if d >= 0.0 {
70                out.push(d);
71            }
72        }
73    }
74    out
75}
76
77fn pooled_autocorr<F, G>(records: &[Record], entity_of: F, date_of: G) -> f64
78where
79    F: Fn(&Record) -> Option<String> + Copy,
80    G: Fn(&Record) -> NaiveDate + Copy,
81{
82    let mut acc = 0.0;
83    let mut n = 0;
84    for (_e, mut rows) in group_by_entity(records, entity_of) {
85        if rows.len() < 3 {
86            continue;
87        }
88        rows.sort_by_key(|r| date_of(r));
89        let iets: Vec<f64> = rows
90            .windows(2)
91            .map(|w| (date_of(w[1]) - date_of(w[0])).num_days() as f64)
92            .collect();
93        if let Some(r) = pearson_lag1_correlation(&iets) {
94            acc += r;
95            n += 1;
96        }
97    }
98    if n == 0 {
99        0.0
100    } else {
101        acc / n as f64
102    }
103}
104
105/// Convenience: project Record -> `Source`.
106pub fn source_of(r: &Record) -> Option<String> {
107    Some(r.source.clone())
108}
109
110/// Convenience: project Record -> `TradingPartner`.
111pub fn trading_partner_of(r: &Record) -> Option<String> {
112    r.trading_partner.clone()
113}
114
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    /// Build a minimal `Record` for `src` whose effective and entry dates are
    /// both the given calendar day; every other field gets a fixed placeholder.
    fn rec(src: &str, year: i32, mon: u32, day: u32) -> Record {
        let date = NaiveDate::from_ymd_opt(year, mon, day).unwrap();
        Record {
            source: src.into(),
            gl_account: "1".into(),
            cost_center: None,
            profit_center: None,
            trading_partner: None,
            je_number: format!("JE-{src}-{day}"),
            je_line_number: "001".into(),
            effective_date: date,
            entry_date: date,
            created_at: None,
            functional_amount: 1.0,
            header_text: String::new(),
            line_text: String::new(),
        }
    }

    /// One record per listed day of January 2022 for the given source.
    fn jan_2022(src: &str, days: &[u32]) -> Vec<Record> {
        days.iter().map(|&day| rec(src, 2022, 1, day)).collect()
    }

    /// Records for source "A" followed by records for source "B".
    fn two_sources(a_days: &[u32], b_days: &[u32]) -> Vec<Record> {
        let mut rows = jan_2022("A", a_days);
        rows.extend(jan_2022("B", b_days));
        rows
    }

    /// Comparing a data set with itself must give zero distance and zero gap.
    #[test]
    fn p1_identical_data_w1_zero_autocorr_gap_zero() {
        let data = two_sources(&[1, 2, 3, 4], &[1, 5, 9]);
        let outcome = compute_p1(&data, &data, source_of, |r| r.entry_date);
        assert!(outcome.ietd_w1_days.abs() < 1e-9);
        assert!(outcome.autocorr_gap.abs() < 1e-9);
    }

    /// Changing source "A" from 1-day to 5-day spacing must move the pooled
    /// inter-event-time distribution by a clearly non-zero amount.
    #[test]
    fn p1_compressed_vs_uniform_detects_shift() {
        let real = two_sources(&[1, 2, 3, 4], &[1, 5, 9]);
        let syn = two_sources(&[1, 6, 11, 16], &[1, 5, 9]);
        let outcome = compute_p1(&real, &syn, source_of, |r| r.entry_date);
        assert!(
            outcome.ietd_w1_days > 0.5,
            "expected non-trivial W1, got {}",
            outcome.ietd_w1_days
        );
    }
}