datasynth-eval 5.34.0

Evaluation framework for synthetic financial data quality and coherence
Documentation
//! Relational-fidelity metric (engine feature A5).
//!
//! Corpus account-flow manifolds are noisier and richer than synthetic ones — the inverse-audit
//! work had to scale the memory boost β from ~3 (synthetic) to 8–15 (corpus) because synthetic GLs
//! are relationally *too clean* (FINDINGS §37/§40). This analyzer measures the relational structure
//! of a generated GL's account-flow graph — account-pair diversity, edge-weight tail, reciprocity
//! (a cycle proxy), and hub structure — so the fidelity round can target corpus-like relational
//! noise rather than only matching amount/IET marginals.
//!
//! Decoupled from the full model: it consumes lightweight [`FlowEdge`]s; [`flow_edges_from_entries`]
//! builds them from journal entries (one dominant credit→debit flow per entry). Optional reference
//! bands ([`RelationalFidelityThresholds`], supplied at runtime from corpus aggregates — never
//! committed) flag the metrics where synthetic is "too clean".

use crate::error::EvalResult;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;

/// Floor for the median per-pair weight: the analyzer only divides p99 by p50 when p50 is strictly
/// greater than this value, and reports a tail ratio of `0.0` otherwise.
const EPS: f64 = 1e-9;

/// A directed account-flow edge (money flows `src` → `dst`) with a magnitude weight.
///
/// This is a plain owned value; nothing here validates the account names or the weight.
/// [`RelationalFidelityAnalyzer::analyze`] groups edges by the exact `(src, dst)` string pair and
/// sums `weight` within each group, so repeated edges between the same accounts accumulate into a
/// single per-pair aggregate.
#[derive(Debug, Clone)]
pub struct FlowEdge {
    /// Source account (the credit leg).
    pub src: String,
    /// Destination account (the debit leg).
    pub dst: String,
    /// Flow magnitude (e.g. the entry total).
    pub weight: f64,
}

/// Optional reference bands; a metric below its band is flagged "too clean". All `None` → no flags.
///
/// The comparison is strict: a metric is flagged only when it is `<` its band, so a metric exactly
/// equal to the band passes. The derived `Default` leaves every band as `None`.
#[derive(Debug, Clone, Default)]
pub struct RelationalFidelityThresholds {
    /// Minimum acceptable distinct-pairs / edges ratio (flag name: `"pair_diversity"`).
    pub min_pair_diversity: Option<f64>,
    /// Minimum acceptable edge-weight p99/p50 tail ratio (flag name: `"edge_weight_tail_ratio"`).
    pub min_edge_weight_tail_ratio: Option<f64>,
    /// Minimum acceptable reciprocity (cycle proxy) (flag name: `"reciprocity"`).
    pub min_reciprocity: Option<f64>,
}

/// Relational-structure summary of an account-flow graph.
///
/// Produced by [`RelationalFidelityAnalyzer::analyze`]. For an empty edge list every count and
/// ratio is zero and `too_clean` is empty unless a supplied band is itself greater than zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationalFidelityReport {
    /// Distinct accounts touched (src ∪ dst).
    pub n_accounts: usize,
    /// Directed flow instances considered (the length of the input, repeats included).
    pub n_edges: usize,
    /// Distinct directed account pairs.
    pub distinct_pairs: usize,
    /// `distinct_pairs / n_edges` — higher is richer (less repetitive flow). `0.0` when there are
    /// no edges (the denominator is clamped to 1).
    pub pair_diversity: f64,
    /// p99/p50 of per-pair aggregate edge weight — the heavy-tail of flow magnitude. Reported as
    /// `0.0` when the median aggregate is not strictly above `1e-9` (which includes empty input).
    pub edge_weight_tail_ratio: f64,
    /// Fraction of distinct pairs `(a,b)` whose reverse `(b,a)` also exists — a cheap cycle /
    /// round-trip prevalence proxy (the relational families the per-JE arm is blind to).
    /// Both directions of a reciprocal pair are counted, and a self-loop `(a,a)` is its own
    /// reverse, so it counts as reciprocal too. `0.0` when there are no pairs.
    pub reciprocity: f64,
    /// Mean out-degree (distinct destinations per source account), averaged over accounts that
    /// appear at least once as a source; `0.0` when there are none.
    pub mean_out_degree: f64,
    /// Maximum out-degree (the dominant hub).
    pub max_out_degree: usize,
    /// Names of metrics below their reference band (empty when no thresholds supplied / all pass).
    /// Possible entries, in emission order: `"pair_diversity"`, `"edge_weight_tail_ratio"`,
    /// `"reciprocity"`.
    pub too_clean: Vec<String>,
}

/// Computes the [`RelationalFidelityReport`].
///
/// Holds only the optional reference bands; the derived `Default` yields an analyzer with no
/// bands, i.e. one that never flags anything.
#[derive(Debug, Clone, Default)]
pub struct RelationalFidelityAnalyzer {
    /// Reference bands compared against the computed metrics to fill `too_clean`.
    thresholds: RelationalFidelityThresholds,
}

impl RelationalFidelityAnalyzer {
    /// New analyzer with no reference bands (descriptive only).
    pub fn new() -> Self {
        Self::default()
    }

    /// New analyzer with reference bands (e.g. corpus aggregates) to flag "too clean" metrics.
    pub fn with_thresholds(thresholds: RelationalFidelityThresholds) -> Self {
        Self { thresholds }
    }

    /// Summarize the relational structure of an account-flow graph.
    ///
    /// Edges are grouped by their exact `(src, dst)` pair and their weights summed per pair. From
    /// that grouping the report derives pair diversity, the p99/p50 tail ratio of the per-pair
    /// aggregates, reciprocity, and out-degree statistics, then compares the three banded metrics
    /// against the configured thresholds (strictly-below is flagged).
    ///
    /// Per-pair aggregates that are not finite (NaN or ±∞, e.g. from a NaN input weight or an
    /// overflowing sum) are left out of the tail-ratio computation; they still count towards
    /// `distinct_pairs`, reciprocity, and the degree statistics, which are purely structural.
    ///
    /// An empty `edges` slice yields an all-zero report.
    ///
    /// # Errors
    ///
    /// This implementation has no failure path and always returns `Ok`.
    pub fn analyze(&self, edges: &[FlowEdge]) -> EvalResult<RelationalFidelityReport> {
        use std::collections::BTreeSet;

        let n_edges = edges.len();
        let mut pair_weights: BTreeMap<(&str, &str), f64> = BTreeMap::new();
        let mut out_dsts: BTreeMap<&str, BTreeSet<&str>> = BTreeMap::new();
        let mut accounts: BTreeSet<&str> = BTreeSet::new();
        for edge in edges {
            let (src, dst) = (edge.src.as_str(), edge.dst.as_str());
            *pair_weights.entry((src, dst)).or_default() += edge.weight;
            out_dsts.entry(src).or_default().insert(dst);
            accounts.insert(src);
            accounts.insert(dst);
        }

        let distinct_pairs = pair_weights.len();
        let pair_diversity = distinct_pairs as f64 / n_edges.max(1) as f64;

        // Only finite aggregates carry a usable magnitude. Dropping NaN/∞ keeps the percentiles
        // (and the serialized report) well-defined, and `total_cmp` gives the sort a genuine total
        // order — a `partial_cmp(..).unwrap_or(Equal)` comparator is not one when NaN is present.
        let mut weights: Vec<f64> = pair_weights
            .values()
            .copied()
            .filter(|w| w.is_finite())
            .collect();
        weights.sort_unstable_by(f64::total_cmp);
        let p50 = percentile(&weights, 50.0);
        let p99 = percentile(&weights, 99.0);
        // Guard the division: a zero/negative/near-zero median has no meaningful tail ratio.
        let edge_weight_tail_ratio = if p50 > EPS { p99 / p50 } else { 0.0 };

        // A pair is reciprocal when its reverse is also a key; both directions are counted, and a
        // self-loop is trivially its own reverse.
        let reciprocal = pair_weights
            .keys()
            .filter(|(a, b)| pair_weights.contains_key(&(*b, *a)))
            .count();
        let reciprocity = reciprocal as f64 / distinct_pairs.max(1) as f64;

        let max_out_degree = out_dsts.values().map(BTreeSet::len).max().unwrap_or(0);
        // Each distinct (src, dst) pair contributes exactly one destination to its source's set,
        // so the out-degrees sum to `distinct_pairs`.
        let mean_out_degree = if out_dsts.is_empty() {
            0.0
        } else {
            distinct_pairs as f64 / out_dsts.len() as f64
        };

        // Emission order is fixed: pair_diversity, edge_weight_tail_ratio, reciprocity.
        let bands = &self.thresholds;
        let checks = [
            ("pair_diversity", bands.min_pair_diversity, pair_diversity),
            (
                "edge_weight_tail_ratio",
                bands.min_edge_weight_tail_ratio,
                edge_weight_tail_ratio,
            ),
            ("reciprocity", bands.min_reciprocity, reciprocity),
        ];
        let mut too_clean = Vec::new();
        for &(name, band, value) in &checks {
            if band.is_some_and(|min| value < min) {
                too_clean.push(name.to_string());
            }
        }

        Ok(RelationalFidelityReport {
            n_accounts: accounts.len(),
            n_edges,
            distinct_pairs,
            pair_diversity,
            edge_weight_tail_ratio,
            reciprocity,
            mean_out_degree,
            max_out_degree,
            too_clean,
        })
    }
}

/// Derive one dominant account-flow edge per journal entry.
///
/// For each entry the source is the account of the first line for which `is_debit()` is false
/// (treated as the credit leg) and the destination is the account of the first line for which
/// `is_debit()` is true. The edge weight is the entry's `total_debit()` converted to `f64`,
/// falling back to `0.0` when that conversion yields nothing. Entries lacking either kind of line
/// produce no edge, so the output may be shorter than the input; input order is preserved.
pub fn flow_edges_from_entries(entries: &[datasynth_core::models::JournalEntry]) -> Vec<FlowEdge> {
    use rust_decimal::prelude::ToPrimitive;
    entries
        .iter()
        .filter_map(|entry| {
            let credit_line = entry.lines.iter().find(|line| !line.is_debit())?;
            let debit_line = entry.lines.iter().find(|line| line.is_debit())?;
            Some(FlowEdge {
                src: credit_line.gl_account.clone(),
                dst: debit_line.gl_account.clone(),
                weight: entry.total_debit().to_f64().unwrap_or(0.0),
            })
        })
        .collect()
}

/// Nearest-rank percentile of an ascending-sorted slice.
///
/// `p` is on a 0–100 scale. The rank is `p/100 · (len − 1)` rounded to the nearest index and
/// clamped to the last element; an empty slice yields `0.0`. The caller is responsible for
/// passing data that is already sorted.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted.len().checked_sub(1) {
        None => 0.0,
        Some(last) => {
            let fraction = p / 100.0;
            let rank = (fraction * (sorted.len() as f64 - 1.0)).round() as usize;
            sorted[rank.min(last)]
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a test edge.
    fn e(src: &str, dst: &str, w: f64) -> FlowEdge {
        FlowEdge {
            src: src.to_string(),
            dst: dst.to_string(),
            weight: w,
        }
    }

    #[test]
    fn measures_diversity_reciprocity_and_tail() {
        // A→B and the reverse B→A (a 2-cycle), plus a repeated A→C (low diversity contribution),
        // and one heavy-weight pair driving the tail.
        let edges = vec![
            e("A", "B", 100.0),
            e("B", "A", 120.0),
            e("A", "C", 50.0),
            e("A", "C", 60.0),
            e("D", "E", 9000.0),
        ];
        let r = RelationalFidelityAnalyzer::new().analyze(&edges).unwrap();
        assert_eq!(r.n_edges, 5);
        assert_eq!(r.distinct_pairs, 4); // (A,B),(B,A),(A,C),(D,E)
        assert_eq!(r.n_accounts, 5);
        // 4 distinct pairs over 5 edges.
        assert!((r.pair_diversity - 0.8).abs() < 1e-9);
        // (A,B) & (B,A) are reciprocal → 2 of 4 distinct pairs.
        assert!((r.reciprocity - 0.5).abs() < 1e-9);
        // Aggregates sorted: 100, 110, 120, 9000 → p50 = 120, p99 = 9000 → ratio 75.
        assert!((r.edge_weight_tail_ratio - 75.0).abs() < 1e-9);
        // A has out-degree 2 (B, C); it's the hub. B and D each have out-degree 1.
        assert_eq!(r.max_out_degree, 2);
        assert!((r.mean_out_degree - 4.0 / 3.0).abs() < 1e-9);
        // No bands supplied → nothing is flagged.
        assert!(r.too_clean.is_empty());
    }

    #[test]
    fn flags_too_clean_against_reference_band() {
        // A single repeated pair → diversity 0.25, no reciprocity, flat weights.
        let edges = vec![e("A", "B", 100.0); 4];
        let thresholds = RelationalFidelityThresholds {
            min_pair_diversity: Some(0.5),
            min_reciprocity: Some(0.1),
            min_edge_weight_tail_ratio: None,
        };
        let r = RelationalFidelityAnalyzer::with_thresholds(thresholds)
            .analyze(&edges)
            .unwrap();
        assert_eq!(r.distinct_pairs, 1);
        assert!((r.pair_diversity - 0.25).abs() < 1e-9);
        assert_eq!(r.reciprocity, 0.0);
        // Exactly the two banded metrics, in emission order; the unbanded tail ratio is absent.
        assert_eq!(
            r.too_clean,
            vec!["pair_diversity".to_string(), "reciprocity".to_string()]
        );
    }

    #[test]
    fn band_comparison_is_strict_and_covers_tail_ratio() {
        // Same single-pair graph: diversity is exactly 0.25 and the lone aggregate gives a tail
        // ratio of exactly 1.0.
        let edges = vec![e("A", "B", 100.0); 4];
        let thresholds = RelationalFidelityThresholds {
            // Equal to the metric → passes (only strictly-below is flagged).
            min_pair_diversity: Some(0.25),
            // Above the metric → flagged.
            min_edge_weight_tail_ratio: Some(2.0),
            min_reciprocity: None,
        };
        let r = RelationalFidelityAnalyzer::with_thresholds(thresholds)
            .analyze(&edges)
            .unwrap();
        assert!((r.edge_weight_tail_ratio - 1.0).abs() < 1e-9);
        assert_eq!(r.too_clean, vec!["edge_weight_tail_ratio".to_string()]);
    }

    #[test]
    fn empty_is_safe() {
        let r = RelationalFidelityAnalyzer::new().analyze(&[]).unwrap();
        assert_eq!(r.n_edges, 0);
        assert_eq!(r.n_accounts, 0);
        assert_eq!(r.distinct_pairs, 0);
        assert_eq!(r.pair_diversity, 0.0);
        assert_eq!(r.edge_weight_tail_ratio, 0.0);
        assert_eq!(r.reciprocity, 0.0);
        assert_eq!(r.mean_out_degree, 0.0);
        assert_eq!(r.max_out_degree, 0);
        assert!(r.too_clean.is_empty());
    }
}