Skip to main content

datasynth_eval/coherence/
trend_analysis.rs

1//! Trend plausibility evaluator.
2//!
3//! Validates that multi-period journal entry data exhibits internally consistent
4//! financial trends. Checks revenue stability, expense ratio stability,
5//! balance sheet growth consistency, and directional consistency between
6//! revenue and accounts receivable.
7//!
8//! Accounts are classified by GL account prefix (first character):
9//! - 1xxx → Assets
10//! - 2xxx → Liabilities
11//! - 4xxx → Revenue
12//! - 5xxx–8xxx → Expenses
13
14use datasynth_core::models::JournalEntry;
15use rust_decimal::Decimal;
16use serde::{Deserialize, Serialize};
17use std::collections::BTreeMap;
18
19// ─── Result types ─────────────────────────────────────────────────────────────
20
/// Result of a single trend consistency check.
///
/// One value is produced by each `check_*` function in this module and
/// collected into `TrendPlausibilityResult::consistency_checks`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendConsistencyCheck {
    /// Name of the check (e.g. "RevenueStability").
    pub check_type: String,
    /// Number of data points the check actually evaluated.
    ///
    /// For the pairwise checks this is the number of consecutive period pairs
    /// that were compared (pairs skipped for a near-zero base are not counted).
    /// For "ExpenseRatioStability" it is the number of periods that yielded an
    /// expense/revenue ratio. Zero when the check passed vacuously.
    pub periods_analyzed: usize,
    /// True when the check passes, including vacuous passes where there was
    /// too little data to evaluate anything.
    pub is_consistent: bool,
    /// Human-readable explanation of the result.
    pub details: String,
}
33
/// Aggregate result of the trend plausibility evaluator.
///
/// Returned by `analyze_trends`; serializable so it can be embedded in
/// evaluation reports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendPlausibilityResult {
    /// Number of unique fiscal periods found in the data, i.e. distinct
    /// `(fiscal_year, fiscal_period)` pairs.
    pub period_count: usize,
    /// Individual check results.
    pub consistency_checks: Vec<TrendConsistencyCheck>,
    /// Fraction of checks that pass (0.0–1.0).
    pub overall_plausibility_score: f64,
    /// True when overall_plausibility_score ≥ 0.75.
    pub passes: bool,
}
46
47// ─── Per-period aggregates ────────────────────────────────────────────────────
48
/// Financial totals for a single fiscal period.
///
/// All fields start at zero (`Default`) and are accumulated line by line in
/// `aggregate_by_period`. They are per-period posting totals, not running
/// balances carried over from earlier periods.
#[derive(Debug, Default, Clone)]
struct PeriodTotals {
    /// Credits minus debits posted to revenue accounts (prefix '4').
    revenue: Decimal,
    /// Debits minus credits posted to expense accounts (prefix '5'–'8').
    expenses: Decimal,
    /// Debits minus credits posted to asset accounts (prefix '1').
    assets: Decimal,
    /// Net amount posted to liability accounts (prefix '2'); the sign
    /// convention is whatever `aggregate_by_period` applies.
    liabilities: Decimal,
    /// Net debit (debits minus credits) to AR accounts (prefix "11").
    /// These postings are also included in `assets`.
    ar_net: Decimal,
}
59
/// Fiscal period key: (fiscal_year, fiscal_period).
///
/// Tuples compare lexicographically, so an ordered map keyed by `PeriodKey`
/// iterates by year first and then by period within that year.
type PeriodKey = (u16, u8);
62
63// ─── Account classification helpers ──────────────────────────────────────────
64
/// Returns `true` when the GL account code begins with '4' (revenue).
///
/// An empty code is never revenue.
fn is_revenue(account: &str) -> bool {
    matches!(account.chars().next(), Some('4'))
}
68
/// Returns `true` when the GL account code begins with '5', '6', '7' or '8'
/// (expenses).
///
/// An empty code is never an expense.
fn is_expense(account: &str) -> bool {
    account
        .chars()
        .next()
        .map_or(false, |leading| ('5'..='8').contains(&leading))
}
75
/// Returns `true` when the GL account code begins with '1' (assets).
///
/// This includes the accounts-receivable codes matched by `is_ar`.
fn is_asset(account: &str) -> bool {
    account.strip_prefix('1').is_some()
}
79
/// Returns `true` when the GL account code begins with '2' (liabilities).
///
/// An empty code is never a liability.
fn is_liability(account: &str) -> bool {
    account.as_bytes().first() == Some(&b'2')
}
83
/// Accounts Receivable: any GL code whose first two characters are "11".
///
/// For four-digit codes this is the 1100–1199 range, but the match is a pure
/// prefix test, so codes of other lengths (e.g. "11" or "11000") match too.
fn is_ar(account: &str) -> bool {
    matches!(account.as_bytes(), [b'1', b'1', ..])
}
88
89// ─── Aggregation ──────────────────────────────────────────────────────────────
90
91fn aggregate_by_period(entries: &[JournalEntry]) -> BTreeMap<PeriodKey, PeriodTotals> {
92    let mut map: BTreeMap<PeriodKey, PeriodTotals> = BTreeMap::new();
93
94    for entry in entries {
95        let key = (entry.header.fiscal_year, entry.header.fiscal_period);
96        let totals = map.entry(key).or_default();
97
98        for line in &entry.lines {
99            let account = &line.gl_account;
100            let net = line.debit_amount - line.credit_amount;
101
102            if is_revenue(account) {
103                // Revenue is credited; net negative means credit balance → more revenue
104                totals.revenue += line.credit_amount - line.debit_amount;
105            }
106            if is_expense(account) {
107                totals.expenses += line.debit_amount - line.credit_amount;
108            }
109            if is_asset(account) {
110                totals.assets += net;
111            }
112            if is_liability(account) {
113                totals.liabilities += net;
114            }
115            if is_ar(account) {
116                totals.ar_net += net;
117            }
118        }
119    }
120
121    map
122}
123
124// ─── Numeric helpers ──────────────────────────────────────────────────────────
125
126fn to_f64(d: Decimal) -> f64 {
127    d.to_string().parse::<f64>().unwrap_or(0.0)
128}
129
/// Coefficient of variation: population standard deviation divided by the
/// absolute value of the mean.
///
/// Returns 0.0 when fewer than two values are supplied or when the mean is
/// within 1e-9 of zero (the ratio would be undefined or meaninglessly large).
fn coefficient_of_variation(values: &[f64]) -> f64 {
    // Zero or one sample carries no dispersion information.
    if let [] | [_] = values {
        return 0.0;
    }

    let sample_count = values.len() as f64;
    let mean = values.iter().sum::<f64>() / sample_count;
    if mean.abs() < 1e-9 {
        return 0.0;
    }

    // Population variance: divide by n, not n - 1.
    let squared_deviations = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>();
    let std_dev = (squared_deviations / sample_count).sqrt();
    std_dev / mean.abs()
}
142
143// ─── Individual checks ────────────────────────────────────────────────────────
144
145/// Check 1: Revenue doesn't swing > 50% period-over-period.
146fn check_revenue_stability(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
147    let check_type = "RevenueStability".to_string();
148
149    if periods.len() < 2 {
150        return TrendConsistencyCheck {
151            check_type,
152            periods_analyzed: periods.len().saturating_sub(1),
153            is_consistent: true,
154            details: "Insufficient periods for comparison".to_string(),
155        };
156    }
157
158    let mut violations = 0usize;
159    let mut comparisons = 0usize;
160
161    for window in periods.windows(2) {
162        let prev = to_f64(window[0].revenue);
163        let curr = to_f64(window[1].revenue);
164
165        if prev.abs() < 1.0 {
166            // Skip if previous period revenue is essentially zero
167            continue;
168        }
169
170        comparisons += 1;
171        let change = ((curr - prev) / prev.abs()).abs();
172        if change > 0.50 {
173            violations += 1;
174        }
175    }
176
177    let is_consistent = violations == 0 || comparisons == 0;
178    let details = if comparisons == 0 {
179        "All revenue values near zero; check vacuously passes".to_string()
180    } else {
181        format!("{violations} of {comparisons} period-over-period revenue swings exceeded 50%")
182    };
183
184    TrendConsistencyCheck {
185        check_type,
186        periods_analyzed: comparisons,
187        is_consistent,
188        details,
189    }
190}
191
192/// Check 2: Expense/revenue ratio CV < 0.30.
193fn check_expense_ratio_stability(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
194    let check_type = "ExpenseRatioStability".to_string();
195
196    if periods.len() < 2 {
197        return TrendConsistencyCheck {
198            check_type,
199            periods_analyzed: 0,
200            is_consistent: true,
201            details: "Insufficient periods for comparison".to_string(),
202        };
203    }
204
205    let ratios: Vec<f64> = periods
206        .iter()
207        .filter_map(|p| {
208            let rev = to_f64(p.revenue);
209            let exp = to_f64(p.expenses);
210            if rev.abs() < 1.0 {
211                None
212            } else {
213                Some(exp / rev)
214            }
215        })
216        .collect();
217
218    if ratios.len() < 2 {
219        return TrendConsistencyCheck {
220            check_type,
221            periods_analyzed: 0,
222            is_consistent: true,
223            details: "Insufficient non-zero revenue periods for ratio analysis".to_string(),
224        };
225    }
226
227    let cv = coefficient_of_variation(&ratios);
228    let is_consistent = cv < 0.30;
229
230    TrendConsistencyCheck {
231        check_type,
232        periods_analyzed: ratios.len(),
233        is_consistent,
234        details: format!("Expense/revenue ratio CV = {cv:.3} (threshold: < 0.30)"),
235    }
236}
237
238/// Check 3: Asset growth ≈ liability growth (within 25%).
239fn check_balance_sheet_growth_consistency(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
240    let check_type = "BalanceSheetGrowthConsistency".to_string();
241
242    if periods.len() < 2 {
243        return TrendConsistencyCheck {
244            check_type,
245            periods_analyzed: 0,
246            is_consistent: true,
247            details: "Insufficient periods for comparison".to_string(),
248        };
249    }
250
251    let mut violations = 0usize;
252    let mut comparisons = 0usize;
253
254    for window in periods.windows(2) {
255        let asset_prev = to_f64(window[0].assets);
256        let asset_curr = to_f64(window[1].assets);
257        let liab_prev = to_f64(window[0].liabilities);
258        let liab_curr = to_f64(window[1].liabilities);
259
260        if asset_prev.abs() < 1.0 && liab_prev.abs() < 1.0 {
261            continue;
262        }
263
264        comparisons += 1;
265        let asset_growth = if asset_prev.abs() > 1.0 {
266            (asset_curr - asset_prev) / asset_prev.abs()
267        } else {
268            0.0
269        };
270        let liab_growth = if liab_prev.abs() > 1.0 {
271            (liab_curr - liab_prev) / liab_prev.abs()
272        } else {
273            0.0
274        };
275
276        if (asset_growth - liab_growth).abs() > 0.25 {
277            violations += 1;
278        }
279    }
280
281    let is_consistent = violations == 0 || comparisons == 0;
282    TrendConsistencyCheck {
283        check_type,
284        periods_analyzed: comparisons,
285        is_consistent,
286        details: format!(
287            "{violations} of {comparisons} periods showed asset/liability growth divergence > 25%"
288        ),
289    }
290}
291
292/// Check 4: If revenue grows, AR should grow in the same direction.
293fn check_directional_consistency(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
294    let check_type = "DirectionalConsistency".to_string();
295
296    if periods.len() < 2 {
297        return TrendConsistencyCheck {
298            check_type,
299            periods_analyzed: 0,
300            is_consistent: true,
301            details: "Insufficient periods for comparison".to_string(),
302        };
303    }
304
305    let mut violations = 0usize;
306    let mut comparisons = 0usize;
307
308    for window in periods.windows(2) {
309        let rev_delta = to_f64(window[1].revenue) - to_f64(window[0].revenue);
310        let ar_delta = to_f64(window[1].ar_net) - to_f64(window[0].ar_net);
311
312        // Only test if both revenue and AR are non-trivially present
313        let rev_magnitude = to_f64(window[0].revenue)
314            .abs()
315            .max(to_f64(window[1].revenue).abs());
316        if rev_magnitude < 1.0 {
317            continue;
318        }
319
320        comparisons += 1;
321        // Directional mismatch: revenue grows but AR shrinks, or vice versa (significant change)
322        let significant_rev_change = rev_delta.abs() > rev_magnitude * 0.10;
323        if significant_rev_change && rev_delta * ar_delta < 0.0 {
324            violations += 1;
325        }
326    }
327
328    let is_consistent = violations == 0 || comparisons == 0;
329    TrendConsistencyCheck {
330        check_type,
331        periods_analyzed: comparisons,
332        is_consistent,
333        details: format!(
334            "{violations} of {comparisons} periods showed revenue/AR directional mismatch"
335        ),
336    }
337}
338
339// ─── Public API ───────────────────────────────────────────────────────────────
340
341/// Analyze trend plausibility across all journal entries.
342///
343/// Groups entries by fiscal period, computes per-period account totals, and
344/// evaluates four trend consistency checks.
345///
346/// # Returns
347/// A `TrendPlausibilityResult` with individual check results and an aggregate
348/// plausibility score. Passes when ≥ 75% of checks are consistent.
349pub fn analyze_trends(entries: &[JournalEntry]) -> TrendPlausibilityResult {
350    let period_map = aggregate_by_period(entries);
351    let period_count = period_map.len();
352
353    // Collect period totals in chronological order (BTreeMap is sorted by key).
354    let ordered: Vec<&PeriodTotals> = period_map.values().collect();
355
356    let checks = vec![
357        check_revenue_stability(&ordered),
358        check_expense_ratio_stability(&ordered),
359        check_balance_sheet_growth_consistency(&ordered),
360        check_directional_consistency(&ordered),
361    ];
362
363    let passing = checks.iter().filter(|c| c.is_consistent).count();
364    let total = checks.len();
365    let overall_plausibility_score = if total > 0 {
366        passing as f64 / total as f64
367    } else {
368        1.0
369    };
370
371    let passes = overall_plausibility_score >= 0.75;
372
373    TrendPlausibilityResult {
374        period_count,
375        consistency_checks: checks,
376        overall_plausibility_score,
377        passes,
378    }
379}
380
381// ─── Unit tests ───────────────────────────────────────────────────────────────
382
// Unit tests for the trend plausibility evaluator. Every fixture is built from
// two-line journal entries (one debit, one credit) created by `make_je`.
#[cfg(test)]
// `unwrap` is acceptable here: a failed unwrap simply fails the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{JournalEntry, JournalEntryHeader, JournalEntryLine};
    use rust_decimal_macros::dec;

    /// Build a calendar date, panicking on an invalid year/month/day.
    fn date(y: i32, m: u32, d: u32) -> chrono::NaiveDate {
        chrono::NaiveDate::from_ymd_opt(y, m, d).unwrap()
    }

    /// Create a JE with one debit and one credit line in a given fiscal period.
    ///
    /// Both lines carry the same `amount`. Only `fiscal_period` is overridden
    /// on the header; the fiscal year is whatever `JournalEntryHeader::new`
    /// sets (presumably derived from the 2024 posting date — TODO confirm).
    fn make_je(period: u8, debit_acct: &str, credit_acct: &str, amount: Decimal) -> JournalEntry {
        // Use month = period for simplicity (valid for periods 1–12); values
        // outside that range are clamped so the posting date stays valid.
        let m = period.clamp(1, 12);
        let posting_date = date(2024, m as u32, 1);
        let mut header = JournalEntryHeader::new("C001".to_string(), posting_date);
        header.fiscal_period = period;
        let doc_id = header.document_id;
        let mut entry = JournalEntry::new(header);
        entry.add_line(JournalEntryLine::debit(
            doc_id,
            1,
            debit_acct.to_string(),
            amount,
        ));
        entry.add_line(JournalEntryLine::credit(
            doc_id,
            2,
            credit_acct.to_string(),
            amount,
        ));
        entry
    }

    /// Helper: build stable revenue across N periods (identical amount).
    fn stable_revenue_entries(periods: u8, amount: Decimal) -> Vec<JournalEntry> {
        (1..=periods)
            .map(|p| make_je(p, "1100", "4000", amount)) // debit AR, credit Revenue
            .collect()
    }

    /// No entries at all: zero periods, every check passes vacuously.
    #[test]
    fn test_empty_entries() {
        let result = analyze_trends(&[]);
        assert_eq!(result.period_count, 0);
        // All checks vacuously pass (insufficient periods)
        assert!(result.passes);
    }

    /// A single period leaves nothing to compare, so the result still passes.
    #[test]
    fn test_single_period() {
        let entries = stable_revenue_entries(1, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 1);
        // All checks vacuously pass
        assert!(result.passes);
    }

    /// Constant revenue must satisfy the revenue stability check.
    #[test]
    fn test_stable_revenue_passes() {
        // Identical revenue each period → zero period-over-period change → passes
        let entries = stable_revenue_entries(6, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 6);
        let rev_check = result
            .consistency_checks
            .iter()
            .find(|c| c.check_type == "RevenueStability")
            .unwrap();
        assert!(rev_check.is_consistent, "{}", rev_check.details);
    }

    /// Revenue that triples every period must fail the revenue stability check.
    #[test]
    fn test_volatile_revenue_fails() {
        // Triple revenue each period → 200% swing, well above the 50% limit
        let mut entries = Vec::new();
        let mut amount = dec!(10_000);
        for period in 1u8..=4 {
            entries.push(make_je(period, "1100", "4000", amount));
            amount *= dec!(3); // 200% increase → far above 50% threshold
        }
        let result = analyze_trends(&entries);
        let rev_check = result
            .consistency_checks
            .iter()
            .find(|c| c.check_type == "RevenueStability")
            .unwrap();
        assert!(!rev_check.is_consistent, "3× revenue growth should fail");
    }

    /// The overall score is a fraction and must lie in [0.0, 1.0].
    #[test]
    fn test_plausibility_score_range() {
        let entries = stable_revenue_entries(4, dec!(50_000));
        let result = analyze_trends(&entries);
        assert!(
            result.overall_plausibility_score >= 0.0 && result.overall_plausibility_score <= 1.0
        );
    }

    /// Stable fixtures must clear the overall pass threshold.
    #[test]
    fn test_passes_threshold() {
        // Stable data should have score ≥ 0.75
        let entries = stable_revenue_entries(6, dec!(100_000));
        let result = analyze_trends(&entries);
        assert!(
            result.passes,
            "Stable data should pass. Score: {}",
            result.overall_plausibility_score
        );
    }

    /// One entry per period yields one distinct period per entry.
    #[test]
    fn test_period_count_correct() {
        let entries = stable_revenue_entries(3, dec!(50_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 3);
    }

    /// Exactly four checks are reported, under their expected names.
    #[test]
    fn test_check_count() {
        let entries = stable_revenue_entries(4, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.consistency_checks.len(), 4);
        let names: Vec<&str> = result
            .consistency_checks
            .iter()
            .map(|c| c.check_type.as_str())
            .collect();
        assert!(names.contains(&"RevenueStability"));
        assert!(names.contains(&"ExpenseRatioStability"));
        assert!(names.contains(&"BalanceSheetGrowthConsistency"));
        assert!(names.contains(&"DirectionalConsistency"));
    }

    /// Sanity checks for the coefficient-of-variation helper.
    #[test]
    fn test_cv_calculation() {
        // Test coefficient_of_variation directly: identical values have no spread
        let values = vec![1.0, 1.0, 1.0, 1.0];
        assert!((coefficient_of_variation(&values) - 0.0).abs() < 1e-9);

        // Distinct values around a non-zero mean give a strictly positive CV
        let values2 = vec![1.0, 2.0, 3.0, 4.0];
        let cv = coefficient_of_variation(&values2);
        assert!(cv > 0.0);
    }
}