Skip to main content

datasynth_eval/coherence/
trend_analysis.rs

1//! Trend plausibility evaluator.
2//!
3//! Validates that multi-period journal entry data exhibits internally consistent
4//! financial trends. Checks revenue stability, expense ratio stability,
5//! balance sheet growth consistency, and directional consistency between
6//! revenue and accounts receivable.
7//!
//! Accounts are classified by GL account prefix (leading character(s)):
//! - 1xxx → Assets
//! - 11xx → Accounts Receivable (a subset of Assets, matched on the "11" prefix)
//! - 2xxx → Liabilities
//! - 4xxx → Revenue
//! - 5xxx–8xxx → Expenses
13
14use datasynth_core::models::JournalEntry;
15use rust_decimal::Decimal;
16use serde::{Deserialize, Serialize};
17use std::collections::BTreeMap;
18
19// ─── Result types ─────────────────────────────────────────────────────────────
20
/// Result of a single trend consistency check.
///
/// One value of this type is produced per check run by `analyze_trends`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendConsistencyCheck {
    /// Name of the check (e.g. "RevenueStability").
    pub check_type: String,
    /// Number of comparisons the check actually performed.
    ///
    /// For the window-based checks this is the count of consecutive period
    /// pairs that were not skipped; for the expense-ratio check it is the
    /// number of periods that contributed a ratio. It is 0 when the check
    /// passed vacuously.
    pub periods_analyzed: usize,
    /// True when the check passes (including vacuous passes on insufficient data).
    pub is_consistent: bool,
    /// Human-readable explanation of the result.
    pub details: String,
}
33
/// Aggregate result of the trend plausibility evaluator.
///
/// Returned by `analyze_trends`; bundles the per-check results with a
/// summary score and a pass/fail flag derived from that score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendPlausibilityResult {
    /// Number of unique fiscal periods found in the data.
    pub period_count: usize,
    /// Individual check results.
    pub consistency_checks: Vec<TrendConsistencyCheck>,
    /// Fraction of checks that pass (0.0–1.0).
    pub overall_plausibility_score: f64,
    /// True when overall_plausibility_score ≥ 0.75.
    pub passes: bool,
}
46
47// ─── Per-period aggregates ────────────────────────────────────────────────────
48
/// Financial totals for a single fiscal period.
///
/// Each field accumulates journal-line activity posted within the period;
/// the values are filled in by `aggregate_by_period`. `Default` yields
/// all-zero totals, which is the starting point for a newly seen period.
#[derive(Debug, Default, Clone)]
struct PeriodTotals {
    /// Net credit activity (credits − debits) on revenue accounts.
    revenue: Decimal,
    /// Net debit activity (debits − credits) on expense accounts.
    expenses: Decimal,
    /// Net debit activity (debits − credits) on asset accounts.
    assets: Decimal,
    /// Net activity on liability accounts; the sign convention is whatever
    /// `aggregate_by_period` applies when accumulating.
    liabilities: Decimal,
    /// Net debit activity (debits − credits) on AR accounts, i.e. accounts
    /// whose code starts with "11" (the 1100–1199 range of a four-digit chart).
    ar_net: Decimal,
}
59
/// Fiscal period key: (fiscal_year, fiscal_period).
///
/// Tuples order lexicographically, so a map sorted by this key iterates by
/// year first and by period within the year.
type PeriodKey = (u16, u8);
62
63// ─── Account classification helpers ──────────────────────────────────────────
64
/// Returns true when the account code's first character is `'4'` (revenue).
fn is_revenue(account: &str) -> bool {
    matches!(account.chars().next(), Some('4'))
}
68
/// Returns true when the account code's first character is `'5'` through `'8'` (expenses).
fn is_expense(account: &str) -> bool {
    // The accepted leading characters are ASCII, so inspecting the first
    // byte is equivalent to inspecting the first character.
    matches!(account.as_bytes().first(), Some(b'5'..=b'8'))
}
75
/// Returns true when the account code's first character is `'1'` (assets).
fn is_asset(account: &str) -> bool {
    account.strip_prefix('1').is_some()
}
79
/// Returns true when the account code's first character is `'2'` (liabilities).
fn is_liability(account: &str) -> bool {
    account.chars().next() == Some('2')
}
83
/// Accounts Receivable: any account code beginning with "11"
/// (GL codes 1100–1199 in a four-digit chart of accounts).
fn is_ar(account: &str) -> bool {
    // Byte-wise prefix comparison; identical to a string prefix test for
    // this ASCII prefix.
    account.as_bytes().starts_with(b"11")
}
88
89// ─── Aggregation ──────────────────────────────────────────────────────────────
90
91fn aggregate_by_period(entries: &[JournalEntry]) -> BTreeMap<PeriodKey, PeriodTotals> {
92    let mut map: BTreeMap<PeriodKey, PeriodTotals> = BTreeMap::new();
93
94    for entry in entries {
95        let key = (entry.header.fiscal_year, entry.header.fiscal_period);
96        let totals = map.entry(key).or_default();
97
98        for line in &entry.lines {
99            let account = &line.gl_account;
100            let net = line.debit_amount - line.credit_amount;
101
102            if is_revenue(account) {
103                // Revenue is credited; net negative means credit balance → more revenue
104                totals.revenue += line.credit_amount - line.debit_amount;
105            }
106            if is_expense(account) {
107                totals.expenses += line.debit_amount - line.credit_amount;
108            }
109            if is_asset(account) {
110                totals.assets += net;
111            }
112            if is_liability(account) {
113                totals.liabilities += net;
114            }
115            if is_ar(account) {
116                totals.ar_net += net;
117            }
118        }
119    }
120
121    map
122}
123
124// ─── Numeric helpers ──────────────────────────────────────────────────────────
125
126fn to_f64(d: Decimal) -> f64 {
127    d.to_string().parse::<f64>().unwrap_or(0.0)
128}
129
/// Coefficient of variation: population standard deviation divided by |mean|.
///
/// Returns 0.0 when fewer than two values are supplied or when the mean is
/// within 1e-9 of zero (the ratio would be undefined or unstable).
fn coefficient_of_variation(values: &[f64]) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }

    let n = count as f64;
    let mean = values.iter().sum::<f64>() / n;
    if mean.abs() < 1e-9 {
        return 0.0;
    }

    // Population variance: divide by n, not n - 1.
    let squared_deviations: f64 = values.iter().map(|v| (v - mean).powi(2)).sum();
    let std_dev = (squared_deviations / n).sqrt();
    std_dev / mean.abs()
}
142
143// ─── Individual checks ────────────────────────────────────────────────────────
144
145/// Check 1: Revenue doesn't swing > 50% period-over-period.
146fn check_revenue_stability(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
147    let check_type = "RevenueStability".to_string();
148
149    if periods.len() < 2 {
150        return TrendConsistencyCheck {
151            check_type,
152            periods_analyzed: periods.len().saturating_sub(1),
153            is_consistent: true,
154            details: "Insufficient periods for comparison".to_string(),
155        };
156    }
157
158    let mut violations = 0usize;
159    let mut comparisons = 0usize;
160
161    for window in periods.windows(2) {
162        let prev = to_f64(window[0].revenue);
163        let curr = to_f64(window[1].revenue);
164
165        if prev.abs() < 1.0 {
166            // Skip if previous period revenue is essentially zero
167            continue;
168        }
169
170        comparisons += 1;
171        let change = ((curr - prev) / prev.abs()).abs();
172        if change > 0.50 {
173            violations += 1;
174        }
175    }
176
177    let is_consistent = violations == 0 || comparisons == 0;
178    let details = if comparisons == 0 {
179        "All revenue values near zero; check vacuously passes".to_string()
180    } else {
181        format!("{violations} of {comparisons} period-over-period revenue swings exceeded 50%")
182    };
183
184    TrendConsistencyCheck {
185        check_type,
186        periods_analyzed: comparisons,
187        is_consistent,
188        details,
189    }
190}
191
192/// Check 2: Expense/revenue ratio CV < 0.30.
193fn check_expense_ratio_stability(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
194    let check_type = "ExpenseRatioStability".to_string();
195
196    if periods.len() < 2 {
197        return TrendConsistencyCheck {
198            check_type,
199            periods_analyzed: 0,
200            is_consistent: true,
201            details: "Insufficient periods for comparison".to_string(),
202        };
203    }
204
205    let ratios: Vec<f64> = periods
206        .iter()
207        .filter_map(|p| {
208            let rev = to_f64(p.revenue);
209            let exp = to_f64(p.expenses);
210            if rev.abs() < 1.0 {
211                None
212            } else {
213                Some(exp / rev)
214            }
215        })
216        .collect();
217
218    if ratios.len() < 2 {
219        return TrendConsistencyCheck {
220            check_type,
221            periods_analyzed: 0,
222            is_consistent: true,
223            details: "Insufficient non-zero revenue periods for ratio analysis".to_string(),
224        };
225    }
226
227    let cv = coefficient_of_variation(&ratios);
228    let is_consistent = cv < 0.30;
229
230    TrendConsistencyCheck {
231        check_type,
232        periods_analyzed: ratios.len(),
233        is_consistent,
234        details: format!("Expense/revenue ratio CV = {cv:.3} (threshold: < 0.30)"),
235    }
236}
237
238/// Check 3: Asset growth ≈ liability growth (within 25%).
239fn check_balance_sheet_growth_consistency(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
240    let check_type = "BalanceSheetGrowthConsistency".to_string();
241
242    if periods.len() < 2 {
243        return TrendConsistencyCheck {
244            check_type,
245            periods_analyzed: 0,
246            is_consistent: true,
247            details: "Insufficient periods for comparison".to_string(),
248        };
249    }
250
251    let mut violations = 0usize;
252    let mut comparisons = 0usize;
253
254    for window in periods.windows(2) {
255        let asset_prev = to_f64(window[0].assets);
256        let asset_curr = to_f64(window[1].assets);
257        let liab_prev = to_f64(window[0].liabilities);
258        let liab_curr = to_f64(window[1].liabilities);
259
260        if asset_prev.abs() < 1.0 && liab_prev.abs() < 1.0 {
261            continue;
262        }
263
264        comparisons += 1;
265        let asset_growth = if asset_prev.abs() > 1.0 {
266            (asset_curr - asset_prev) / asset_prev.abs()
267        } else {
268            0.0
269        };
270        let liab_growth = if liab_prev.abs() > 1.0 {
271            (liab_curr - liab_prev) / liab_prev.abs()
272        } else {
273            0.0
274        };
275
276        if (asset_growth - liab_growth).abs() > 0.25 {
277            violations += 1;
278        }
279    }
280
281    let is_consistent = violations == 0 || comparisons == 0;
282    TrendConsistencyCheck {
283        check_type,
284        periods_analyzed: comparisons,
285        is_consistent,
286        details: format!(
287            "{violations} of {comparisons} periods showed asset/liability growth divergence > 25%"
288        ),
289    }
290}
291
292/// Check 4: If revenue grows, AR should grow in the same direction.
293fn check_directional_consistency(periods: &[&PeriodTotals]) -> TrendConsistencyCheck {
294    let check_type = "DirectionalConsistency".to_string();
295
296    if periods.len() < 2 {
297        return TrendConsistencyCheck {
298            check_type,
299            periods_analyzed: 0,
300            is_consistent: true,
301            details: "Insufficient periods for comparison".to_string(),
302        };
303    }
304
305    let mut violations = 0usize;
306    let mut comparisons = 0usize;
307
308    for window in periods.windows(2) {
309        let rev_delta = to_f64(window[1].revenue) - to_f64(window[0].revenue);
310        let ar_delta = to_f64(window[1].ar_net) - to_f64(window[0].ar_net);
311
312        // Only test if both revenue and AR are non-trivially present
313        let rev_magnitude = to_f64(window[0].revenue)
314            .abs()
315            .max(to_f64(window[1].revenue).abs());
316        if rev_magnitude < 1.0 {
317            continue;
318        }
319
320        comparisons += 1;
321        // Directional mismatch: revenue grows but AR shrinks, or vice versa (significant change)
322        let significant_rev_change = rev_delta.abs() > rev_magnitude * 0.10;
323        if significant_rev_change && rev_delta * ar_delta < 0.0 {
324            violations += 1;
325        }
326    }
327
328    let is_consistent = violations == 0 || comparisons == 0;
329    TrendConsistencyCheck {
330        check_type,
331        periods_analyzed: comparisons,
332        is_consistent,
333        details: format!(
334            "{violations} of {comparisons} periods showed revenue/AR directional mismatch"
335        ),
336    }
337}
338
339// ─── Public API ───────────────────────────────────────────────────────────────
340
341/// Analyze trend plausibility across all journal entries.
342///
343/// Groups entries by fiscal period, computes per-period account totals, and
344/// evaluates four trend consistency checks.
345///
346/// # Returns
347/// A `TrendPlausibilityResult` with individual check results and an aggregate
348/// plausibility score. Passes when ≥ 75% of checks are consistent.
349pub fn analyze_trends(entries: &[JournalEntry]) -> TrendPlausibilityResult {
350    let period_map = aggregate_by_period(entries);
351    let period_count = period_map.len();
352
353    // Collect period totals in chronological order (BTreeMap is sorted by key).
354    let ordered: Vec<&PeriodTotals> = period_map.values().collect();
355
356    let checks = vec![
357        check_revenue_stability(&ordered),
358        check_expense_ratio_stability(&ordered),
359        check_balance_sheet_growth_consistency(&ordered),
360        check_directional_consistency(&ordered),
361    ];
362
363    let passing = checks.iter().filter(|c| c.is_consistent).count();
364    let total = checks.len();
365    let overall_plausibility_score = if total > 0 {
366        passing as f64 / total as f64
367    } else {
368        1.0
369    };
370
371    let passes = overall_plausibility_score >= 0.75;
372
373    TrendPlausibilityResult {
374        period_count,
375        consistency_checks: checks,
376        overall_plausibility_score,
377        passes,
378    }
379}
380
381// ─── Unit tests ───────────────────────────────────────────────────────────────
382
/// Unit tests for the trend plausibility evaluator.
///
/// Fixtures are built from simple two-line journal entries (one debit, one
/// credit) posted into consecutive fiscal periods.
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::{JournalEntry, JournalEntryHeader, JournalEntryLine};
    use rust_decimal_macros::dec;

    /// Build a calendar date, panicking on an invalid year/month/day combination.
    fn date(y: i32, m: u32, d: u32) -> chrono::NaiveDate {
        chrono::NaiveDate::from_ymd_opt(y, m, d).unwrap()
    }

    /// Create a JE with one debit and one credit line in a given fiscal period.
    ///
    /// Only `fiscal_period` is overridden on the header; `fiscal_year` is left
    /// as produced by `JournalEntryHeader::new` (presumably derived from the
    /// posting date — confirm against the model).
    fn make_je(period: u8, debit_acct: &str, credit_acct: &str, amount: Decimal) -> JournalEntry {
        // Use month = period for simplicity (valid for periods 1–12)
        let m = period.clamp(1, 12);
        let posting_date = date(2024, m as u32, 1);
        let mut header = JournalEntryHeader::new("C001".to_string(), posting_date);
        header.fiscal_period = period;
        let doc_id = header.document_id;
        let mut entry = JournalEntry::new(header);
        entry.add_line(JournalEntryLine::debit(
            doc_id,
            1,
            debit_acct.to_string(),
            amount,
        ));
        entry.add_line(JournalEntryLine::credit(
            doc_id,
            2,
            credit_acct.to_string(),
            amount,
        ));
        entry
    }

    /// Helper: build stable revenue across N periods (identical amount).
    fn stable_revenue_entries(periods: u8, amount: Decimal) -> Vec<JournalEntry> {
        (1..=periods)
            .map(|p| make_je(p, "1100", "4000", amount)) // debit AR, credit Revenue
            .collect()
    }

    /// No entries at all: zero periods and every check passes vacuously.
    #[test]
    fn test_empty_entries() {
        let result = analyze_trends(&[]);
        assert_eq!(result.period_count, 0);
        // All checks vacuously pass (insufficient periods)
        assert!(result.passes);
    }

    /// A single period cannot form a comparison pair, so the result passes.
    #[test]
    fn test_single_period() {
        let entries = stable_revenue_entries(1, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 1);
        // All checks vacuously pass
        assert!(result.passes);
    }

    /// Flat revenue across six periods must satisfy the revenue stability check.
    #[test]
    fn test_stable_revenue_passes() {
        // Identical revenue each period → zero variance → passes
        let entries = stable_revenue_entries(6, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 6);
        let rev_check = result
            .consistency_checks
            .iter()
            .find(|c| c.check_type == "RevenueStability")
            .unwrap();
        assert!(rev_check.is_consistent, "{}", rev_check.details);
    }

    /// Revenue that triples every period must fail the revenue stability check.
    #[test]
    fn test_volatile_revenue_fails() {
        // Triple revenue each period → 200% swing, well above the 50% threshold
        let mut entries = Vec::new();
        let mut amount = dec!(10_000);
        for period in 1u8..=4 {
            entries.push(make_je(period, "1100", "4000", amount));
            amount *= dec!(3); // 200% increase → far above 50% threshold
        }
        let result = analyze_trends(&entries);
        let rev_check = result
            .consistency_checks
            .iter()
            .find(|c| c.check_type == "RevenueStability")
            .unwrap();
        assert!(!rev_check.is_consistent, "3× revenue growth should fail");
    }

    /// The aggregate score is a fraction and must lie in [0.0, 1.0].
    #[test]
    fn test_plausibility_score_range() {
        let entries = stable_revenue_entries(4, dec!(50_000));
        let result = analyze_trends(&entries);
        assert!(
            result.overall_plausibility_score >= 0.0 && result.overall_plausibility_score <= 1.0
        );
    }

    /// Stable data should clear the 0.75 pass threshold.
    #[test]
    fn test_passes_threshold() {
        // Stable data should have score ≥ 0.75
        let entries = stable_revenue_entries(6, dec!(100_000));
        let result = analyze_trends(&entries);
        assert!(
            result.passes,
            "Stable data should pass. Score: {}",
            result.overall_plausibility_score
        );
    }

    /// `period_count` equals the number of distinct fiscal periods in the input.
    #[test]
    fn test_period_count_correct() {
        let entries = stable_revenue_entries(3, dec!(50_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.period_count, 3);
    }

    /// Exactly four checks are reported, one for each expected check name.
    #[test]
    fn test_check_count() {
        let entries = stable_revenue_entries(4, dec!(100_000));
        let result = analyze_trends(&entries);
        assert_eq!(result.consistency_checks.len(), 4);
        let names: Vec<&str> = result
            .consistency_checks
            .iter()
            .map(|c| c.check_type.as_str())
            .collect();
        assert!(names.contains(&"RevenueStability"));
        assert!(names.contains(&"ExpenseRatioStability"));
        assert!(names.contains(&"BalanceSheetGrowthConsistency"));
        assert!(names.contains(&"DirectionalConsistency"));
    }

    /// Constant input has zero CV; varying positive input has a positive CV.
    #[test]
    fn test_cv_calculation() {
        // Test coefficient_of_variation directly
        let values = vec![1.0, 1.0, 1.0, 1.0];
        assert!((coefficient_of_variation(&values) - 0.0).abs() < 1e-9);

        let values2 = vec![1.0, 2.0, 3.0, 4.0];
        let cv = coefficient_of_variation(&values2);
        assert!(cv > 0.0);
    }
}