1use datasynth_core::models::JournalEntry;
7use rust_decimal::Decimal;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Aggregate outcome of scoring a batch of journal entries with `score_entries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JeRiskScoringResult {
    /// Number of journal entries in the input batch.
    pub total_entries: usize,
    /// Number of entries that received a score; `score_entries` sets this to the
    /// same value as `total_entries`.
    pub scored_entries: usize,
    /// Count of entries per risk bucket (low / medium / high).
    pub risk_distribution: RiskDistribution,
    /// One statistics record per risk attribute, including attributes that never fired.
    pub risk_attributes: Vec<RiskAttributeStats>,
    /// Mean score of entries labelled `is_anomaly` or `is_fraud` minus the mean score
    /// of all other entries; `1.0` when no entry carries such a label.
    pub anomaly_separability: f64,
    /// `true` when `anomaly_separability` is strictly greater than 0.10.
    pub passes: bool,
}
30
/// Number of scored entries falling into each risk bucket.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RiskDistribution {
    /// Entries with a score below 0.30.
    pub low_risk: usize,
    /// Entries with a score of at least 0.30 and below 0.60.
    pub medium_risk: usize,
    /// Entries with a score of 0.60 or more.
    pub high_risk: usize,
}
41
/// How often a single risk attribute fired across the scored batch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RiskAttributeStats {
    /// Attribute name as returned by `RiskAttribute::name` (e.g. `"RoundNumber"`).
    pub attribute: String,
    /// Number of entries for which the attribute fired.
    pub count: usize,
    /// `count` as a percentage (0–100) of all entries; `0.0` for an empty batch.
    pub percentage: f64,
}
52
/// Individual risk indicators that can fire for a journal entry.
///
/// Every indicator contributes a fixed weight to the entry score; the weights of
/// all seven indicators add up to 1.0.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum RiskAttribute {
    /// Total debit amount is a positive multiple of 1000.
    RoundNumber,
    /// Entry was created at an hour outside 7..=21.
    UnusualHour,
    /// Posting date falls on a Saturday or Sunday.
    WeekendHoliday,
    /// Creator posted fewer than five entries in the batch.
    NonStandardUser,
    /// Total debit amount sits just under an approval threshold.
    BelowApprovalThreshold,
    /// Manual entry touching an account whose number has an automated-account prefix.
    ManualToAutomatedAccount,
    /// The same GL account is both debited and credited within the entry.
    LargeRoundTrip,
}

impl RiskAttribute {
    /// All variants in declaration order; backs [`RiskAttribute::all`].
    const ALL: [RiskAttribute; 7] = [
        RiskAttribute::RoundNumber,
        RiskAttribute::UnusualHour,
        RiskAttribute::WeekendHoliday,
        RiskAttribute::NonStandardUser,
        RiskAttribute::BelowApprovalThreshold,
        RiskAttribute::ManualToAutomatedAccount,
        RiskAttribute::LargeRoundTrip,
    ];

    /// Stable, human-readable identifier used in reported statistics.
    fn name(self) -> &'static str {
        let label = match self {
            RiskAttribute::RoundNumber => "RoundNumber",
            RiskAttribute::UnusualHour => "UnusualHour",
            RiskAttribute::WeekendHoliday => "WeekendHoliday",
            RiskAttribute::NonStandardUser => "NonStandardUser",
            RiskAttribute::BelowApprovalThreshold => "BelowApprovalThreshold",
            RiskAttribute::ManualToAutomatedAccount => "ManualToAutomatedAccount",
            RiskAttribute::LargeRoundTrip => "LargeRoundTrip",
        };
        label
    }

    /// Contribution of this indicator to an entry's score.
    fn weight(self) -> f64 {
        match self {
            RiskAttribute::RoundNumber => 0.10,
            RiskAttribute::UnusualHour
            | RiskAttribute::WeekendHoliday
            | RiskAttribute::NonStandardUser
            | RiskAttribute::BelowApprovalThreshold
            | RiskAttribute::ManualToAutomatedAccount
            | RiskAttribute::LargeRoundTrip => 0.15,
        }
    }

    /// Every indicator, in declaration order.
    fn all() -> &'static [RiskAttribute] {
        &Self::ALL
    }
}
104
/// Amounts consulted by `is_below_approval_threshold`: a total landing just under
/// one of these values is flagged.
// NOTE(review): presumably these mirror the approval limits used elsewhere in the
// project — confirm; the currency/unit is not established in this file.
const APPROVAL_THRESHOLDS: &[u64] = &[1000, 2500, 5000, 10000, 25000, 50000, 100000];
109
/// GL account number prefixes matched by `is_manual_to_automated_account`.
// NOTE(review): presumably these identify accounts that are normally posted by
// automated processes — confirm against the chart of accounts.
const AUTOMATED_ACCOUNT_PREFIXES: &[&str] = &["100", "101", "102", "200", "201", "110", "111"];
113
114fn is_round_number(amount: Decimal) -> bool {
115 let thousand = Decimal::from(1000u32);
116 amount > Decimal::ZERO && (amount % thousand).is_zero()
117}
118
/// Returns `true` when `hour` lies outside the inclusive range 7..=21,
/// i.e. for hours 0–6 and for 22 or later.
fn is_unusual_hour(hour: u32) -> bool {
    hour < 7 || hour > 21
}
122
123fn is_weekend(weekday: chrono::Weekday) -> bool {
124 weekday == chrono::Weekday::Sat || weekday == chrono::Weekday::Sun
125}
126
127fn is_below_approval_threshold(amount: Decimal) -> bool {
128 for &threshold in APPROVAL_THRESHOLDS {
129 let low = Decimal::from(threshold - 100);
130 let high = Decimal::from(threshold - 1);
131 if amount >= low && amount <= high {
132 return true;
133 }
134 }
135 false
136}
137
138fn is_manual_to_automated_account(entry: &JournalEntry) -> bool {
139 use datasynth_core::models::TransactionSource;
140 if entry.header.source != TransactionSource::Manual {
141 return false;
142 }
143 entry.lines.iter().any(|line| {
144 AUTOMATED_ACCOUNT_PREFIXES
145 .iter()
146 .any(|prefix| line.gl_account.starts_with(prefix))
147 })
148}
149
150fn has_round_trip(entry: &JournalEntry) -> bool {
151 let debited: std::collections::HashSet<_> = entry
153 .lines
154 .iter()
155 .filter(|l| l.debit_amount > Decimal::ZERO)
156 .map(|l| l.gl_account.as_str())
157 .collect();
158 let credited: std::collections::HashSet<_> = entry
159 .lines
160 .iter()
161 .filter(|l| l.credit_amount > Decimal::ZERO)
162 .map(|l| l.gl_account.as_str())
163 .collect();
164 debited.intersection(&credited).next().is_some()
165}
166
167fn build_user_posting_counts(entries: &[JournalEntry]) -> HashMap<String, usize> {
171 let mut counts: HashMap<String, usize> = HashMap::new();
172 for entry in entries {
173 *counts.entry(entry.header.created_by.clone()).or_default() += 1;
174 }
175 counts
176}
177
178fn score_entry(
182 entry: &JournalEntry,
183 user_counts: &HashMap<String, usize>,
184) -> (f64, Vec<RiskAttribute>) {
185 use chrono::Datelike as _;
186 use chrono::Timelike as _;
187
188 let mut triggered = Vec::new();
189
190 let total_debit: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
192
193 if is_round_number(total_debit) {
195 triggered.push(RiskAttribute::RoundNumber);
196 }
197
198 let hour = entry.header.created_at.hour();
200 if is_unusual_hour(hour) {
201 triggered.push(RiskAttribute::UnusualHour);
202 }
203
204 if is_weekend(entry.header.posting_date.weekday()) {
206 triggered.push(RiskAttribute::WeekendHoliday);
207 }
208
209 let user_count = user_counts
211 .get(&entry.header.created_by)
212 .copied()
213 .unwrap_or(0);
214 if user_count < 5 {
215 triggered.push(RiskAttribute::NonStandardUser);
216 }
217
218 if is_below_approval_threshold(total_debit) {
220 triggered.push(RiskAttribute::BelowApprovalThreshold);
221 }
222
223 if is_manual_to_automated_account(entry) {
225 triggered.push(RiskAttribute::ManualToAutomatedAccount);
226 }
227
228 if has_round_trip(entry) {
230 triggered.push(RiskAttribute::LargeRoundTrip);
231 }
232
233 let raw_score: f64 = triggered.iter().map(|a| a.weight()).sum();
234 let score = raw_score.min(1.0_f64);
235
236 (score, triggered)
237}
238
239pub fn score_entries(entries: &[JournalEntry]) -> JeRiskScoringResult {
243 let user_counts = build_user_posting_counts(entries);
244
245 let mut distribution = RiskDistribution::default();
246 let mut attribute_counts: HashMap<RiskAttribute, usize> = HashMap::new();
247 let mut anomaly_scores: Vec<f64> = Vec::new();
248 let mut clean_scores: Vec<f64> = Vec::new();
249
250 for entry in entries {
251 let (score, triggered) = score_entry(entry, &user_counts);
252
253 if score < 0.30 {
255 distribution.low_risk += 1;
256 } else if score < 0.60 {
257 distribution.medium_risk += 1;
258 } else {
259 distribution.high_risk += 1;
260 }
261
262 for attr in &triggered {
264 *attribute_counts.entry(*attr).or_default() += 1;
265 }
266
267 if entry.header.is_anomaly || entry.header.is_fraud {
269 anomaly_scores.push(score);
270 } else {
271 clean_scores.push(score);
272 }
273 }
274
275 let total = entries.len();
276 let risk_attributes: Vec<RiskAttributeStats> = RiskAttribute::all()
277 .iter()
278 .map(|&attr| {
279 let count = attribute_counts.get(&attr).copied().unwrap_or(0);
280 let percentage = if total > 0 {
281 count as f64 / total as f64 * 100.0
282 } else {
283 0.0
284 };
285 RiskAttributeStats {
286 attribute: attr.name().to_string(),
287 count,
288 percentage,
289 }
290 })
291 .collect();
292
293 let avg = |v: &[f64]| -> f64 {
294 if v.is_empty() {
295 0.0
296 } else {
297 v.iter().sum::<f64>() / v.len() as f64
298 }
299 };
300
301 let anomaly_separability = if anomaly_scores.is_empty() {
302 1.0
304 } else {
305 avg(&anomaly_scores) - avg(&clean_scores)
306 };
307
308 let passes = anomaly_separability > 0.10;
309
310 JeRiskScoringResult {
311 total_entries: total,
312 scored_entries: total,
313 risk_distribution: distribution,
314 risk_attributes,
315 anomaly_separability,
316 passes,
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        JournalEntry, JournalEntryHeader, JournalEntryLine, TransactionSource,
    };
    use rust_decimal_macros::dec;

    /// Builds a calendar date, panicking on an invalid year/month/day combination.
    fn make_date(year: i32, month: u32, day: u32) -> chrono::NaiveDate {
        chrono::NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// 2024-01-03, a Wednesday.
    fn weekday_date() -> chrono::NaiveDate {
        make_date(2024, 1, 3)
    }

    /// 2024-01-06, a Saturday.
    fn weekend_date() -> chrono::NaiveDate {
        make_date(2024, 1, 6)
    }

    /// Builds a balanced two-line entry: one debit and one credit of `amount`.
    fn make_je(
        company: &str,
        posting_date: chrono::NaiveDate,
        debit_account: &str,
        credit_account: &str,
        amount: Decimal,
        user: &str,
        source: TransactionSource,
    ) -> JournalEntry {
        let mut header = JournalEntryHeader::new(company.to_string(), posting_date);
        header.created_by = user.to_string();
        header.source = source;
        let doc_id = header.document_id;
        let mut entry = JournalEntry::new(header);
        entry.add_line(JournalEntryLine::debit(
            doc_id,
            1,
            debit_account.to_string(),
            amount,
        ));
        entry.add_line(JournalEntryLine::credit(
            doc_id,
            2,
            credit_account.to_string(),
            amount,
        ));
        entry
    }

    /// Weekday, automated entry by "alice" between two distinct accounts.
    fn simple_je(amount: Decimal) -> JournalEntry {
        make_je(
            "C001",
            weekday_date(),
            "6000",
            "2000",
            amount,
            "alice",
            TransactionSource::Automated,
        )
    }

    #[test]
    fn test_round_number_detected() {
        assert!(is_round_number(dec!(1000)));
        assert!(is_round_number(dec!(5000)));
        assert!(is_round_number(dec!(100000)));
    }

    #[test]
    fn test_non_round_number() {
        assert!(!is_round_number(dec!(1234.56)));
        assert!(!is_round_number(dec!(999)));
        assert!(!is_round_number(dec!(0)));
    }

    #[test]
    fn test_unusual_hour_boundaries() {
        assert!(is_unusual_hour(0));
        assert!(is_unusual_hour(6));
        assert!(!is_unusual_hour(7));
        assert!(!is_unusual_hour(21));
        assert!(is_unusual_hour(22));
        assert!(is_unusual_hour(23));
    }

    #[test]
    fn test_weekend_detected() {
        let entry = make_je(
            "C001",
            weekend_date(),
            "6000",
            "2000",
            dec!(500),
            "alice",
            TransactionSource::Automated,
        );
        let counts = build_user_posting_counts(std::slice::from_ref(&entry));
        let (_score, triggered) = score_entry(&entry, &counts);
        assert!(
            triggered.contains(&RiskAttribute::WeekendHoliday),
            "Saturday should trigger WeekendHoliday"
        );
    }

    #[test]
    fn test_weekday_not_flagged() {
        let entry = make_je(
            "C001",
            weekday_date(),
            "6000",
            "2000",
            dec!(500),
            "alice",
            TransactionSource::Automated,
        );
        // Ten extra postings give "alice" enough history for the posting-count lookup.
        let mut entries: Vec<JournalEntry> = (0..10)
            .map(|_| {
                make_je(
                    "C001",
                    weekday_date(),
                    "6000",
                    "2000",
                    dec!(500),
                    "alice",
                    TransactionSource::Automated,
                )
            })
            .collect();
        entries.push(entry.clone());
        let counts = build_user_posting_counts(&entries);
        let (_score, triggered) = score_entry(&entry, &counts);
        assert!(
            !triggered.contains(&RiskAttribute::WeekendHoliday),
            "Wednesday should not trigger WeekendHoliday"
        );
    }

    #[test]
    fn test_score_within_range() {
        let entries: Vec<JournalEntry> = vec![simple_je(dec!(500)), simple_je(dec!(1000))];
        let counts = build_user_posting_counts(&entries);
        for entry in &entries {
            let (score, _) = score_entry(entry, &counts);
            assert!((0.0..=1.0).contains(&score), "Score {score} out of [0,1]");
        }
    }

    #[test]
    fn test_multi_attribute_higher_score() {
        // Weekend posting of a round amount: at least two attributes fire.
        let risky = make_je(
            "C001",
            weekend_date(),
            "6000",
            "2000",
            dec!(5000),
            "alice",
            TransactionSource::Automated,
        );
        let clean = make_je(
            "C001",
            weekday_date(),
            "6000",
            "2000",
            dec!(1234),
            "alice",
            TransactionSource::Automated,
        );
        let mut entries = vec![risky.clone()];
        // Filler postings so "alice" is well above the non-standard-user cutoff.
        for _ in 0..10 {
            entries.push(make_je(
                "C001",
                weekday_date(),
                "6000",
                "2000",
                dec!(100),
                "alice",
                TransactionSource::Automated,
            ));
        }
        entries.push(clean.clone());
        let counts = build_user_posting_counts(&entries);
        let (risky_score, _) = score_entry(&risky, &counts);
        let (clean_score, _) = score_entry(&clean, &counts);
        assert!(
            risky_score >= clean_score,
            "Risky entry ({risky_score}) should score >= clean ({clean_score})"
        );
    }

    #[test]
    fn test_below_approval_threshold() {
        assert!(is_below_approval_threshold(dec!(4999)));
        assert!(is_below_approval_threshold(dec!(4950)));
        assert!(!is_below_approval_threshold(dec!(5000)));
        assert!(!is_below_approval_threshold(dec!(6000)));
    }

    #[test]
    fn test_round_trip_detected() {
        let header = JournalEntryHeader::new("C001".to_string(), weekday_date());
        let doc_id = header.document_id;
        let mut entry = JournalEntry::new(header);
        // Debit and credit hit the same account.
        entry.add_line(JournalEntryLine::debit(
            doc_id,
            1,
            "1000".to_string(),
            dec!(100),
        ));
        entry.add_line(JournalEntryLine::credit(
            doc_id,
            2,
            "1000".to_string(),
            dec!(100),
        ));
        assert!(
            has_round_trip(&entry),
            "Same account debit+credit should be detected"
        );
    }

    #[test]
    fn test_no_round_trip() {
        let entry = simple_je(dec!(100));
        assert!(
            !has_round_trip(&entry),
            "Different accounts should not trigger round-trip"
        );
    }

    #[test]
    fn test_score_entries_basic() {
        let entries: Vec<JournalEntry> = (0..20)
            .map(|i| {
                make_je(
                    "C001",
                    weekday_date(),
                    "6000",
                    "2000",
                    Decimal::from(i * 100 + 50),
                    "alice",
                    TransactionSource::Automated,
                )
            })
            .collect();
        let result = score_entries(&entries);
        assert_eq!(result.total_entries, 20);
        assert_eq!(result.scored_entries, 20);
        assert_eq!(
            result.risk_distribution.low_risk
                + result.risk_distribution.medium_risk
                + result.risk_distribution.high_risk,
            20
        );
        assert_eq!(result.risk_attributes.len(), RiskAttribute::all().len());
    }

    #[test]
    fn test_anomaly_separability_passes_with_no_labels() {
        let entries: Vec<JournalEntry> = (0..5).map(|_| simple_je(dec!(100))).collect();
        let result = score_entries(&entries);
        assert!(result.passes, "No anomaly labels → should pass");
    }

    #[test]
    fn test_anomaly_separability_with_flagged_entries() {
        let mut entries: Vec<JournalEntry> = Vec::new();

        // Clean entries, explicitly labelled as non-anomalous.
        for _ in 0..5 {
            let mut e = make_je(
                "C001",
                weekday_date(),
                "6000",
                "2000",
                dec!(123),
                "bob",
                TransactionSource::Automated,
            );
            e.header.is_anomaly = false;
            entries.push(e);
        }
        // More clean postings so "bob" is a frequent poster.
        for _ in 0..10 {
            entries.push(make_je(
                "C001",
                weekday_date(),
                "6000",
                "2000",
                dec!(50),
                "bob",
                TransactionSource::Automated,
            ));
        }

        // Anomalous entries: weekend, round amount, rare user. Only four are created
        // because the non-standard-user rule requires strictly fewer than five
        // postings; with five the "rare" user would not be flagged at all.
        for _ in 0..4 {
            let mut e = make_je(
                "C001",
                weekend_date(),
                "6000",
                "2000",
                dec!(5000),
                "zz_rare_user",
                TransactionSource::Automated,
            );
            e.header.is_anomaly = true;
            entries.push(e);
        }

        let result = score_entries(&entries);
        assert!(
            result.anomaly_separability > 0.0,
            "Anomaly entries should have higher average score than clean entries"
        );

        let non_standard = result
            .risk_attributes
            .iter()
            .find(|stats| stats.attribute == RiskAttribute::NonStandardUser.name())
            .expect("every attribute is reported");
        assert_eq!(
            non_standard.count, 4,
            "Only the four rare-user entries should trigger NonStandardUser"
        );
    }
}