1use rust_decimal::Decimal;
20use rustledger_plugin_types::{DirectiveData, DirectiveWrapper};
21use std::collections::{BTreeMap, HashSet};
22use std::str::FromStr;
23
/// Thresholds that control when two directives are considered the two sides
/// of one transfer.
#[derive(Debug, Clone)]
pub struct TransferConfig {
    /// Maximum number of days (inclusive) allowed between the dates of the
    /// two directives.
    pub date_window_days: i64,
    /// Largest absolute value the two amounts may sum to while still being
    /// treated as offsetting each other.
    pub amount_tolerance: Decimal,
}
32
33impl Default for TransferConfig {
34 fn default() -> Self {
35 Self {
36 date_window_days: 3,
37 amount_tolerance: Decimal::new(1, 2), }
39 }
40}
41
/// One detected transfer: a pair of directives from two different groups
/// whose first-posting amounts offset each other.
///
/// The "from" side is the directive whose first posting carries the negative
/// amount; the "to" side is its counterpart.
#[derive(Debug, Clone)]
pub struct TransferMatch {
    /// Index (into the `groups` slice) of the group holding the "from" directive.
    pub from_group: usize,
    /// Index of the "from" directive within its group.
    pub from_index: usize,
    /// Label of the "from" group; `None` when the label is empty.
    pub from_account: Option<String>,
    /// Source file of the "from" directive, copied from the directive.
    pub from_filename: Option<String>,
    /// Source line of the "from" directive, copied from the directive.
    pub from_lineno: Option<u32>,
    /// Index (into the `groups` slice) of the group holding the "to" directive.
    pub to_group: usize,
    /// Index of the "to" directive within its group.
    pub to_index: usize,
    /// Label of the "to" group; `None` when the label is empty.
    pub to_account: Option<String>,
    /// Source file of the "to" directive, copied from the directive.
    pub to_filename: Option<String>,
    /// Source line of the "to" directive, copied from the directive.
    pub to_lineno: Option<u32>,
    /// Absolute value of the matched amount. When the two sides differ within
    /// the tolerance, this is the amount of the directive from the
    /// lower-indexed group.
    pub amount: Decimal,
    /// Currency shared by both sides.
    pub currency: String,
    /// Heuristic score: 0.7 base, +0.2 for keyword evidence, +0.1 when both
    /// directives have the same date, capped at 1.0.
    pub confidence: f64,
    /// Date string of the "from" directive, copied verbatim.
    pub date: String,
}
74
75#[must_use]
84pub fn find_transfers(
85 groups: &[(String, Vec<DirectiveWrapper>)],
86 config: &TransferConfig,
87) -> Vec<TransferMatch> {
88 let mut matches = Vec::new();
89 let mut globally_matched: HashSet<(usize, usize)> = HashSet::new();
92
93 let group_accounts: Vec<&str> = groups.iter().map(|(a, _)| a.as_str()).collect();
94
95 for (g1, (_, directives1)) in groups.iter().enumerate() {
97 for (g2, (_, directives2)) in groups.iter().enumerate() {
98 if g2 <= g1 {
99 continue; }
101
102 find_matches_between(
103 g1,
104 directives1,
105 g2,
106 directives2,
107 &group_accounts,
108 config,
109 &mut matches,
110 &mut globally_matched,
111 );
112 }
113 }
114
115 matches
116}
117
118#[must_use]
134pub fn find_transfers_in_ledger(
135 directives: &[DirectiveWrapper],
136 config: &TransferConfig,
137) -> Vec<TransferMatch> {
138 let mut by_account: BTreeMap<String, Vec<DirectiveWrapper>> = BTreeMap::new();
140 for d in directives {
141 if let Some(account) = first_posting_account(d) {
142 by_account
143 .entry(account.to_string())
144 .or_default()
145 .push(d.clone());
146 }
147 }
148 let groups: Vec<(String, Vec<DirectiveWrapper>)> = by_account.into_iter().collect();
149 find_transfers(&groups, config)
150}
151
152#[allow(clippy::too_many_arguments)]
154fn find_matches_between(
155 g1: usize,
156 directives1: &[DirectiveWrapper],
157 g2: usize,
158 directives2: &[DirectiveWrapper],
159 group_accounts: &[&str],
160 config: &TransferConfig,
161 matches: &mut Vec<TransferMatch>,
162 globally_matched: &mut HashSet<(usize, usize)>,
163) {
164 for (i, d1) in directives1.iter().enumerate() {
165 if globally_matched.contains(&(g1, i)) {
166 continue;
167 }
168
169 let Some((amount1, currency1)) = first_posting_amount_currency(d1) else {
170 continue;
171 };
172
173 for (j, d2) in directives2.iter().enumerate() {
174 if globally_matched.contains(&(g2, j)) {
175 continue;
176 }
177
178 let Some((amount2, currency2)) = first_posting_amount_currency(d2) else {
179 continue;
180 };
181
182 if currency1 != currency2 {
184 continue;
185 }
186
187 let sum = (amount1 + amount2).abs();
189 if sum > config.amount_tolerance {
190 continue;
191 }
192
193 if !within_date_window(&d1.date, &d2.date, config.date_window_days) {
195 continue;
196 }
197
198 if shares_link(d1, d2) {
202 globally_matched.insert((g1, i));
203 globally_matched.insert((g2, j));
204 break;
205 }
206
207 let same_date = d1.date == d2.date;
208
209 let mut confidence: f64 = 0.7; let kw1 = classify_keywords(d1);
213 let kw2 = classify_keywords(d2);
214 let strong = kw1.strong || kw2.strong;
215 let weak = kw1.weak || kw2.weak;
216 if strong || (weak && same_date) {
217 confidence += 0.2;
218 }
219
220 if same_date {
221 confidence += 0.1;
222 }
223
224 let confidence = confidence.min(1.0);
225
226 let (from_group, from_index, to_group, to_index, from, to) =
228 if amount1.is_sign_negative() {
229 (g1, i, g2, j, d1, d2)
230 } else {
231 (g2, j, g1, i, d2, d1)
232 };
233
234 matches.push(TransferMatch {
235 from_group,
236 from_index,
237 from_account: group_accounts
238 .get(from_group)
239 .map(|s| (*s).to_string())
240 .filter(|s| !s.is_empty()),
241 from_filename: from.filename.clone(),
242 from_lineno: from.lineno,
243 to_group,
244 to_index,
245 to_account: group_accounts
246 .get(to_group)
247 .map(|s| (*s).to_string())
248 .filter(|s| !s.is_empty()),
249 to_filename: to.filename.clone(),
250 to_lineno: to.lineno,
251 amount: amount1.abs(),
252 currency: currency1.to_string(),
253 confidence,
254 date: from.date.clone(),
255 });
256
257 globally_matched.insert((g1, i));
258 globally_matched.insert((g2, j));
259 break; }
261 }
262}
263
264fn first_posting_amount_currency(d: &DirectiveWrapper) -> Option<(Decimal, &str)> {
266 if let DirectiveData::Transaction(txn) = &d.data
267 && let Some(posting) = txn.postings.first()
268 && let Some(units) = &posting.units
269 {
270 let amount = Decimal::from_str(&units.number).ok()?;
271 return Some((amount, &units.currency));
272 }
273 None
274}
275
276fn first_posting_account(d: &DirectiveWrapper) -> Option<&str> {
278 if let DirectiveData::Transaction(txn) = &d.data
279 && let Some(posting) = txn.postings.first()
280 {
281 return Some(posting.account.as_str());
282 }
283 None
284}
285
286fn shares_link(a: &DirectiveWrapper, b: &DirectiveWrapper) -> bool {
291 let (DirectiveData::Transaction(txn_a), DirectiveData::Transaction(txn_b)) = (&a.data, &b.data)
292 else {
293 return false;
294 };
295 if txn_a.links.is_empty() || txn_b.links.is_empty() {
296 return false;
297 }
298 let set: HashSet<&str> = txn_a.links.iter().map(String::as_str).collect();
299 txn_b.links.iter().any(|l| set.contains(l.as_str()))
300}
301
302fn within_date_window(date1: &str, date2: &str, days: i64) -> bool {
304 let d1: jiff::civil::Date = match date1.parse() {
306 Ok(d) => d,
307 Err(_) => return false,
308 };
309 let d2: jiff::civil::Date = match date2.parse() {
310 Ok(d) => d,
311 Err(_) => return false,
312 };
313 let Ok(span) = d2.since(d1) else {
314 return false;
315 };
316 let diff = span.get_days().abs();
317 i64::from(diff) <= days
318}
319
/// Lowercase substrings searched for in a transaction's narration and payee.
/// A hit on either side of a candidate pair raises its confidence regardless
/// of whether the two dates are equal.
const STRONG_KEYWORDS: &[&str] = &["transfer", "xfer", "internal", "sweep", "move"];
322
/// Lowercase substrings searched for in a transaction's narration and payee.
/// A hit raises the confidence of a candidate pair only when both directives
/// have the same date.
const WEAK_KEYWORDS: &[&str] = &["payment", "ach", "wire"];
327
/// Result of scanning one directive's narration and payee for keywords.
/// Both flags default to `false`.
#[derive(Default, Clone, Copy)]
struct KeywordHit {
    /// At least one entry of `STRONG_KEYWORDS` was found.
    strong: bool,
    /// At least one entry of `WEAK_KEYWORDS` was found.
    weak: bool,
}
333
334fn classify_keywords(d: &DirectiveWrapper) -> KeywordHit {
335 let DirectiveData::Transaction(txn) = &d.data else {
336 return KeywordHit::default();
337 };
338 let mut hit = KeywordHit::default();
339 let narration_lower = txn.narration.to_lowercase();
340 let payee_lower = txn.payee.as_deref().unwrap_or("").to_lowercase();
341 let scan = |needles: &[&str]| -> bool {
342 needles
343 .iter()
344 .any(|kw| narration_lower.contains(kw) || payee_lower.contains(kw))
345 };
346 hit.strong = scan(STRONG_KEYWORDS);
347 hit.weak = scan(WEAK_KEYWORDS);
348 hit
349}
350
// Unit tests for transfer detection: amount/currency/date matching,
// one-to-one pairing, confidence scoring, link handling and source locations.
#[cfg(test)]
mod tests {
    use super::*;
    use rustledger_plugin_types::{AmountData, PostingData, TransactionData};

    /// Builds a single-posting transaction on `Assets:Bank` without links.
    fn make_txn(date: &str, narration: &str, amount: &str, currency: &str) -> DirectiveWrapper {
        make_txn_with(date, narration, amount, currency, "Assets:Bank", vec![])
    }

    /// Builds a single-posting transaction with an explicit posting account
    /// and link list; payee, filename and line number are left unset.
    fn make_txn_with(
        date: &str,
        narration: &str,
        amount: &str,
        currency: &str,
        account: &str,
        links: Vec<String>,
    ) -> DirectiveWrapper {
        DirectiveWrapper {
            directive_type: "transaction".to_string(),
            date: date.to_string(),
            filename: None,
            lineno: None,
            data: DirectiveData::Transaction(TransactionData {
                flag: "*".to_string(),
                payee: None,
                narration: narration.to_string(),
                tags: vec![],
                links,
                metadata: vec![],
                postings: vec![PostingData {
                    account: account.to_string(),
                    units: Some(AmountData {
                        number: amount.to_string(),
                        currency: currency.to_string(),
                    }),
                    cost: None,
                    price: None,
                    flag: None,
                    metadata: vec![],
                    span: None,
                }],
            }),
        }
    }

    /// Like `make_txn_with` (no links) but with a source filename and line
    /// number attached.
    fn make_txn_loc(
        date: &str,
        narration: &str,
        amount: &str,
        currency: &str,
        account: &str,
        filename: &str,
        lineno: u32,
    ) -> DirectiveWrapper {
        let mut d = make_txn_with(date, narration, amount, currency, account, vec![]);
        d.filename = Some(filename.to_string());
        d.lineno = Some(lineno);
        d
    }

    // Offsetting amounts on the same date form one match.
    #[test]
    fn matches_opposite_amounts_same_date() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn(
                    "2024-01-15",
                    "Transfer to savings",
                    "-500.00",
                    "USD",
                )],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn(
                    "2024-01-15",
                    "Transfer from checking",
                    "500.00",
                    "USD",
                )],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].amount, Decimal::new(50000, 2));
        // Keyword and same-date bonuses lift the score above the 0.7 base.
        assert!(matches[0].confidence > 0.8);
    }

    // Two days apart is inside the default 3-day window.
    #[test]
    fn matches_within_date_window() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "ACH payment", "-200.00", "USD")],
            ),
            (
                "Assets:CreditCard".to_string(),
                vec![make_txn("2024-01-17", "Payment received", "200.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
    }

    // Ten days apart exceeds the default 3-day window.
    #[test]
    fn no_match_outside_date_window() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-25", "Transfer", "500.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // Offsetting numbers in different currencies are not a transfer.
    #[test]
    fn no_match_different_currency() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "500.00", "EUR")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // Two inflows of the same size do not offset each other.
    #[test]
    fn no_match_same_sign() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Deposit", "500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-15", "Deposit", "500.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // A 1.00 difference is far above the default 0.01 tolerance.
    #[test]
    fn no_match_different_amounts() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "499.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // Keyword matching is case-insensitive: upper-case narrations still boost.
    #[test]
    fn transfer_keywords_boost_confidence() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn(
                    "2024-01-15",
                    "TRANSFER TO SAVINGS",
                    "-500.00",
                    "USD",
                )],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn(
                    "2024-01-15",
                    "TRANSFER FROM CHECKING",
                    "500.00",
                    "USD",
                )],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence >= 0.9);
    }

    // No keywords and different dates: the score stays at the 0.7 base.
    #[test]
    fn no_keywords_lower_confidence() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Something", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-17", "Something else", "500.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence < 0.8);
    }

    // Independent transfers between the same two groups are all found.
    #[test]
    fn multiple_transfers() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![
                    make_txn("2024-01-15", "Transfer 1", "-500.00", "USD"),
                    make_txn("2024-01-20", "Transfer 2", "-300.00", "USD"),
                ],
            ),
            (
                "Assets:Savings".to_string(),
                vec![
                    make_txn("2024-01-15", "Transfer 1", "500.00", "USD"),
                    make_txn("2024-01-20", "Transfer 2", "300.00", "USD"),
                ],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 2);
    }

    // The single inflow can be claimed by only one of the two identical
    // outflows.
    #[test]
    fn one_to_one_matching() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![
                    make_txn("2024-01-15", "Transfer", "-500.00", "USD"),
                    make_txn("2024-01-15", "Transfer", "-500.00", "USD"),
                ],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "500.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
    }

    // The credit-card entry has no offsetting counterpart, so only the
    // checking/savings pair is reported.
    #[test]
    fn three_groups() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-15", "Transfer", "500.00", "USD")],
            ),
            (
                "Assets:CreditCard".to_string(),
                vec![make_txn("2024-01-15", "Payment", "200.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
    }

    // No groups at all yields no matches.
    #[test]
    fn empty_groups() {
        let groups: Vec<(String, Vec<DirectiveWrapper>)> = vec![];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // The ledger entry point derives the groups from the first posting's
    // account and reports those accounts on the match.
    #[test]
    fn in_ledger_groups_by_first_posting_account() {
        let directives = vec![
            make_txn_with(
                "2024-01-15",
                "Transfer to savings",
                "-500.00",
                "USD",
                "Assets:Checking",
                vec![],
            ),
            make_txn_with(
                "2024-01-15",
                "Transfer from checking",
                "500.00",
                "USD",
                "Assets:Savings",
                vec![],
            ),
        ];
        let matches = find_transfers_in_ledger(&directives, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].from_account.as_deref(), Some("Assets:Checking"));
        assert_eq!(matches[0].to_account.as_deref(), Some("Assets:Savings"));
    }

    // Both directives land in the same group, and a group is never compared
    // with itself.
    #[test]
    fn in_ledger_does_not_match_within_same_account() {
        let directives = vec![
            make_txn_with(
                "2024-01-15",
                "Out",
                "-500.00",
                "USD",
                "Assets:Checking",
                vec![],
            ),
            make_txn_with(
                "2024-01-15",
                "In",
                "500.00",
                "USD",
                "Assets:Checking",
                vec![],
            ),
        ];
        let matches = find_transfers_in_ledger(&directives, &TransferConfig::default());
        assert!(matches.is_empty());
    }

    // Filename and line number of each side are copied onto the match.
    #[test]
    fn transfer_match_carries_filename_and_lineno() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn_loc(
                    "2024-01-15",
                    "Transfer",
                    "-500.00",
                    "USD",
                    "Assets:Checking",
                    "checking.bean",
                    42,
                )],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn_loc(
                    "2024-01-15",
                    "Transfer",
                    "500.00",
                    "USD",
                    "Assets:Savings",
                    "savings.bean",
                    18,
                )],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        let m = &matches[0];
        assert_eq!(m.from_filename.as_deref(), Some("checking.bean"));
        assert_eq!(m.from_lineno, Some(42));
        assert_eq!(m.to_filename.as_deref(), Some("savings.bean"));
        assert_eq!(m.to_lineno, Some(18));
    }

    // A pair that already shares a link is treated as known and not reported.
    #[test]
    fn already_linked_pair_is_skipped() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn_with(
                    "2024-01-15",
                    "Transfer",
                    "-500.00",
                    "USD",
                    "Assets:Checking",
                    vec!["xfer-001".to_string()],
                )],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn_with(
                    "2024-01-15",
                    "Transfer",
                    "500.00",
                    "USD",
                    "Assets:Savings",
                    vec!["xfer-001".to_string()],
                )],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert!(
            matches.is_empty(),
            "already-linked pair must not be re-detected; got {matches:?}"
        );
    }

    // Links that differ between the two sides do not suppress the match.
    #[test]
    fn unrelated_links_do_not_block_match() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn_with(
                    "2024-01-15",
                    "Transfer",
                    "-500.00",
                    "USD",
                    "Assets:Checking",
                    vec!["batch-import-A".to_string()],
                )],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn_with(
                    "2024-01-15",
                    "Transfer",
                    "500.00",
                    "USD",
                    "Assets:Savings",
                    vec!["batch-import-B".to_string()],
                )],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
    }

    // A weak keyword alone earns no bonus when the dates differ.
    #[test]
    fn weak_keyword_does_not_boost_when_dates_differ() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "PAYMENT", "-200.00", "USD")],
            ),
            (
                "Liabilities:Card".to_string(),
                vec![make_txn("2024-01-17", "PAYMENT", "200.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert!(
            (matches[0].confidence - 0.7).abs() < 1e-9,
            "weak keyword + different dates must stay at base 0.7; got {}",
            matches[0].confidence
        );
    }

    // On the same date a weak keyword earns both the keyword and the
    // same-date bonus.
    #[test]
    fn weak_keyword_boosts_on_same_date() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "PAYMENT", "-200.00", "USD")],
            ),
            (
                "Liabilities:Card".to_string(),
                vec![make_txn("2024-01-15", "PAYMENT", "200.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence > 0.95);
    }

    // A strong keyword earns the keyword bonus even without the same-date
    // bonus.
    #[test]
    fn strong_keyword_boosts_even_on_different_dates() {
        let groups = vec![
            (
                "Assets:Checking".to_string(),
                vec![make_txn("2024-01-15", "TRANSFER", "-500.00", "USD")],
            ),
            (
                "Assets:Savings".to_string(),
                vec![make_txn("2024-01-17", "TRANSFER", "500.00", "USD")],
            ),
        ];
        let matches = find_transfers(&groups, &TransferConfig::default());
        assert_eq!(matches.len(), 1);
        assert!(
            (matches[0].confidence - 0.9).abs() < 1e-9,
            "strong keyword + different dates: expect 0.9, got {}",
            matches[0].confidence
        );
    }
}