1use std::collections::hash_map::DefaultHasher;
17use std::hash::{Hash, Hasher};
18use std::str::FromStr;
19
20use rustledger_plugin_types::{
21 AmountData, CostData, PostingData, PriceAnnotationData, TransactionData,
22};
23
/// Marker byte fed to the hasher in place of an optional component that is `None`.
const ABSENT: u8 = 0;
/// Marker byte fed to the hasher immediately before an optional component that is `Some`.
const PRESENT: u8 = 1;
28
29#[must_use]
38pub fn structural_hash(date: &str, txn: &TransactionData) -> u64 {
39 let TransactionData {
42 flag,
43 payee,
44 narration,
45 tags,
46 links,
47 metadata: _, postings,
49 } = txn;
50
51 let mut hasher = DefaultHasher::new();
52 date.hash(&mut hasher);
53 flag.hash(&mut hasher);
54 payee.hash(&mut hasher);
55 narration.hash(&mut hasher);
56
57 let mut sorted_tags: Vec<&String> = tags.iter().collect();
61 sorted_tags.sort();
62 sorted_tags.dedup();
63 sorted_tags.len().hash(&mut hasher);
64 for tag in sorted_tags {
65 tag.hash(&mut hasher);
66 }
67
68 let mut sorted_links: Vec<&String> = links.iter().collect();
69 sorted_links.sort();
70 sorted_links.dedup();
71 sorted_links.len().hash(&mut hasher);
72 for link in sorted_links {
73 link.hash(&mut hasher);
74 }
75
76 postings.len().hash(&mut hasher);
78 for posting in postings {
79 hash_posting(posting, &mut hasher);
80 }
81
82 hasher.finish()
83}
84
85fn hash_posting<H: Hasher>(posting: &PostingData, hasher: &mut H) {
87 let PostingData {
89 account,
90 units,
91 cost,
92 price,
93 flag,
94 metadata: _, } = posting;
96
97 account.hash(hasher);
98 match units {
99 Some(u) => {
100 PRESENT.hash(hasher);
101 hash_amount(u, hasher);
102 }
103 None => ABSENT.hash(hasher),
104 }
105 match cost {
106 Some(c) => {
107 PRESENT.hash(hasher);
108 hash_cost(c, hasher);
109 }
110 None => ABSENT.hash(hasher),
111 }
112 match price {
113 Some(p) => {
114 PRESENT.hash(hasher);
115 hash_price(p, hasher);
116 }
117 None => ABSENT.hash(hasher),
118 }
119 flag.hash(hasher);
120}
121
122fn hash_amount<H: Hasher>(amount: &AmountData, hasher: &mut H) {
124 let AmountData { number, currency } = amount;
125 number.hash(hasher);
126 currency.hash(hasher);
127}
128
129fn hash_cost<H: Hasher>(cost: &CostData, hasher: &mut H) {
131 let CostData {
132 number_per,
133 number_total,
134 currency,
135 date,
136 label,
137 merge,
138 } = cost;
139 number_per.hash(hasher);
140 number_total.hash(hasher);
141 currency.hash(hasher);
142 date.hash(hasher);
143 label.hash(hasher);
144 merge.hash(hasher);
145}
146
147fn hash_price<H: Hasher>(price: &PriceAnnotationData, hasher: &mut H) {
149 let PriceAnnotationData {
150 is_total,
151 amount,
152 number,
153 currency,
154 } = price;
155 is_total.hash(hasher);
156 match amount {
157 Some(a) => {
158 PRESENT.hash(hasher);
159 hash_amount(a, hasher);
160 }
161 None => ABSENT.hash(hasher),
162 }
163 number.hash(hasher);
164 currency.hash(hasher);
165}
166
167#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
181pub struct Fingerprint(pub [u8; 16]);
182
183impl Fingerprint {
184 #[must_use]
189 pub fn compute(date: &str, amount: Option<&str>, text: &str) -> Self {
190 let normalized = normalize_text(text);
191 let mut hasher = blake3::Hasher::new();
192 hasher.update(date.as_bytes());
193 hasher.update(b"|");
194 if let Some(amt) = amount {
195 let normalized_amt = rust_decimal::Decimal::from_str(amt)
197 .map_or_else(|_| amt.to_string(), |d| d.normalize().to_string());
198 hasher.update(normalized_amt.as_bytes());
199 }
200 hasher.update(b"|");
201 hasher.update(normalized.as_bytes());
202 let hash = hasher.finalize();
203 let mut bytes = [0u8; 16];
204 bytes.copy_from_slice(&hash.as_bytes()[..16]);
205 Self(bytes)
206 }
207
208 #[must_use]
212 pub fn from_transaction(date: &str, txn: &TransactionData) -> Self {
213 let amount = txn
214 .postings
215 .first()
216 .and_then(|p| p.units.as_ref())
217 .map(|u| u.number.as_str());
218
219 let mut text = String::new();
220 if let Some(ref payee) = txn.payee {
221 text.push_str(payee);
222 text.push(' ');
223 }
224 text.push_str(&txn.narration);
225
226 Self::compute(date, amount, &text)
227 }
228
229 #[must_use]
231 pub fn to_hex(&self) -> String {
232 let mut s = String::with_capacity(32);
233 for byte in &self.0 {
234 use std::fmt::Write;
235 write!(s, "{byte:02x}").expect("hex write to String cannot fail");
237 }
238 s
239 }
240
241 pub fn from_hex(s: &str) -> Result<Self, FingerprintError> {
247 if s.len() != 32 {
248 return Err(FingerprintError::InvalidLength(s.len()));
249 }
250 let mut bytes = [0u8; 16];
251 for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
252 let hex_str = std::str::from_utf8(chunk).map_err(|_| FingerprintError::InvalidHex)?;
253 bytes[i] = u8::from_str_radix(hex_str, 16).map_err(|_| FingerprintError::InvalidHex)?;
254 }
255 Ok(Self(bytes))
256 }
257}
258
259impl std::fmt::Display for Fingerprint {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 f.write_str(&self.to_hex())
262 }
263}
264
/// Reasons a hex string cannot be parsed into a fingerprint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FingerprintError {
    /// The input was not 32 characters long; carries the actual length.
    InvalidLength(usize),
    /// The input contained something that is not valid hexadecimal.
    InvalidHex,
}

impl std::fmt::Display for FingerprintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::InvalidLength(actual) => {
                write!(f, "fingerprint hex must be 32 chars, got {actual}")
            }
            Self::InvalidHex => write!(f, "invalid hex in fingerprint"),
        }
    }
}

impl std::error::Error for FingerprintError {}
286
/// Lowercases `text` and collapses every run of whitespace into one space.
///
/// Leading and trailing whitespace is dropped, so blank input yields an empty
/// string.
fn normalize_text(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    for word in lowered.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }

    normalized
}
294
#[cfg(test)]
mod tests {
    use super::*;
    use rustledger_plugin_types::{
        AmountData, CostData, PostingData, PriceAnnotationData, TransactionData,
    };

    /// Date used by every test that does not vary the date.
    const DAY: &str = "2024-01-15";

    /// Builds a USD amount from a number string.
    fn usd(number: &str) -> AmountData {
        AmountData {
            number: number.to_string(),
            currency: "USD".to_string(),
        }
    }

    /// Builds a USD posting with no cost, price, flag, or metadata.
    fn usd_posting(account: &str, number: &str) -> PostingData {
        PostingData {
            account: account.to_string(),
            units: Some(usd(number)),
            cost: None,
            price: None,
            flag: None,
            metadata: vec![],
        }
    }

    /// Builds a `*`-flagged transaction with no tags, links, or metadata.
    fn txn_with_postings(
        payee: Option<&str>,
        narration: &str,
        postings: Vec<PostingData>,
    ) -> TransactionData {
        TransactionData {
            flag: "*".to_string(),
            payee: payee.map(String::from),
            narration: narration.to_string(),
            tags: vec![],
            links: vec![],
            metadata: vec![],
            postings,
        }
    }

    /// Builds a transaction with a single `Assets:Bank` USD posting.
    fn make_txn(payee: Option<&str>, narration: &str, amount: &str) -> TransactionData {
        txn_with_postings(payee, narration, vec![usd_posting("Assets:Bank", amount)])
    }

    /// Builds a per-unit USD cost with no total, date, or label.
    fn per_unit_cost(number: &str) -> CostData {
        CostData {
            number_per: Some(number.to_string()),
            number_total: None,
            currency: Some("USD".to_string()),
            date: None,
            label: None,
            merge: false,
        }
    }

    /// Builds a per-unit price annotation carrying a full USD amount.
    fn unit_price(number: &str) -> PriceAnnotationData {
        PriceAnnotationData {
            is_total: false,
            amount: Some(usd(number)),
            number: None,
            currency: None,
        }
    }

    /// Structural hash of `txn` on the shared test date.
    fn hash_on_day(txn: &TransactionData) -> u64 {
        structural_hash(DAY, txn)
    }

    /// Owned strings from string literals.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    #[test]
    fn identical_transactions_produce_same_hash() {
        let first = make_txn(Some("Store"), "Groceries", "-50.00");
        let second = make_txn(Some("Store"), "Groceries", "-50.00");
        assert_eq!(hash_on_day(&first), hash_on_day(&second));
    }

    #[test]
    fn different_dates_produce_different_hash() {
        let txn = make_txn(Some("Store"), "Groceries", "-50.00");
        assert_ne!(
            structural_hash("2024-01-15", &txn),
            structural_hash("2024-01-16", &txn)
        );
    }

    #[test]
    fn different_amounts_produce_different_hash() {
        let fifty = make_txn(Some("Store"), "Groceries", "-50.00");
        let fifty_one = make_txn(Some("Store"), "Groceries", "-51.00");
        assert_ne!(hash_on_day(&fifty), hash_on_day(&fifty_one));
    }

    #[test]
    fn metadata_does_not_affect_hash() {
        let mut annotated = make_txn(Some("Store"), "Groceries", "-50.00");
        let plain = make_txn(Some("Store"), "Groceries", "-50.00");
        annotated.metadata.push((
            "source".to_string(),
            rustledger_plugin_types::MetaValueData::String("test".to_string()),
        ));
        assert_eq!(hash_on_day(&annotated), hash_on_day(&plain));
    }

    #[test]
    fn tag_order_does_not_affect_hash() {
        let mut forward = make_txn(None, "Test", "100");
        forward.tags = strings(&["a", "b"]);
        let mut backward = make_txn(None, "Test", "100");
        backward.tags = strings(&["b", "a"]);
        assert_eq!(hash_on_day(&forward), hash_on_day(&backward));
    }

    #[test]
    fn different_tags_produce_different_hash() {
        let mut tagged_a = make_txn(None, "Test", "100");
        tagged_a.tags = strings(&["a"]);
        let mut tagged_b = make_txn(None, "Test", "100");
        tagged_b.tags = strings(&["b"]);
        assert_ne!(hash_on_day(&tagged_a), hash_on_day(&tagged_b));
    }

    #[test]
    fn fingerprint_deterministic() {
        let first = Fingerprint::compute("2024-01-15", Some("-50.00"), "WHOLE FOODS");
        let second = Fingerprint::compute("2024-01-15", Some("-50.00"), "WHOLE FOODS");
        assert_eq!(first, second);
    }

    #[test]
    fn fingerprint_different_dates() {
        let first = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        let second = Fingerprint::compute("2024-01-16", Some("-50.00"), "Store");
        assert_ne!(first, second);
    }

    #[test]
    fn fingerprint_different_amounts() {
        let first = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        let second = Fingerprint::compute("2024-01-15", Some("-51.00"), "Store");
        assert_ne!(first, second);
    }

    #[test]
    fn fingerprint_normalizes_text() {
        let upper = Fingerprint::compute("2024-01-15", Some("-50"), "WHOLE FOODS MARKET");
        let lower = Fingerprint::compute("2024-01-15", Some("-50"), "whole foods market");
        assert_eq!(upper, lower);
    }

    #[test]
    fn fingerprint_from_transaction() {
        let txn = make_txn(Some("Store"), "Groceries", "-50.00");
        let actual = Fingerprint::from_transaction("2024-01-15", &txn);
        let expected = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store Groceries");
        assert_eq!(actual, expected);
    }

    #[test]
    fn fingerprint_hex_roundtrip() {
        let original = Fingerprint::compute("2024-01-15", Some("-50.00"), "Test");
        let hex = original.to_hex();
        assert_eq!(hex.len(), 32);
        let parsed = Fingerprint::from_hex(&hex).unwrap();
        assert_eq!(original, parsed);
    }

    #[test]
    fn fingerprint_display() {
        let fp = Fingerprint::compute("2024-01-15", Some("-50.00"), "Test");
        let shown = format!("{fp}");
        assert_eq!(shown, fp.to_hex());
    }

    #[test]
    fn fingerprint_from_hex_invalid_length() {
        let err = Fingerprint::from_hex("abcd").unwrap_err();
        assert_eq!(err, FingerprintError::InvalidLength(4));
    }

    #[test]
    fn fingerprint_from_hex_invalid_chars() {
        let err = Fingerprint::from_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz").unwrap_err();
        assert_eq!(err, FingerprintError::InvalidHex);
    }

    #[test]
    fn distinct_costs_produce_different_hashes() {
        let mut cheaper = make_txn(Some("Store"), "Buy shares", "100.00");
        cheaper.postings[0].cost = Some(per_unit_cost("10.00"));
        let mut dearer = make_txn(Some("Store"), "Buy shares", "100.00");
        dearer.postings[0].cost = Some(per_unit_cost("11.00"));
        assert_ne!(hash_on_day(&cheaper), hash_on_day(&dearer));
    }

    #[test]
    fn distinct_prices_produce_different_hashes() {
        let mut cheaper = make_txn(Some("Store"), "Buy shares", "100.00");
        cheaper.postings[0].price = Some(unit_price("10.00"));
        let mut dearer = make_txn(Some("Store"), "Buy shares", "100.00");
        dearer.postings[0].price = Some(unit_price("11.00"));
        assert_ne!(hash_on_day(&cheaper), hash_on_day(&dearer));
    }

    #[test]
    fn reordered_postings_produce_different_hashes() {
        let bank = usd_posting("Assets:Bank", "-50.00");
        let food = usd_posting("Expenses:Food", "50.00");

        let bank_first = txn_with_postings(None, "Test", vec![bank.clone(), food.clone()]);
        let food_first = txn_with_postings(None, "Test", vec![food, bank]);
        assert_ne!(hash_on_day(&bank_first), hash_on_day(&food_first));
    }

    #[test]
    fn none_vs_empty_payee_differ() {
        let without_payee = make_txn(None, "Test", "100");
        // `Some("")` maps to `Some(String::new())`: present but empty.
        let empty_payee = make_txn(Some(""), "Test", "100");
        assert_ne!(hash_on_day(&without_payee), hash_on_day(&empty_payee));
    }

    #[test]
    fn empty_vs_absent_tags_are_duplicates() {
        let implicit = make_txn(None, "Test", "100");
        let mut explicit = make_txn(None, "Test", "100");
        explicit.tags = Vec::new();
        assert_eq!(hash_on_day(&implicit), hash_on_day(&explicit));
    }

    #[test]
    fn distinct_posting_counts_differ() {
        let one_leg = make_txn(None, "Test", "100");
        let two_legs = txn_with_postings(
            None,
            "Test",
            vec![
                usd_posting("Assets:Bank", "100"),
                usd_posting("Expenses:Food", "-100"),
            ],
        );
        assert_ne!(hash_on_day(&one_leg), hash_on_day(&two_legs));
    }

    #[test]
    fn distinct_flags_differ() {
        let mut cleared = make_txn(None, "Test", "100");
        cleared.flag = "*".to_string();
        let mut pending = make_txn(None, "Test", "100");
        pending.flag = "!".to_string();
        assert_ne!(hash_on_day(&cleared), hash_on_day(&pending));
    }

    #[test]
    fn link_order_independence() {
        let mut forward = make_txn(None, "Test", "100");
        forward.links = strings(&["link-a", "link-b"]);
        let mut backward = make_txn(None, "Test", "100");
        backward.links = strings(&["link-b", "link-a"]);
        assert_eq!(hash_on_day(&forward), hash_on_day(&backward));
    }

    #[test]
    fn duplicate_tags_are_deduped() {
        let mut doubled = make_txn(None, "Test", "100");
        doubled.tags = strings(&["a", "a"]);
        let mut single = make_txn(None, "Test", "100");
        single.tags = strings(&["a"]);
        assert_eq!(hash_on_day(&doubled), hash_on_day(&single));
    }

    #[test]
    fn posting_flag_affects_hash() {
        let mut flagged = make_txn(None, "Test", "100");
        flagged.postings[0].flag = Some("!".to_string());
        let unflagged = make_txn(None, "Test", "100");
        assert_ne!(hash_on_day(&flagged), hash_on_day(&unflagged));
    }

    #[test]
    fn fingerprint_none_amount() {
        let without_amount = Fingerprint::compute("2024-01-15", None, "Store");
        let with_amount = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        assert_ne!(without_amount, with_amount);
    }

    #[test]
    fn fingerprint_normalizes_amount() {
        let bare = Fingerprint::compute("2024-01-15", Some("50"), "Store");
        let padded = Fingerprint::compute("2024-01-15", Some("50.00"), "Store");
        assert_eq!(bare, padded);
    }

    #[test]
    fn fingerprint_from_transaction_no_postings() {
        let txn = txn_with_postings(Some("Store"), "Test", vec![]);
        let actual = Fingerprint::from_transaction("2024-01-15", &txn);
        let expected = Fingerprint::compute("2024-01-15", None, "Store Test");
        assert_eq!(actual, expected);
    }

    #[test]
    fn fingerprint_error_display() {
        let length_err = FingerprintError::InvalidLength(10);
        assert_eq!(
            length_err.to_string(),
            "fingerprint hex must be 32 chars, got 10"
        );
        let hex_err = FingerprintError::InvalidHex;
        assert_eq!(hex_err.to_string(), "invalid hex in fingerprint");
    }
}