1use std::collections::hash_map::DefaultHasher;
17use std::hash::{Hash, Hasher};
18use std::str::FromStr;
19
20use rustledger_plugin_types::{
21 AmountData, CostData, PostingData, PriceAnnotationData, TransactionData,
22};
23
/// Tag byte hashed in place of an optional field whose value is `None`.
const ABSENT: u8 = 0;
/// Tag byte hashed immediately before the contents of an optional field that is `Some`.
const PRESENT: u8 = 1;
28
29#[must_use]
38pub fn structural_hash(date: &str, txn: &TransactionData) -> u64 {
39 let TransactionData {
42 flag,
43 payee,
44 narration,
45 tags,
46 links,
47 metadata: _, postings,
49 } = txn;
50
51 let mut hasher = DefaultHasher::new();
52 date.hash(&mut hasher);
53 flag.hash(&mut hasher);
54 payee.hash(&mut hasher);
55 narration.hash(&mut hasher);
56
57 let mut sorted_tags: Vec<&String> = tags.iter().collect();
61 sorted_tags.sort();
62 sorted_tags.dedup();
63 sorted_tags.len().hash(&mut hasher);
64 for tag in sorted_tags {
65 tag.hash(&mut hasher);
66 }
67
68 let mut sorted_links: Vec<&String> = links.iter().collect();
69 sorted_links.sort();
70 sorted_links.dedup();
71 sorted_links.len().hash(&mut hasher);
72 for link in sorted_links {
73 link.hash(&mut hasher);
74 }
75
76 postings.len().hash(&mut hasher);
78 for posting in postings {
79 hash_posting(posting, &mut hasher);
80 }
81
82 hasher.finish()
83}
84
85fn hash_posting<H: Hasher>(posting: &PostingData, hasher: &mut H) {
87 let PostingData {
89 account,
90 units,
91 cost,
92 price,
93 flag,
94 metadata: _, span: _, } = posting;
97
98 account.hash(hasher);
99 match units {
100 Some(u) => {
101 PRESENT.hash(hasher);
102 hash_amount(u, hasher);
103 }
104 None => ABSENT.hash(hasher),
105 }
106 match cost {
107 Some(c) => {
108 PRESENT.hash(hasher);
109 hash_cost(c, hasher);
110 }
111 None => ABSENT.hash(hasher),
112 }
113 match price {
114 Some(p) => {
115 PRESENT.hash(hasher);
116 hash_price(p, hasher);
117 }
118 None => ABSENT.hash(hasher),
119 }
120 flag.hash(hasher);
121}
122
123fn hash_amount<H: Hasher>(amount: &AmountData, hasher: &mut H) {
125 let AmountData { number, currency } = amount;
126 number.hash(hasher);
127 currency.hash(hasher);
128}
129
130fn hash_cost<H: Hasher>(cost: &CostData, hasher: &mut H) {
132 use rustledger_plugin_types::CostNumberData;
133 let CostData {
134 number,
135 currency,
136 date,
137 label,
138 merge,
139 } = cost;
140 match number {
145 None => 0u8.hash(hasher),
146 Some(CostNumberData::PerUnit { value: s }) => {
147 1u8.hash(hasher);
148 s.hash(hasher);
149 }
150 Some(CostNumberData::Total { value: s }) => {
151 2u8.hash(hasher);
152 s.hash(hasher);
153 }
154 Some(CostNumberData::PerUnitFromTotal { per_unit, total }) => {
155 3u8.hash(hasher);
161 per_unit.hash(hasher);
162 total.hash(hasher);
163 }
164 }
165 currency.hash(hasher);
166 date.hash(hasher);
167 label.hash(hasher);
168 merge.hash(hasher);
169}
170
171fn hash_price<H: Hasher>(price: &PriceAnnotationData, hasher: &mut H) {
173 let PriceAnnotationData {
174 is_total,
175 amount,
176 number,
177 currency,
178 } = price;
179 is_total.hash(hasher);
180 match amount {
181 Some(a) => {
182 PRESENT.hash(hasher);
183 hash_amount(a, hasher);
184 }
185 None => ABSENT.hash(hasher),
186 }
187 number.hash(hasher);
188 currency.hash(hasher);
189}
190
191#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
205pub struct Fingerprint(pub [u8; 16]);
206
207impl Fingerprint {
208 #[must_use]
213 pub fn compute(date: &str, amount: Option<&str>, text: &str) -> Self {
214 let normalized = normalize_text(text);
215 let mut hasher = blake3::Hasher::new();
216 hasher.update(date.as_bytes());
217 hasher.update(b"|");
218 if let Some(amt) = amount {
219 let normalized_amt = rust_decimal::Decimal::from_str(amt)
221 .map_or_else(|_| amt.to_string(), |d| d.normalize().to_string());
222 hasher.update(normalized_amt.as_bytes());
223 }
224 hasher.update(b"|");
225 hasher.update(normalized.as_bytes());
226 let hash = hasher.finalize();
227 let mut bytes = [0u8; 16];
228 bytes.copy_from_slice(&hash.as_bytes()[..16]);
229 Self(bytes)
230 }
231
232 #[must_use]
236 pub fn from_transaction(date: &str, txn: &TransactionData) -> Self {
237 let amount = txn
238 .postings
239 .first()
240 .and_then(|p| p.units.as_ref())
241 .map(|u| u.number.as_str());
242
243 let mut text = String::new();
244 if let Some(ref payee) = txn.payee {
245 text.push_str(payee);
246 text.push(' ');
247 }
248 text.push_str(&txn.narration);
249
250 Self::compute(date, amount, &text)
251 }
252
253 #[must_use]
255 pub fn to_hex(&self) -> String {
256 let mut s = String::with_capacity(32);
257 for byte in &self.0 {
258 use std::fmt::Write;
259 write!(s, "{byte:02x}").expect("hex write to String cannot fail");
261 }
262 s
263 }
264
265 pub fn from_hex(s: &str) -> Result<Self, FingerprintError> {
271 if s.len() != 32 {
272 return Err(FingerprintError::InvalidLength(s.len()));
273 }
274 let mut bytes = [0u8; 16];
275 for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
276 let hex_str = std::str::from_utf8(chunk).map_err(|_| FingerprintError::InvalidHex)?;
277 bytes[i] = u8::from_str_radix(hex_str, 16).map_err(|_| FingerprintError::InvalidHex)?;
278 }
279 Ok(Self(bytes))
280 }
281}
282
283impl std::fmt::Display for Fingerprint {
284 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285 f.write_str(&self.to_hex())
286 }
287}
288
/// Reasons a hex string can fail to parse as a fingerprint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FingerprintError {
    /// The input was not 32 characters long; carries the length that was seen.
    InvalidLength(usize),
    /// The input had the right length but was not valid hexadecimal.
    InvalidHex,
}

impl std::fmt::Display for FingerprintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::InvalidHex => f.write_str("invalid hex in fingerprint"),
            Self::InvalidLength(actual) => {
                write!(f, "fingerprint hex must be 32 chars, got {actual}")
            }
        }
    }
}

impl std::error::Error for FingerprintError {}
310
/// Lowercases `text` and collapses every run of whitespace into a single
/// space, dropping leading and trailing whitespace.
fn normalize_text(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    for word in lowered.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
318
#[cfg(test)]
mod tests {
    use super::*;
    use rustledger_plugin_types::{
        AmountData, CostData, PostingData, PriceAnnotationData, TransactionData,
    };

    /// Date used by every test that does not vary the date itself.
    const DATE: &str = "2024-01-15";

    /// Builds a USD amount with the given number.
    fn usd(number: &str) -> AmountData {
        AmountData {
            number: number.to_string(),
            currency: "USD".to_string(),
        }
    }

    /// Builds a plain posting (no cost, price, flag or metadata) in USD.
    fn posting(account: &str, number: &str) -> PostingData {
        PostingData {
            account: account.to_string(),
            units: Some(usd(number)),
            cost: None,
            price: None,
            flag: None,
            metadata: vec![],
            span: None,
        }
    }

    /// Builds a `*`-flagged transaction with no tags, links or metadata.
    fn txn_with(
        payee: Option<&str>,
        narration: &str,
        postings: Vec<PostingData>,
    ) -> TransactionData {
        TransactionData {
            flag: "*".to_string(),
            payee: payee.map(String::from),
            narration: narration.to_string(),
            tags: vec![],
            links: vec![],
            metadata: vec![],
            postings,
        }
    }

    /// Builds a transaction with a single `Assets:Bank` posting.
    fn make_txn(payee: Option<&str>, narration: &str, amount: &str) -> TransactionData {
        txn_with(payee, narration, vec![posting("Assets:Bank", amount)])
    }

    /// Converts string literals into owned strings.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Structural hash of `txn` on the shared test date.
    fn hash_on_date(txn: &TransactionData) -> u64 {
        structural_hash(DATE, txn)
    }

    /// Builds a per-unit USD cost with no date or label.
    fn per_unit_cost(value: &str) -> CostData {
        use rustledger_plugin_types::CostNumberData;
        CostData {
            number: Some(CostNumberData::PerUnit {
                value: value.to_string(),
            }),
            currency: Some("USD".to_string()),
            date: None,
            label: None,
            merge: false,
        }
    }

    /// Builds a per-unit price annotation carrying a full USD amount.
    fn unit_price(number: &str) -> PriceAnnotationData {
        PriceAnnotationData {
            is_total: false,
            amount: Some(usd(number)),
            number: None,
            currency: None,
        }
    }

    #[test]
    fn identical_transactions_produce_same_hash() {
        let first = make_txn(Some("Store"), "Groceries", "-50.00");
        let second = make_txn(Some("Store"), "Groceries", "-50.00");
        assert_eq!(hash_on_date(&first), hash_on_date(&second));
    }

    #[test]
    fn different_dates_produce_different_hash() {
        let txn = make_txn(Some("Store"), "Groceries", "-50.00");
        assert_ne!(
            structural_hash("2024-01-15", &txn),
            structural_hash("2024-01-16", &txn)
        );
    }

    #[test]
    fn different_amounts_produce_different_hash() {
        let fifty = make_txn(Some("Store"), "Groceries", "-50.00");
        let fifty_one = make_txn(Some("Store"), "Groceries", "-51.00");
        assert_ne!(hash_on_date(&fifty), hash_on_date(&fifty_one));
    }

    #[test]
    fn metadata_does_not_affect_hash() {
        let mut annotated = make_txn(Some("Store"), "Groceries", "-50.00");
        let plain = make_txn(Some("Store"), "Groceries", "-50.00");
        annotated.metadata.push((
            "source".to_string(),
            rustledger_plugin_types::MetaValueData::String("test".to_string()),
        ));
        assert_eq!(hash_on_date(&annotated), hash_on_date(&plain));
    }

    #[test]
    fn tag_order_does_not_affect_hash() {
        let mut forward = make_txn(None, "Test", "100");
        forward.tags = strings(&["a", "b"]);
        let mut backward = make_txn(None, "Test", "100");
        backward.tags = strings(&["b", "a"]);
        assert_eq!(hash_on_date(&forward), hash_on_date(&backward));
    }

    #[test]
    fn different_tags_produce_different_hash() {
        let mut tagged_a = make_txn(None, "Test", "100");
        tagged_a.tags = strings(&["a"]);
        let mut tagged_b = make_txn(None, "Test", "100");
        tagged_b.tags = strings(&["b"]);
        assert_ne!(hash_on_date(&tagged_a), hash_on_date(&tagged_b));
    }

    #[test]
    fn fingerprint_deterministic() {
        let first = Fingerprint::compute("2024-01-15", Some("-50.00"), "WHOLE FOODS");
        let second = Fingerprint::compute("2024-01-15", Some("-50.00"), "WHOLE FOODS");
        assert_eq!(first, second);
    }

    #[test]
    fn fingerprint_different_dates() {
        let earlier = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        let later = Fingerprint::compute("2024-01-16", Some("-50.00"), "Store");
        assert_ne!(earlier, later);
    }

    #[test]
    fn fingerprint_different_amounts() {
        let fifty = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        let fifty_one = Fingerprint::compute("2024-01-15", Some("-51.00"), "Store");
        assert_ne!(fifty, fifty_one);
    }

    #[test]
    fn fingerprint_normalizes_text() {
        let upper = Fingerprint::compute("2024-01-15", Some("-50"), "WHOLE FOODS MARKET");
        let lower = Fingerprint::compute("2024-01-15", Some("-50"), "whole foods market");
        assert_eq!(upper, lower);
    }

    #[test]
    fn fingerprint_from_transaction() {
        let txn = make_txn(Some("Store"), "Groceries", "-50.00");
        let actual = Fingerprint::from_transaction("2024-01-15", &txn);
        let expected = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store Groceries");
        assert_eq!(actual, expected);
    }

    #[test]
    fn fingerprint_hex_roundtrip() {
        let original = Fingerprint::compute("2024-01-15", Some("-50.00"), "Test");
        let hex = original.to_hex();
        assert_eq!(hex.len(), 32);
        let parsed = Fingerprint::from_hex(&hex).unwrap();
        assert_eq!(original, parsed);
    }

    #[test]
    fn fingerprint_display() {
        let fp = Fingerprint::compute("2024-01-15", Some("-50.00"), "Test");
        let shown = format!("{fp}");
        assert_eq!(shown, fp.to_hex());
    }

    #[test]
    fn fingerprint_from_hex_invalid_length() {
        let err = Fingerprint::from_hex("abcd").unwrap_err();
        assert_eq!(err, FingerprintError::InvalidLength(4));
    }

    #[test]
    fn fingerprint_from_hex_invalid_chars() {
        let err = Fingerprint::from_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz").unwrap_err();
        assert_eq!(err, FingerprintError::InvalidHex);
    }

    #[test]
    fn distinct_costs_produce_different_hashes() {
        let mut cheaper = make_txn(Some("Store"), "Buy shares", "100.00");
        cheaper.postings[0].cost = Some(per_unit_cost("10.00"));
        let mut dearer = make_txn(Some("Store"), "Buy shares", "100.00");
        dearer.postings[0].cost = Some(per_unit_cost("11.00"));
        assert_ne!(hash_on_date(&cheaper), hash_on_date(&dearer));
    }

    #[test]
    fn distinct_prices_produce_different_hashes() {
        let mut cheaper = make_txn(Some("Store"), "Buy shares", "100.00");
        cheaper.postings[0].price = Some(unit_price("10.00"));
        let mut dearer = make_txn(Some("Store"), "Buy shares", "100.00");
        dearer.postings[0].price = Some(unit_price("11.00"));
        assert_ne!(hash_on_date(&cheaper), hash_on_date(&dearer));
    }

    #[test]
    fn reordered_postings_produce_different_hashes() {
        let bank = posting("Assets:Bank", "-50.00");
        let food = posting("Expenses:Food", "50.00");

        let forward = txn_with(None, "Test", vec![bank.clone(), food.clone()]);
        let reversed = txn_with(None, "Test", vec![food, bank]);
        assert_ne!(hash_on_date(&forward), hash_on_date(&reversed));
    }

    #[test]
    fn none_vs_empty_payee_differ() {
        let without_payee = make_txn(None, "Test", "100");
        let empty_payee = txn_with(Some(""), "Test", vec![posting("Assets:Bank", "100")]);
        assert_ne!(hash_on_date(&without_payee), hash_on_date(&empty_payee));
    }

    #[test]
    fn empty_vs_absent_tags_are_duplicates() {
        let via_helper = make_txn(None, "Test", "100");
        let spelled_out = txn_with(None, "Test", vec![posting("Assets:Bank", "100")]);
        assert_eq!(hash_on_date(&via_helper), hash_on_date(&spelled_out));
    }

    #[test]
    fn distinct_posting_counts_differ() {
        let single = make_txn(None, "Test", "100");
        let double = txn_with(
            None,
            "Test",
            vec![
                posting("Assets:Bank", "100"),
                posting("Expenses:Food", "-100"),
            ],
        );
        assert_ne!(hash_on_date(&single), hash_on_date(&double));
    }

    #[test]
    fn distinct_flags_differ() {
        let mut cleared = make_txn(None, "Test", "100");
        cleared.flag = "*".to_string();
        let mut pending = make_txn(None, "Test", "100");
        pending.flag = "!".to_string();
        assert_ne!(hash_on_date(&cleared), hash_on_date(&pending));
    }

    #[test]
    fn link_order_independence() {
        let mut forward = make_txn(None, "Test", "100");
        forward.links = strings(&["link-a", "link-b"]);
        let mut backward = make_txn(None, "Test", "100");
        backward.links = strings(&["link-b", "link-a"]);
        assert_eq!(hash_on_date(&forward), hash_on_date(&backward));
    }

    #[test]
    fn duplicate_tags_are_deduped() {
        let mut doubled = make_txn(None, "Test", "100");
        doubled.tags = strings(&["a", "a"]);
        let mut single = make_txn(None, "Test", "100");
        single.tags = strings(&["a"]);
        assert_eq!(hash_on_date(&doubled), hash_on_date(&single));
    }

    #[test]
    fn posting_flag_affects_hash() {
        let mut flagged = make_txn(None, "Test", "100");
        flagged.postings[0].flag = Some("!".to_string());
        let unflagged = make_txn(None, "Test", "100");
        assert_ne!(hash_on_date(&flagged), hash_on_date(&unflagged));
    }

    #[test]
    fn fingerprint_none_amount() {
        let without_amount = Fingerprint::compute("2024-01-15", None, "Store");
        let with_amount = Fingerprint::compute("2024-01-15", Some("-50.00"), "Store");
        assert_ne!(without_amount, with_amount);
    }

    #[test]
    fn fingerprint_normalizes_amount() {
        let bare = Fingerprint::compute("2024-01-15", Some("50"), "Store");
        let padded = Fingerprint::compute("2024-01-15", Some("50.00"), "Store");
        assert_eq!(bare, padded);
    }

    #[test]
    fn fingerprint_from_transaction_no_postings() {
        let txn = txn_with(Some("Store"), "Test", vec![]);
        let actual = Fingerprint::from_transaction("2024-01-15", &txn);
        let expected = Fingerprint::compute("2024-01-15", None, "Store Test");
        assert_eq!(actual, expected);
    }

    #[test]
    fn fingerprint_error_display() {
        let length_err = FingerprintError::InvalidLength(10);
        assert_eq!(
            length_err.to_string(),
            "fingerprint hex must be 32 chars, got 10"
        );
        let hex_err = FingerprintError::InvalidHex;
        assert_eq!(hex_err.to_string(), "invalid hex in fingerprint");
    }
}