1use std::collections::BTreeMap;
16
17use serde::{Deserialize, Serialize};
18
19use super::JournalEntry;
20
/// Hashes `s` with 64-bit FNV-1a, processing its UTF-8 bytes in order.
///
/// Each byte is XOR-ed into the running hash, which is then multiplied by
/// the FNV prime with wrapping arithmetic. The empty string hashes to the
/// offset basis. The result depends only on the input bytes.
fn fnv1a(s: &str) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    s.bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME))
}
30
/// Classifies an account code by its first character.
///
/// Returns `Some(true)` when the code starts with `'1'`, `'2'` or `'3'`,
/// `Some(false)` when it starts with `'4'` through `'9'`, and `None` for an
/// empty code or any other leading character (including `'0'`).
///
/// NOTE(review): callers in this file treat `Some(false)` as the "is"
/// (presumably income-statement) side; confirm against the chart of accounts.
fn is_balance_sheet(account: &str) -> Option<bool> {
    let leading = account.chars().next()?;
    match leading {
        '1'..='3' => Some(true),
        '4'..='9' => Some(false),
        _ => None,
    }
}
39
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
42pub struct FingerprintListEntry {
43 pub l1_fingerprint: String,
44 pub structure: String,
46 pub je_count: usize,
47}
48
/// Fingerprint details computed for a single journal entry.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` because every field
/// supports them, which lets rows be deduplicated in a `HashSet` or used as
/// map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct JeFingerprint {
    /// Identifier of the journal entry, as a string.
    pub je_id: String,
    /// Hash of the entry's structure using full account codes.
    pub l1_fingerprint: u64,
    /// Hash of the entry's structure using only the first two characters of
    /// each account code.
    pub l2_fingerprint: u64,
    /// `true` when the entry touches balance-sheet accounts and no "is"
    /// accounts.
    pub bs_only: bool,
    /// `true` when the entry touches "is" accounts and no balance-sheet
    /// accounts.
    pub is_only: bool,
    /// Number of lines in the journal entry.
    pub line_count: usize,
}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct StructuralFingerprintReport {
64 pub je_count: usize,
65 pub l1_distinct: usize,
66 pub l2_distinct: usize,
67 pub bs_only_count: usize,
68 pub is_only_count: usize,
69 pub mixed_count: usize,
70 pub top_l1_fingerprints: Vec<FingerprintListEntry>,
72 #[serde(skip)]
74 pub per_je: Vec<JeFingerprint>,
75}
76
77impl StructuralFingerprintReport {
78 pub const DEFAULT_LIST_CAP: usize = 500;
79
80 pub fn from_entries(entries: &[JournalEntry], list_cap: usize) -> Self {
82 let mut per_je = Vec::with_capacity(entries.len());
83 let mut l1_listing: BTreeMap<u64, (String, usize)> = BTreeMap::new();
85 let mut l2_set = std::collections::BTreeSet::new();
86 let mut bs_only_count = 0usize;
87 let mut is_only_count = 0usize;
88 let mut mixed_count = 0usize;
89
90 for je in entries {
91 let mut l1_parts: Vec<String> = Vec::with_capacity(je.lines.len());
93 let mut l2_parts: Vec<String> = Vec::with_capacity(je.lines.len());
94 let mut any_bs = false;
95 let mut any_is = false;
96 for line in &je.lines {
97 let dc = if line.debit_amount > line.credit_amount {
98 "Dr"
99 } else {
100 "Cr"
101 };
102 l1_parts.push(format!("{dc}:{}", line.gl_account));
103 let prefix: String = line.gl_account.chars().take(2).collect();
104 l2_parts.push(format!("{dc}:{prefix}"));
105 match is_balance_sheet(&line.gl_account) {
106 Some(true) => any_bs = true,
107 Some(false) => any_is = true,
108 None => {}
109 }
110 }
111 l1_parts.sort();
112 l2_parts.sort();
113 let l1_struct = l1_parts.join("|");
114 let l2_struct = l2_parts.join("|");
115 let l1 = fnv1a(&l1_struct);
116 let l2 = fnv1a(&l2_struct);
117
118 let bs_only = any_bs && !any_is;
119 let is_only = any_is && !any_bs;
120 if bs_only {
121 bs_only_count += 1;
122 } else if is_only {
123 is_only_count += 1;
124 } else {
125 mixed_count += 1;
126 }
127
128 l1_listing
129 .entry(l1)
130 .or_insert_with(|| (l1_struct.clone(), 0))
131 .1 += 1;
132 l2_set.insert(l2);
133
134 per_je.push(JeFingerprint {
135 je_id: je.header.document_id.to_string(),
136 l1_fingerprint: l1,
137 l2_fingerprint: l2,
138 bs_only,
139 is_only,
140 line_count: je.lines.len(),
141 });
142 }
143
144 let l1_distinct = l1_listing.len();
145 let mut top: Vec<FingerprintListEntry> = l1_listing
146 .into_iter()
147 .map(|(fp, (structure, count))| FingerprintListEntry {
148 l1_fingerprint: format!("{fp:016x}"),
149 structure,
150 je_count: count,
151 })
152 .collect();
153 top.sort_by(|a, b| {
154 b.je_count
155 .cmp(&a.je_count)
156 .then(a.structure.cmp(&b.structure))
157 });
158 top.truncate(list_cap);
159
160 Self {
161 je_count: entries.len(),
162 l1_distinct,
163 l2_distinct: l2_set.len(),
164 bs_only_count,
165 is_only_count,
166 mixed_count,
167 top_l1_fingerprints: top,
168 per_je,
169 }
170 }
171
172 pub fn per_je_csv(&self) -> String {
174 let mut s =
175 String::from("je_id,l1_fingerprint,l2_fingerprint,bs_only,is_only,line_count\n");
176 for r in &self.per_je {
177 s.push_str(&format!(
178 "{},{:016x},{:016x},{},{},{}\n",
179 r.je_id, r.l1_fingerprint, r.l2_fingerprint, r.bs_only, r.is_only, r.line_count
180 ));
181 }
182 s
183 }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::journal_entry::{JournalEntry, JournalEntryHeader, JournalEntryLine};
    use chrono::NaiveDate;
    use rust_decimal::Decimal;

    /// Builds a journal entry from `(account, debit, credit)` triples.
    ///
    /// A triple with a non-zero debit becomes a debit line for that amount;
    /// otherwise it becomes a credit line for the credit amount. Line numbers
    /// start at 1.
    fn je(lines: Vec<(&str, i64, i64)>) -> JournalEntry {
        let date = NaiveDate::from_ymd_opt(2026, 3, 15).unwrap();
        let mut entry = JournalEntry::new(JournalEntryHeader::new("1000".to_string(), date));
        for (idx, (account, debit, credit)) in lines.into_iter().enumerate() {
            let line_no = (idx + 1) as u32;
            let line = match debit {
                0 => JournalEntryLine::credit(
                    entry.header.document_id,
                    line_no,
                    account.to_string(),
                    Decimal::from(credit),
                ),
                _ => JournalEntryLine::debit(
                    entry.header.document_id,
                    line_no,
                    account.to_string(),
                    Decimal::from(debit),
                ),
            };
            entry.add_line(line);
        }
        entry
    }

    #[test]
    fn same_structure_shares_l1_fingerprint() {
        // The first two entries differ only in amounts; the third uses other accounts.
        let sale_large = je(vec![("1100", 1000, 0), ("4000", 0, 1000)]);
        let sale_small = je(vec![("1100", 7, 0), ("4000", 0, 7)]);
        let other = je(vec![("1200", 500, 0), ("2000", 0, 500)]);
        let entries = vec![sale_large, sale_small, other];

        let report = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(report.je_count, 3);
        assert_eq!(report.l1_distinct, 2);
        assert_eq!(
            report.per_je[0].l1_fingerprint,
            report.per_je[1].l1_fingerprint
        );
        assert_ne!(
            report.per_je[0].l1_fingerprint,
            report.per_je[2].l1_fingerprint
        );
        assert_eq!(report.top_l1_fingerprints[0].je_count, 2);
    }

    #[test]
    fn bs_and_is_flags() {
        let entries = vec![
            // Accounts starting with 1 and 2 only.
            je(vec![("1100", 1000, 0), ("2000", 0, 1000)]),
            // Accounts starting with 4 and 5 only.
            je(vec![("4000", 0, 500), ("5000", 500, 0)]),
            // One account from each group.
            je(vec![("1100", 1000, 0), ("4000", 0, 1000)]),
        ];

        let report = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(report.bs_only_count, 1);
        assert_eq!(report.is_only_count, 1);
        assert_eq!(report.mixed_count, 1);
    }

    #[test]
    fn deterministic() {
        let entries = vec![je(vec![("1100", 1000, 0), ("4000", 0, 1000)])];

        let first = StructuralFingerprintReport::from_entries(&entries, 500);
        let second = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(first.per_je_csv(), second.per_je_csv());
        assert_eq!(first.top_l1_fingerprints, second.top_l1_fingerprints);
    }
}