use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use super::JournalEntry;
/// Hashes the UTF-8 bytes of `s` with the 64-bit FNV-1a scheme.
///
/// Each byte is XOR-ed into the running state, which is then multiplied by
/// the FNV prime with wrapping arithmetic. The empty string returns the
/// offset basis unchanged.
fn fnv1a(s: &str) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    s.bytes().fold(OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
/// Classifies an account code by its first character.
///
/// Returns `Some(true)` when the code starts with `1`, `2` or `3`,
/// `Some(false)` when it starts with `4` through `9`, and `None` for anything
/// else (empty string, leading `0`, letters, non-ASCII characters).
fn is_balance_sheet(account: &str) -> Option<bool> {
    // `to_digit(10)` only recognises ASCII `0`-`9`, so every other leading
    // character short-circuits to `None` here.
    let leading = account.chars().next()?.to_digit(10)?;
    match leading {
        1..=3 => Some(true),
        4..=9 => Some(false),
        _ => None,
    }
}
/// One row of the `top_l1_fingerprints` listing: a distinct L1 fingerprint
/// together with the structure it was computed from and its frequency.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FingerprintListEntry {
/// L1 hash rendered as 16 zero-padded lowercase hex digits (`{:016x}`).
pub l1_fingerprint: String,
/// The hashed text: sorted `Dr:<account>` / `Cr:<account>` parts joined
/// with `|`, taken from the first entry seen with this fingerprint.
pub structure: String,
/// Number of journal entries that produced this fingerprint.
pub je_count: usize,
}
/// Fingerprint record for a single journal entry, as produced by
/// `StructuralFingerprintReport::from_entries`.
#[derive(Debug, Clone, PartialEq)]
pub struct JeFingerprint {
/// String form of the entry's `header.document_id`.
pub je_id: String,
/// FNV-1a hash of the sorted `Dr|Cr:<full account>` parts joined with `|`.
pub l1_fingerprint: u64,
/// Same as `l1_fingerprint`, but each account is cut to its first two
/// characters before hashing.
pub l2_fingerprint: u64,
/// True when at least one line's account starts with `1`-`3` and no line's
/// account starts with `4`-`9`.
pub bs_only: bool,
/// True when at least one line's account starts with `4`-`9` and no line's
/// account starts with `1`-`3`.
pub is_only: bool,
/// Number of lines in the journal entry.
pub line_count: usize,
}
/// Aggregate result of fingerprinting a batch of journal entries by their
/// debit/credit account structure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructuralFingerprintReport {
/// Number of journal entries that were analysed.
pub je_count: usize,
/// Number of distinct L1 (full account) fingerprint hashes.
pub l1_distinct: usize,
/// Number of distinct L2 (two-character account prefix) fingerprint hashes.
pub l2_distinct: usize,
/// Entries flagged `bs_only`.
pub bs_only_count: usize,
/// Entries flagged `is_only`.
pub is_only_count: usize,
/// Entries that are neither `bs_only` nor `is_only`; this also includes
/// entries in which no line could be classified at all.
pub mixed_count: usize,
/// L1 fingerprints ordered by descending entry count, then by structure
/// text, truncated to the `list_cap` passed to `from_entries`.
pub top_l1_fingerprints: Vec<FingerprintListEntry>,
/// Per-entry detail rows. Skipped by serde; `per_je_csv` renders them as CSV.
#[serde(skip)]
pub per_je: Vec<JeFingerprint>,
}
impl StructuralFingerprintReport {
    /// Ready-made value for the `list_cap` argument of [`Self::from_entries`].
    ///
    /// It is not applied automatically; callers pass the cap explicitly.
    pub const DEFAULT_LIST_CAP: usize = 500;

    /// Fingerprints every entry in `entries` and aggregates the results.
    ///
    /// For each journal entry, every line becomes a `Dr:<account>` or
    /// `Cr:<account>` token (`Dr` only when the debit amount is strictly
    /// greater than the credit amount). The tokens are sorted, joined with
    /// `|`, and hashed with FNV-1a to give the L1 fingerprint. The L2
    /// fingerprint is built the same way from the first two characters of
    /// each account.
    ///
    /// An entry is `bs_only` / `is_only` when all of its classifiable lines
    /// fall on one side of `is_balance_sheet`. Everything else, including
    /// entries with no classifiable line, is counted as mixed.
    ///
    /// `list_cap` limits the length of `top_l1_fingerprints`, which is
    /// ordered by descending entry count and then by structure text.
    pub fn from_entries(entries: &[JournalEntry], list_cap: usize) -> Self {
        let mut per_je = Vec::with_capacity(entries.len());
        // Hash -> (structure text of the first entry seen, number of entries).
        let mut l1_groups: BTreeMap<u64, (String, usize)> = BTreeMap::new();
        let mut l2_seen = std::collections::BTreeSet::new();
        let (mut bs_only_count, mut is_only_count, mut mixed_count) = (0usize, 0usize, 0usize);

        for je in entries {
            let line_count = je.lines.len();
            let mut l1_parts: Vec<String> = Vec::with_capacity(line_count);
            let mut l2_parts: Vec<String> = Vec::with_capacity(line_count);
            let (mut any_bs, mut any_is) = (false, false);

            for line in &je.lines {
                // Equal amounts (including two zeros) are treated as a credit.
                let dc = if line.debit_amount > line.credit_amount {
                    "Dr"
                } else {
                    "Cr"
                };
                l1_parts.push(format!("{dc}:{}", line.gl_account));

                let prefix: String = line.gl_account.chars().take(2).collect();
                l2_parts.push(format!("{dc}:{prefix}"));

                if let Some(on_balance_sheet) = is_balance_sheet(&line.gl_account) {
                    any_bs |= on_balance_sheet;
                    any_is |= !on_balance_sheet;
                }
            }

            // Sorting makes the fingerprint independent of line order.
            l1_parts.sort();
            l2_parts.sort();
            let l1_struct = l1_parts.join("|");
            let l1 = fnv1a(&l1_struct);
            let l2 = fnv1a(&l2_parts.join("|"));

            let bs_only = any_bs && !any_is;
            let is_only = any_is && !any_bs;
            match (any_bs, any_is) {
                (true, false) => bs_only_count += 1,
                (false, true) => is_only_count += 1,
                // Both sides present, or nothing classifiable.
                _ => mixed_count += 1,
            }

            // The structure text is only stored for the first entry of a group.
            l1_groups.entry(l1).or_insert_with(|| (l1_struct, 0)).1 += 1;
            l2_seen.insert(l2);

            per_je.push(JeFingerprint {
                je_id: je.header.document_id.to_string(),
                l1_fingerprint: l1,
                l2_fingerprint: l2,
                bs_only,
                is_only,
                line_count,
            });
        }

        let l1_distinct = l1_groups.len();
        let mut top_l1_fingerprints: Vec<FingerprintListEntry> = Vec::with_capacity(l1_distinct);
        for (fp, (structure, je_count)) in l1_groups {
            top_l1_fingerprints.push(FingerprintListEntry {
                l1_fingerprint: format!("{fp:016x}"),
                structure,
                je_count,
            });
        }
        // Most frequent first; ties broken by structure text for determinism.
        top_l1_fingerprints.sort_by(|a, b| {
            b.je_count
                .cmp(&a.je_count)
                .then_with(|| a.structure.cmp(&b.structure))
        });
        top_l1_fingerprints.truncate(list_cap);

        Self {
            je_count: entries.len(),
            l1_distinct,
            l2_distinct: l2_seen.len(),
            bs_only_count,
            is_only_count,
            mixed_count,
            top_l1_fingerprints,
            per_je,
        }
    }

    /// Renders `per_je` as CSV text: a header line followed by one
    /// newline-terminated row per entry, in `per_je` order.
    ///
    /// Fingerprints are written as 16-digit zero-padded lowercase hex.
    /// `je_id` is written verbatim, without CSV quoting or escaping.
    pub fn per_je_csv(&self) -> String {
        let header =
            String::from("je_id,l1_fingerprint,l2_fingerprint,bs_only,is_only,line_count\n");
        self.per_je.iter().fold(header, |mut csv, r| {
            csv.push_str(&format!(
                "{},{:016x},{:016x},{},{},{}\n",
                r.je_id, r.l1_fingerprint, r.l2_fingerprint, r.bs_only, r.is_only, r.line_count
            ));
            csv
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::journal_entry::{JournalEntry, JournalEntryHeader, JournalEntryLine};
    use chrono::NaiveDate;
    use rust_decimal::Decimal;

    /// Builds a journal entry from `(account, debit, credit)` triples.
    ///
    /// A triple with a non-zero debit becomes a debit line; otherwise it
    /// becomes a credit line carrying the credit amount. Line numbers start
    /// at 1.
    fn build_je(lines: Vec<(&str, i64, i64)>) -> JournalEntry {
        let header = JournalEntryHeader::new(
            "1000".to_string(),
            NaiveDate::from_ymd_opt(2026, 3, 15).unwrap(),
        );
        let mut entry = JournalEntry::new(header);
        for (idx, (account, debit, credit)) in lines.into_iter().enumerate() {
            let line_no = (idx + 1) as u32;
            let line = if debit == 0 {
                JournalEntryLine::credit(
                    entry.header.document_id,
                    line_no,
                    account.to_string(),
                    Decimal::from(credit),
                )
            } else {
                JournalEntryLine::debit(
                    entry.header.document_id,
                    line_no,
                    account.to_string(),
                    Decimal::from(debit),
                )
            };
            entry.add_line(line);
        }
        entry
    }

    /// Entries that differ only in amounts share an L1 fingerprint; entries
    /// on different accounts do not.
    #[test]
    fn same_structure_shares_l1_fingerprint() {
        let entries = [
            build_je(vec![("1100", 1000, 0), ("4000", 0, 1000)]),
            build_je(vec![("1100", 7, 0), ("4000", 0, 7)]),
            build_je(vec![("1200", 500, 0), ("2000", 0, 500)]),
        ];
        let report = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(report.je_count, 3);
        assert_eq!(report.l1_distinct, 2);
        assert_eq!(
            report.per_je[0].l1_fingerprint,
            report.per_je[1].l1_fingerprint
        );
        assert_ne!(
            report.per_je[0].l1_fingerprint,
            report.per_je[2].l1_fingerprint
        );
        assert_eq!(report.top_l1_fingerprints[0].je_count, 2);
    }

    /// One entry lands in each of the bs-only, is-only and mixed buckets.
    #[test]
    fn bs_and_is_flags() {
        let entries = [
            build_je(vec![("1100", 1000, 0), ("2000", 0, 1000)]),
            build_je(vec![("4000", 0, 500), ("5000", 500, 0)]),
            build_je(vec![("1100", 1000, 0), ("4000", 0, 1000)]),
        ];
        let report = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(report.bs_only_count, 1);
        assert_eq!(report.is_only_count, 1);
        assert_eq!(report.mixed_count, 1);
    }

    /// Two runs over the same input yield the same CSV and the same listing.
    #[test]
    fn deterministic() {
        let entries = [build_je(vec![("1100", 1000, 0), ("4000", 0, 1000)])];
        let first = StructuralFingerprintReport::from_entries(&entries, 500);
        let second = StructuralFingerprintReport::from_entries(&entries, 500);

        assert_eq!(first.per_je_csv(), second.per_je_csv());
        assert_eq!(first.top_l1_fingerprints, second.top_l1_fingerprints);
    }
}