datasynth_generators/concentration/
trading_partner_pool.rs1use std::collections::BTreeMap;
17
18use datasynth_config::schema::TradingPartnerPoolPassConfig;
19use datasynth_core::models::JournalEntry;
20use rand_chacha::ChaCha8Rng;
21
22use super::{ConcentrationPass, ConcentrationStats};
23
/// Identifier of this pass; returned by `name()` and recorded in the `pass`
/// field of the stats produced by `apply()`.
const PASS_NAME: &str = "trading_partner_pool";
25
/// Concentration pass that collapses trading-partner identifiers onto a
/// fixed-size pool of canonical `TP-NNNNNN` ids.
///
/// Each original identifier is hashed and reduced modulo `target_size`, so the
/// same input id always lands on the same pool slot.
#[derive(Debug, Clone)]
pub struct TradingPartnerPoolPass {
    /// Number of slots in the canonical pool. The constructor clamps this to
    /// at least 1 so it is always safe to use as a modulus.
    target_size: u64,
}
29
30impl TradingPartnerPoolPass {
31 pub fn new(cfg: TradingPartnerPoolPassConfig) -> Self {
32 Self {
33 target_size: cfg.target_size.max(1) as u64,
35 }
36 }
37
38 #[inline]
41 fn pool_index(&self, tp: &str) -> u64 {
42 const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
43 const FNV_PRIME: u64 = 0x0000_0100_0000_01B3;
44 let mut h = FNV_OFFSET;
45 for b in tp.as_bytes() {
46 h ^= *b as u64;
47 h = h.wrapping_mul(FNV_PRIME);
48 }
49 h % self.target_size
50 }
51
52 #[inline]
53 fn canonical_tp(&self, original: &str) -> String {
54 format!("TP-{:06}", self.pool_index(original))
55 }
56}
57
58impl ConcentrationPass for TradingPartnerPoolPass {
59 fn name(&self) -> &'static str {
60 PASS_NAME
61 }
62
63 fn apply(&self, entries: &mut [JournalEntry], _rng: &mut ChaCha8Rng) -> ConcentrationStats {
64 let mut lines_modified: u64 = 0;
65 let mut entries_modified: usize = 0;
66 for je in entries.iter_mut() {
67 let mut je_touched = false;
68 for line in je.lines.iter_mut() {
69 if let Some(tp) = line.trading_partner.as_ref() {
70 let new_tp = self.canonical_tp(tp);
71 if &new_tp != tp {
72 line.trading_partner = Some(new_tp);
73 lines_modified += 1;
74 je_touched = true;
75 }
76 }
77 }
78 if je_touched {
79 entries_modified += 1;
80 }
81 }
82 let mut extra = BTreeMap::new();
83 extra.insert("lines_modified", lines_modified);
84 extra.insert("target_pool_size", self.target_size);
85 ConcentrationStats {
86 pass: PASS_NAME,
87 entries_examined: entries.len(),
88 entries_modified,
89 extra,
90 }
91 }
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{JournalEntry, JournalEntryLine};
    use rand::SeedableRng;
    use std::collections::HashSet;

    /// Builds a journal entry holding exactly one line on account `6000`,
    /// whose `trading_partner` is `tp`.
    fn make_je(idx: usize, tp: Option<&str>) -> JournalEntry {
        let date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut entry = JournalEntry::new_simple(
            format!("JE{idx}"),
            "C1".to_string(),
            date,
            format!("test {idx}"),
        );
        entry.lines.push(JournalEntryLine {
            gl_account: "6000".to_string(),
            trading_partner: tp.map(String::from),
            ..JournalEntryLine::default()
        });
        entry
    }

    /// 200 distinct partners hashed into 25 slots must not exceed the pool
    /// size and should fill at least roughly half of it.
    #[test]
    fn converges_to_target_pool_size() {
        let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
        for i in 0..200 {
            let original = format!("V-{:06}", i);
            entries.push(make_je(i, Some(original.as_str())));
        }

        let pass = TradingPartnerPoolPass::new(TradingPartnerPoolPassConfig { target_size: 25 });
        let mut rng = ChaCha8Rng::seed_from_u64(7);
        let stats = pass.apply(&mut entries, &mut rng);

        let mut distinct: HashSet<&String> = HashSet::new();
        for je in &entries {
            if let Some(tp) = je.lines[0].trading_partner.as_ref() {
                distinct.insert(tp);
            }
        }
        let pool_len = distinct.len();
        assert!(pool_len <= 25, "pool exceeded target: {}", pool_len);
        assert!(pool_len >= 12, "pool under-filled below half: {}", pool_len);
        assert_eq!(stats.entries_examined, 200);
        assert_eq!(stats.extra["target_pool_size"], 25);
    }

    /// Two passes built from the same config, applied to identical batches
    /// with identically seeded RNGs, must yield identical partners.
    #[test]
    fn deterministic_under_same_seed() {
        let make_batch = || -> Vec<JournalEntry> {
            let mut batch = Vec::with_capacity(50);
            for i in 0..50 {
                batch.push(make_je(i, Some(&format!("V-orig-{i:04}"))));
            }
            batch
        };
        let cfg = TradingPartnerPoolPassConfig { target_size: 8 };
        let pass_a = TradingPartnerPoolPass::new(cfg.clone());
        let pass_b = TradingPartnerPoolPass::new(cfg);

        let (mut batch_a, mut batch_b) = (make_batch(), make_batch());
        let mut rng_a = ChaCha8Rng::seed_from_u64(123);
        let mut rng_b = ChaCha8Rng::seed_from_u64(123);
        pass_a.apply(&mut batch_a, &mut rng_a);
        pass_b.apply(&mut batch_b, &mut rng_b);

        batch_a.iter().zip(batch_b.iter()).for_each(|(a, b)| {
            assert_eq!(a.lines[0].trading_partner, b.lines[0].trading_partner);
        });
    }

    /// Lines with no trading partner must stay `None` and not be counted.
    #[test]
    fn preserves_lines_without_trading_partner() {
        let mut entries: Vec<JournalEntry> = (0..10).map(|i| make_je(i, None)).collect();
        let pass = TradingPartnerPoolPass::new(TradingPartnerPoolPassConfig { target_size: 5 });
        let mut rng = ChaCha8Rng::seed_from_u64(0);
        let stats = pass.apply(&mut entries, &mut rng);

        assert_eq!(stats.entries_modified, 0);
        assert_eq!(stats.extra["lines_modified"], 0);
        for je in entries.iter() {
            assert!(je.lines[0].trading_partner.is_none());
        }
    }

    /// A configured size of zero is clamped to one, so every partner maps to
    /// the single slot `TP-000000`.
    #[test]
    fn zero_target_size_is_clamped_to_one() {
        let pass = TradingPartnerPoolPass::new(TradingPartnerPoolPassConfig { target_size: 0 });
        let mut entries = vec![make_je(0, Some("V-000001"))];
        let mut rng = ChaCha8Rng::seed_from_u64(0);
        let _ = pass.apply(&mut entries, &mut rng);

        let rewritten = entries[0].lines[0].trading_partner.as_deref();
        assert_eq!(rewritten, Some("TP-000000"));
    }
}
197}