// datasynth_generators/concentration/source_blanking.rs
use std::collections::BTreeMap;
27
28use datasynth_config::schema::SourceBlankingPassConfig;
29use datasynth_core::models::JournalEntry;
30use rand::prelude::*;
31use rand_chacha::ChaCha8Rng;
32
33use super::{ConcentrationPass, ConcentrationStats};
34
/// Identifier of this pass, returned by `name()` and recorded in the `pass` field of every
/// `ConcentrationStats` it produces.
const PASS_NAME: &str = "source_blanking";
36
/// Concentration pass that clears `header.sap_source_code` on a random share of journal
/// entries.
///
/// Construct it with `SourceBlankingPass::new`, which sanitises the configured rate.
#[derive(Debug, Clone)]
pub struct SourceBlankingPass {
    /// Probability that an entry which still carries a source code gets blanked.
    rate: f64,
}
40
41impl SourceBlankingPass {
42 pub fn new(cfg: SourceBlankingPassConfig) -> Self {
43 Self {
44 rate: cfg.rate.clamp(0.0, 1.0),
46 }
47 }
48}
49
50impl ConcentrationPass for SourceBlankingPass {
51 fn name(&self) -> &'static str {
52 PASS_NAME
53 }
54
55 fn apply(&self, entries: &mut [JournalEntry], rng: &mut ChaCha8Rng) -> ConcentrationStats {
56 if self.rate == 0.0 {
57 return ConcentrationStats {
59 pass: PASS_NAME,
60 entries_examined: entries.len(),
61 entries_modified: 0,
62 extra: BTreeMap::new(),
63 };
64 }
65
66 let mut blanked: usize = 0;
67 let mut already_blank: u64 = 0;
68 for je in entries.iter_mut() {
69 if je.header.sap_source_code.is_none() {
70 already_blank += 1;
71 continue;
72 }
73 let draw: f64 = rng.random();
74 if draw < self.rate {
75 je.header.sap_source_code = None;
76 blanked += 1;
77 }
78 }
79
80 let mut extra = BTreeMap::new();
81 extra.insert("blanked", blanked as u64);
82 extra.insert("already_blank", already_blank);
83 let total = entries.len() as u64;
86 let target_bp = (self.rate * 10_000.0) as u64;
87 extra.insert("target_rate_bp", target_bp);
88 if let Some(eff_bp) = (blanked as u64 * 10_000).checked_div(total) {
89 extra.insert("effective_rate_bp", eff_bp);
90 }
91
92 ConcentrationStats {
93 pass: PASS_NAME,
94 entries_examined: entries.len(),
95 entries_modified: blanked,
96 extra,
97 }
98 }
99}
100
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{JournalEntry, JournalEntryLine};
    use rand::SeedableRng;

    /// Source code stamped on every fixture entry that is not created blank.
    const SOURCE: &str = "BKPF";

    /// Creates a single-line journal entry numbered `idx` whose SAP source code is `source`.
    fn make_je(idx: usize, source: Option<&str>) -> JournalEntry {
        let date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut je = JournalEntry::new_simple(
            format!("JE{idx}"),
            "C1".to_string(),
            date,
            format!("test {idx}"),
        );
        je.header.sap_source_code = source.map(str::to_owned);
        je.lines.push(JournalEntryLine {
            gl_account: "6000".to_string(),
            ..JournalEntryLine::default()
        });
        je
    }

    /// Creates `count` entries that all carry the `BKPF` source code.
    fn stamped_batch(count: usize) -> Vec<JournalEntry> {
        (0..count).map(|idx| make_je(idx, Some(SOURCE))).collect()
    }

    /// Runs a freshly built pass with the given `rate` over `entries`, using an RNG seeded
    /// with `seed`, and returns the stats it reports.
    fn run_pass(rate: f64, seed: u64, entries: &mut [JournalEntry]) -> ConcentrationStats {
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate });
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        pass.apply(entries, &mut rng)
    }

    #[test]
    fn rate_zero_leaves_all_sources_intact() {
        let mut entries = stamped_batch(100);
        let stats = run_pass(0.0, 0, &mut entries);

        assert_eq!(stats.entries_modified, 0);
        assert!(entries
            .iter()
            .all(|je| je.header.sap_source_code.as_deref() == Some(SOURCE)));
    }

    #[test]
    fn rate_one_blanks_every_source() {
        let mut entries = stamped_batch(100);
        let stats = run_pass(1.0, 1, &mut entries);

        assert_eq!(stats.entries_modified, 100);
        assert!(entries.iter().all(|je| je.header.sap_source_code.is_none()));
    }

    #[test]
    fn rate_021_lands_in_corpus_band() {
        let mut entries = stamped_batch(2000);
        let stats = run_pass(0.21, 7, &mut entries);

        // 2000 entries at a 0.21 rate average 420 blanks; accept 100 either side.
        let blanked = stats.entries_modified;
        assert!(
            (320..=520).contains(&blanked),
            "rate=0.21 blanked={} (expected ~420)",
            blanked
        );
        assert_eq!(stats.extra["target_rate_bp"], 2100);
    }

    #[test]
    fn already_blank_jes_pass_through_uncounted() {
        // Even indices carry a source code, odd indices start out blank.
        let mut entries: Vec<JournalEntry> = (0..100)
            .map(|idx| make_je(idx, (idx % 2 == 0).then_some(SOURCE)))
            .collect();
        let stats = run_pass(1.0, 2, &mut entries);

        assert_eq!(stats.entries_modified, 50);
        assert_eq!(stats.extra["already_blank"], 50);
    }

    #[test]
    fn deterministic_under_same_seed() {
        let mut first = stamped_batch(100);
        let mut second = stamped_batch(100);
        run_pass(0.3, 42, &mut first);
        run_pass(0.3, 42, &mut second);

        let sources_first = first.iter().map(|je| &je.header.sap_source_code);
        let sources_second = second.iter().map(|je| &je.header.sap_source_code);
        assert!(sources_first.eq(sources_second));
    }
}