use std::collections::BTreeMap;
use datasynth_config::schema::SourceBlankingPassConfig;
use datasynth_core::models::JournalEntry;
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use super::{ConcentrationPass, ConcentrationStats};
/// Identifier of this pass: returned by `name()` and written to the `pass`
/// field of every `ConcentrationStats` this module produces.
const PASS_NAME: &str = "source_blanking";
/// Concentration pass that randomly replaces a journal entry's
/// `header.sap_source_code` with an empty string.
pub struct SourceBlankingPass {
/// Per-entry probability of blanking; the constructor clamps the configured
/// value to the `0.0..=1.0` range.
rate: f64,
}
impl SourceBlankingPass {
    /// Builds the pass from its configuration.
    ///
    /// The configured `rate` is clamped to `0.0..=1.0`. A NaN rate is treated
    /// as `0.0` (pass disabled): `f64::clamp` would otherwise propagate the
    /// NaN, which compares unequal to `0.0` and would therefore make `apply`
    /// draw a random number per entry without ever blanking anything.
    pub fn new(cfg: SourceBlankingPassConfig) -> Self {
        let rate = if cfg.rate.is_nan() {
            0.0
        } else {
            cfg.rate.clamp(0.0, 1.0)
        };
        Self { rate }
    }
}
impl ConcentrationPass for SourceBlankingPass {
    /// Returns the identifier of this pass (`PASS_NAME`).
    fn name(&self) -> &'static str {
        PASS_NAME
    }

    /// Blanks `header.sap_source_code` on a random subset of `entries`.
    ///
    /// Entries whose source code is already `None` or the empty string are
    /// counted as `already_blank` and skipped *before* any random draw, so
    /// they do not advance `rng`. Every other entry consumes exactly one
    /// `f64` draw and is blanked (set to `Some(String::new())`) when the draw
    /// is below the configured rate.
    ///
    /// When the rate is exactly `0.0` the pass returns immediately without
    /// touching `rng` and with an empty `extra` map. Otherwise `extra` holds:
    ///
    /// * `blanked` – number of entries blanked by this call,
    /// * `already_blank` – number of entries skipped because they were blank,
    /// * `target_rate_bp` – the configured rate in basis points, rounded to
    ///   the nearest integer,
    /// * `effective_rate_bp` – `blanked` over *all* examined entries
    ///   (already-blank ones included) in basis points, using integer
    ///   division; omitted when `entries` is empty.
    fn apply(&self, entries: &mut [JournalEntry], rng: &mut ChaCha8Rng) -> ConcentrationStats {
        if self.rate == 0.0 {
            return ConcentrationStats {
                pass: PASS_NAME,
                entries_examined: entries.len(),
                entries_modified: 0,
                extra: BTreeMap::new(),
            };
        }

        let mut blanked: usize = 0;
        let mut already_blank: u64 = 0;
        for je in entries.iter_mut() {
            // Skip before drawing so blank entries never shift the RNG stream.
            if matches!(je.header.sap_source_code.as_deref(), None | Some("")) {
                already_blank += 1;
                continue;
            }
            let draw: f64 = rng.random();
            if draw < self.rate {
                je.header.sap_source_code = Some(String::new());
                blanked += 1;
            }
        }

        let total = entries.len() as u64;
        // Round before casting: `as u64` truncates toward zero, and a
        // floating-point product can land just below the intended whole
        // number, which would under-report the target by one basis point.
        let target_bp = (self.rate * 10_000.0).round() as u64;

        let mut extra = BTreeMap::new();
        extra.insert("blanked", blanked as u64);
        extra.insert("already_blank", already_blank);
        extra.insert("target_rate_bp", target_bp);
        // `checked_div` yields `None` for an empty slice, leaving the key out.
        if let Some(eff_bp) = (blanked as u64 * 10_000).checked_div(total) {
            extra.insert("effective_rate_bp", eff_bp);
        }

        ConcentrationStats {
            pass: PASS_NAME,
            entries_examined: entries.len(),
            entries_modified: blanked,
            extra,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{JournalEntry, JournalEntryLine};
    use rand::SeedableRng;

    /// Builds a journal entry with a single line on GL account `6000` and the
    /// given `sap_source_code`.
    fn make_je(idx: usize, source: Option<&str>) -> JournalEntry {
        let mut je = JournalEntry::new_simple(
            format!("JE{idx}"),
            "C1".to_string(),
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            format!("test {idx}"),
        );
        je.header.sap_source_code = source.map(String::from);
        let line = JournalEntryLine {
            gl_account: "6000".to_string(),
            ..JournalEntryLine::default()
        };
        je.lines.push(line);
        je
    }

    #[test]
    fn rate_zero_leaves_all_sources_intact() {
        let mut entries: Vec<JournalEntry> = (0..100).map(|i| make_je(i, Some("BKPF"))).collect();
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate: 0.0 });
        let mut rng = ChaCha8Rng::seed_from_u64(0);
        let stats = pass.apply(&mut entries, &mut rng);
        assert_eq!(stats.entries_modified, 0);
        for je in &entries {
            assert_eq!(je.header.sap_source_code.as_deref(), Some("BKPF"));
        }
    }

    #[test]
    fn rate_one_blanks_every_source() {
        let mut entries: Vec<JournalEntry> = (0..100).map(|i| make_je(i, Some("BKPF"))).collect();
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate: 1.0 });
        let mut rng = ChaCha8Rng::seed_from_u64(1);
        let stats = pass.apply(&mut entries, &mut rng);
        assert_eq!(stats.entries_modified, 100);
        for je in &entries {
            assert_eq!(je.header.sap_source_code.as_deref(), Some(""));
        }
    }

    #[test]
    fn rate_021_lands_in_corpus_band() {
        let mut entries: Vec<JournalEntry> =
            (0..2000).map(|i| make_je(i, Some("BKPF"))).collect();
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate: 0.21 });
        let mut rng = ChaCha8Rng::seed_from_u64(7);
        let stats = pass.apply(&mut entries, &mut rng);
        let blanked = stats.entries_modified;
        assert!(
            (320..=520).contains(&blanked),
            "rate=0.21 blanked={} (expected ~420)",
            blanked
        );
        assert_eq!(stats.extra["target_rate_bp"], 2100);
    }

    #[test]
    fn already_blank_jes_pass_through_uncounted() {
        let mut entries: Vec<JournalEntry> = (0..100)
            .map(|i| make_je(i, if i % 2 == 0 { Some("BKPF") } else { None }))
            .collect();
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate: 1.0 });
        let mut rng = ChaCha8Rng::seed_from_u64(2);
        let stats = pass.apply(&mut entries, &mut rng);
        assert_eq!(stats.entries_modified, 50);
        assert_eq!(stats.extra["already_blank"], 50);
        // Entries that started as `None` must stay `None`; only the populated
        // ones are rewritten to the empty string.
        for (i, je) in entries.iter().enumerate() {
            let expected = if i % 2 == 0 { Some("") } else { None };
            assert_eq!(je.header.sap_source_code.as_deref(), expected);
        }
    }

    #[test]
    fn empty_string_sources_count_as_already_blank() {
        let mut entries: Vec<JournalEntry> = (0..10).map(|i| make_je(i, Some(""))).collect();
        let pass = SourceBlankingPass::new(SourceBlankingPassConfig { rate: 1.0 });
        let mut rng = ChaCha8Rng::seed_from_u64(3);
        let stats = pass.apply(&mut entries, &mut rng);
        assert_eq!(stats.entries_modified, 0);
        assert_eq!(stats.extra["already_blank"], 10);
        for je in &entries {
            assert_eq!(je.header.sap_source_code.as_deref(), Some(""));
        }
    }

    #[test]
    fn deterministic_under_same_seed() {
        let make_batch =
            || -> Vec<JournalEntry> { (0..100).map(|i| make_je(i, Some("BKPF"))).collect() };
        let cfg = SourceBlankingPassConfig { rate: 0.3 };
        let pass_a = SourceBlankingPass::new(cfg.clone());
        let pass_b = SourceBlankingPass::new(cfg);
        let mut batch_a = make_batch();
        let mut batch_b = make_batch();
        let mut rng_a = ChaCha8Rng::seed_from_u64(42);
        let mut rng_b = ChaCha8Rng::seed_from_u64(42);
        let stats_a = pass_a.apply(&mut batch_a, &mut rng_a);
        let stats_b = pass_b.apply(&mut batch_b, &mut rng_b);
        assert_eq!(stats_a.entries_modified, stats_b.entries_modified);
        for (a, b) in batch_a.iter().zip(batch_b.iter()) {
            assert_eq!(a.header.sap_source_code, b.header.sap_source_code);
        }
    }
}