Skip to main content

datasynth_generators/concentration/
source_conditional_rarity_pass.rs

//! `SourceConditionalRarityPass` — Phase 1 trait wrapper around the
//! already-shipped SOTA-12 tagger (`crate::anomaly::source_conditional_rarity`).
//!
//! Pure interface adapter — no algorithm change. The four existing unit tests in
//! `anomaly/source_conditional_rarity.rs` continue to cover the algorithm;
//! this module's test only verifies the wrapper plumbing.
7
8use std::collections::BTreeMap;
9
10use datasynth_config::schema::SourceConditionalRarityPassConfig;
11use datasynth_core::models::JournalEntry;
12use rand_chacha::ChaCha8Rng;
13
14use super::{ConcentrationPass, ConcentrationStats};
15use crate::anomaly::source_conditional_rarity::{
16    tag_source_conditional_rarity, SourceConditionalRarityConfig,
17};
18
/// Identifier of this pass: returned by `ConcentrationPass::name` and written
/// into the `pass` field of the `ConcentrationStats` produced by `apply`.
const PASS_NAME: &str = "source_conditional_rarity";
20
/// Adapter that exposes the source-conditional rarity tagger
/// (`tag_source_conditional_rarity`) through the `ConcentrationPass` trait.
pub struct SourceConditionalRarityPass {
    /// Schema-level configuration for the pass. Its optional fields fall back to
    /// `SourceConditionalRarityConfig::default()` when the inner config is built.
    cfg: SourceConditionalRarityPassConfig,
}
24
25impl SourceConditionalRarityPass {
26    pub fn new(cfg: SourceConditionalRarityPassConfig) -> Self {
27        Self { cfg }
28    }
29
30    fn build_inner_config(&self) -> SourceConditionalRarityConfig {
31        let defaults = SourceConditionalRarityConfig::default();
32        SourceConditionalRarityConfig {
33            rate: self.cfg.rate,
34            min_surprise: self.cfg.min_surprise.unwrap_or(defaults.min_surprise),
35            min_per_source_lines: self
36                .cfg
37                .min_per_source_lines
38                .unwrap_or(defaults.min_per_source_lines),
39        }
40    }
41}
42
43impl ConcentrationPass for SourceConditionalRarityPass {
44    fn name(&self) -> &'static str {
45        PASS_NAME
46    }
47
48    fn apply(&self, entries: &mut [JournalEntry], _rng: &mut ChaCha8Rng) -> ConcentrationStats {
49        let inner = self.build_inner_config();
50        let tagged = tag_source_conditional_rarity(entries, &inner);
51        let mut extra = BTreeMap::new();
52        extra.insert("jes_tagged", tagged as u64);
53        ConcentrationStats {
54            pass: PASS_NAME,
55            entries_examined: entries.len(),
56            entries_modified: tagged,
57            extra,
58        }
59    }
60}
61
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{JournalEntry, JournalEntryLine};
    use rand::SeedableRng;

    /// Builds a journal entry with source code `source` and a single line
    /// posted to GL account `account`; `idx` only makes the id and text unique.
    fn make_je_with_source(idx: usize, source: &str, account: &str) -> JournalEntry {
        let mut je = JournalEntry::new_simple(
            format!("JE{idx}"),
            "C1".to_string(),
            NaiveDate::from_ymd_opt(2024, 1, 1).expect("2024-01-01 is a valid date"),
            format!("test je {idx}"),
        );
        je.header.sap_source_code = Some(source.to_string());
        je.lines.push(JournalEntryLine {
            gl_account: account.to_string(),
            ..JournalEntryLine::default()
        });
        je
    }

    /// Verifies the wrapper plumbing: config translation, delegation to the
    /// tagger, and every field of the returned stats.
    #[test]
    fn wrapper_delegates_to_inner_tagger() {
        // 100 JEs, 99 share the common (S1, 6000) edge, 1 is rare (S1, 9999).
        // With rate=0.02 the wrapper should tag at most 2.
        let mut entries: Vec<JournalEntry> = (0..99)
            .map(|i| make_je_with_source(i, "S1", "6000"))
            .collect();
        entries.push(make_je_with_source(99, "S1", "9999"));

        let pass = SourceConditionalRarityPass::new(SourceConditionalRarityPassConfig {
            rate: 0.02,
            min_surprise: Some(0.0), // disable floor to confirm count from rate alone
            min_per_source_lines: Some(1),
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let stats = pass.apply(&mut entries, &mut rng);

        assert_eq!(pass.name(), PASS_NAME);
        assert_eq!(stats.pass, PASS_NAME);
        assert_eq!(stats.entries_examined, 100);
        // rate=0.02 * 100 = 2 JEs eligible to tag; the rare one (9999) must be in.
        assert!(stats.entries_modified <= 2, "{}", stats.entries_modified);
        assert!(stats.entries_modified >= 1, "rare JE should be tagged");
        // The `extra` counter is wrapper-only plumbing and must mirror the
        // tagger's return value.
        assert_eq!(
            stats.extra.get("jes_tagged").copied(),
            Some(stats.entries_modified as u64),
            "`jes_tagged` must mirror `entries_modified`"
        );
        // The rare JE must be one of the tagged ones. Search the lines instead of
        // indexing so a malformed entry fails the assertion rather than panicking.
        let tagged_rare = entries.iter().any(|je| {
            je.header.is_anomaly && je.lines.iter().any(|line| line.gl_account == "9999")
        });
        assert!(tagged_rare, "the rare (S1, 9999) JE must be tagged");
    }
}