Skip to main content

datasynth_core/distributions/
source_active_window.rs

1//! Per-Source active-window sampler driven by SP2's ActiveLifetimePrior.
2
3use std::collections::HashMap;
4
5use rand::{Rng, RngExt};
6
/// A span of day offsets during which a Source is considered active.
///
/// Membership is inclusive at both bounds; see [`ActiveWindow::contains`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ActiveWindow {
    /// First active day (inclusive).
    pub start_day: i64,
    /// Last active day (inclusive).
    pub end_day: i64,
}

impl ActiveWindow {
    /// Returns `true` when `day` lies in `start_day..=end_day`.
    ///
    /// A window whose `end_day` is smaller than its `start_day` contains no day.
    pub fn contains(&self, day: i64) -> bool {
        (self.start_day..=self.end_day).contains(&day)
    }

    /// Returns `end_day - start_day`, floored at zero.
    ///
    /// This is the distance between the bounds, not the number of days for which
    /// [`ActiveWindow::contains`] is true: a window with equal bounds reports `0`
    /// even though it contains one day.
    pub fn length_days(&self) -> i64 {
        let span = self.end_day - self.start_day;
        if span < 0 {
            0
        } else {
            span
        }
    }
}
21
22#[derive(Clone)]
23pub struct SourceActiveWindow {
24    pub by_source: HashMap<String, ActiveWindow>,
25    pub period_days: i64,
26}
27
28impl SourceActiveWindow {
29    pub fn build<R: Rng>(
30        sources: &[String],
31        period_days: i64,
32        mut lifetime_sampler: impl FnMut(&mut R) -> i64,
33        rng: &mut R,
34    ) -> Self {
35        let mut by_source = HashMap::new();
36        for src in sources {
37            let life = lifetime_sampler(rng).min(period_days).max(0);
38            let max_start = (period_days - life).max(0);
39            let start = if max_start == 0 {
40                0
41            } else {
42                rng.random_range(0..=max_start)
43            };
44            by_source.insert(
45                src.clone(),
46                ActiveWindow {
47                    start_day: start,
48                    end_day: start + life,
49                },
50            );
51        }
52        Self {
53            by_source,
54            period_days,
55        }
56    }
57
58    pub fn is_active(&self, source: &str, day_in_period: i64) -> bool {
59        match self.by_source.get(source) {
60            Some(w) => w.contains(day_in_period),
61            None => day_in_period >= 0 && day_in_period < self.period_days,
62        }
63    }
64}
65
66/// SP3.2 — per-Source multi-segment active windows. When present in the prior,
67/// preferred over the single-window `SourceActiveWindow` because real Sources
68/// have multimodal activity (month-end pile-ups, quarter close).
69#[derive(Debug, Clone)]
70pub struct MultiSegmentActiveWindow {
71    pub by_source: std::collections::HashMap<String, Vec<ActiveWindow>>,
72    pub period_days: i64,
73}
74
75impl MultiSegmentActiveWindow {
76    /// Build by sampling segments from the prior.
77    ///
78    /// `fallback_lifetime` is used when a Source has no per-source prior entry.
79    pub fn build_from_prior<R: rand::Rng>(
80        sources: &[String],
81        period_days: i64,
82        prior: &crate::distributions::behavioral_priors::ActiveSegmentsPrior,
83        mut fallback_lifetime: impl FnMut(&mut R) -> i64,
84        rng: &mut R,
85    ) -> Self {
86        use rand::RngExt;
87        let mut by_source: std::collections::HashMap<String, Vec<ActiveWindow>> =
88            std::collections::HashMap::new();
89        for src in sources {
90            let segments = match prior.by_source.get(src) {
91                Some(summary) => place_segments_from_prior(summary, period_days, rng),
92                None => {
93                    let life = fallback_lifetime(rng).min(period_days).max(0);
94                    let max_start = (period_days - life).max(0);
95                    let start = if max_start == 0 {
96                        0
97                    } else {
98                        rng.random_range(0..=max_start)
99                    };
100                    vec![ActiveWindow {
101                        start_day: start,
102                        end_day: start + life,
103                    }]
104                }
105            };
106            by_source.insert(src.clone(), segments);
107        }
108        Self {
109            by_source,
110            period_days,
111        }
112    }
113
114    pub fn is_active(&self, source: &str, day: i64) -> bool {
115        match self.by_source.get(source) {
116            Some(segments) => segments.iter().any(|w| w.contains(day)),
117            None => day >= 0 && day < self.period_days,
118        }
119    }
120}
121
122fn place_segments_from_prior<R: rand::Rng>(
123    summary: &crate::distributions::behavioral_priors::SourceSegmentSummary,
124    period_days: i64,
125    rng: &mut R,
126) -> Vec<ActiveWindow> {
127    use rand::RngExt;
128    let n_segments = summary.segment_count_histogram.sample_bucket(rng).max(1) as usize;
129    // Pre-sample lengths + gaps.
130    let mut lengths: Vec<i64> = Vec::with_capacity(n_segments);
131    let mut gaps: Vec<i64> = Vec::with_capacity(n_segments.saturating_sub(1));
132    for _ in 0..n_segments {
133        lengths.push(summary.segment_length_histogram.sample_bucket(rng).max(1) as i64);
134    }
135    for _ in 0..n_segments.saturating_sub(1) {
136        gaps.push(summary.gap_length_histogram.sample_bucket(rng).max(1) as i64);
137    }
138    // Total span = sum(lengths) + sum(gaps).
139    let total_span: i64 = lengths.iter().sum::<i64>() + gaps.iter().sum::<i64>();
140    let max_start = (period_days - total_span).max(0);
141    let mut cursor = if max_start == 0 {
142        0
143    } else {
144        rng.random_range(0..=max_start)
145    };
146    let mut windows = Vec::with_capacity(n_segments);
147    for (idx, len) in lengths.iter().enumerate() {
148        if idx > 0 {
149            cursor += gaps[idx - 1];
150        }
151        let end = (cursor + len).min(period_days);
152        windows.push(ActiveWindow {
153            start_day: cursor,
154            end_day: end,
155        });
156        cursor = end;
157    }
158    windows
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Both bounds of a window count as active; the days just outside do not.
    #[test]
    fn active_window_contains_known_range() {
        let window = ActiveWindow {
            start_day: 10,
            end_day: 20,
        };
        for inside in [10, 15, 20] {
            assert!(window.contains(inside));
        }
        for outside in [9, 21] {
            assert!(!window.contains(outside));
        }
    }

    /// Every listed Source gets a window whose length and bounds respect the sampler and period.
    #[test]
    fn build_assigns_one_window_per_source() {
        let sources: Vec<String> = ["KR", "RE"].iter().map(|s| s.to_string()).collect();
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let schedule =
            SourceActiveWindow::build(&sources, 365, |r| r.random_range(30..=180), &mut rng);
        assert_eq!(schedule.by_source.len(), 2);
        for window in schedule.by_source.values() {
            assert!((30..=180).contains(&window.length_days()));
            assert!(window.start_day >= 0);
            assert!(window.end_day <= 365);
        }
    }

    /// A Source without an entry is active on `0..period_days` and nowhere else.
    #[test]
    fn is_active_unknown_source_full_period() {
        let schedule = SourceActiveWindow {
            by_source: HashMap::new(),
            period_days: 100,
        };
        for day in [0, 99] {
            assert!(schedule.is_active("UNKNOWN", day));
        }
        for day in [100, -1] {
            assert!(!schedule.is_active("UNKNOWN", day));
        }
    }

    /// Segments placed from a prior are non-empty, bounded in count, and well-formed.
    #[test]
    fn multi_segment_active_window_respects_prior() {
        use crate::distributions::behavioral_priors::{
            ActiveSegmentsPrior, LineCountHistogram, SourceSegmentSummary,
            ACTIVE_LIFETIME_DAY_BUCKETS, SEGMENT_COUNT_BUCKETS, SEGMENT_GAP_BUCKETS,
        };
        let (segment_count_histogram, _) = LineCountHistogram::build(&[3], SEGMENT_COUNT_BUCKETS);
        let (segment_length_histogram, _) =
            LineCountHistogram::build(&[30, 30, 30], ACTIVE_LIFETIME_DAY_BUCKETS);
        let (gap_length_histogram, _) = LineCountHistogram::build(&[14, 14], SEGMENT_GAP_BUCKETS);
        let summary = SourceSegmentSummary {
            segment_count_histogram,
            segment_length_histogram,
            gap_length_histogram,
        };
        let mut per_source = std::collections::BTreeMap::new();
        per_source.insert("A".to_string(), summary);
        let prior = ActiveSegmentsPrior {
            by_source: per_source,
        };
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let schedule = MultiSegmentActiveWindow::build_from_prior(
            &["A".to_string()],
            365,
            &prior,
            |r| r.random_range(30..=180),
            &mut rng,
        );
        let segments = &schedule.by_source["A"];
        assert!(!segments.is_empty(), "should place at least one segment");
        assert!(
            segments.len() <= 5,
            "got {} segments (expected ~3 ± noise, ≥1 covered by is_empty assert above)",
            segments.len()
        );
        for window in segments {
            assert!(window.start_day >= 0);
            assert!(window.end_day <= 365);
            assert!(window.end_day >= window.start_day);
        }
    }

    /// The multi-segment schedule uses the same full-period fallback for unknown Sources.
    #[test]
    fn multi_segment_is_active_handles_unknown_source() {
        let schedule = MultiSegmentActiveWindow {
            by_source: HashMap::new(),
            period_days: 100,
        };
        for day in [0, 99] {
            assert!(schedule.is_active("UNKNOWN", day));
        }
        assert!(!schedule.is_active("UNKNOWN", 100));
    }
}