datasynth_core/distributions/
source_active_window.rs1use std::collections::HashMap;
4
5use rand::{Rng, RngExt};
6
/// A span of days described by its first and last day index.
///
/// [`ActiveWindow::contains`] treats both endpoints as part of the window.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ActiveWindow {
    /// First day index covered by the window.
    pub start_day: i64,
    /// Last day index covered by the window (inclusive for `contains`).
    pub end_day: i64,
}

impl ActiveWindow {
    /// Returns `true` when `day` lies in `start_day..=end_day`.
    ///
    /// A window whose `start_day` is greater than its `end_day` contains no
    /// day at all.
    pub fn contains(&self, day: i64) -> bool {
        (self.start_day..=self.end_day).contains(&day)
    }

    /// Returns `end_day - start_day`, or `0` when that difference is negative.
    ///
    /// Because `contains` is inclusive on both ends, a window with
    /// `start_day <= end_day` matches `length_days() + 1` distinct days.
    pub fn length_days(&self) -> i64 {
        let span = self.end_day - self.start_day;
        if span < 0 {
            0
        } else {
            span
        }
    }
}
21
22#[derive(Clone)]
23pub struct SourceActiveWindow {
24 pub by_source: HashMap<String, ActiveWindow>,
25 pub period_days: i64,
26}
27
28impl SourceActiveWindow {
29 pub fn build<R: Rng>(
30 sources: &[String],
31 period_days: i64,
32 mut lifetime_sampler: impl FnMut(&mut R) -> i64,
33 rng: &mut R,
34 ) -> Self {
35 let mut by_source = HashMap::new();
36 for src in sources {
37 let life = lifetime_sampler(rng).min(period_days).max(0);
38 let max_start = (period_days - life).max(0);
39 let start = if max_start == 0 {
40 0
41 } else {
42 rng.random_range(0..=max_start)
43 };
44 by_source.insert(
45 src.clone(),
46 ActiveWindow {
47 start_day: start,
48 end_day: start + life,
49 },
50 );
51 }
52 Self {
53 by_source,
54 period_days,
55 }
56 }
57
58 pub fn is_active(&self, source: &str, day_in_period: i64) -> bool {
59 match self.by_source.get(source) {
60 Some(w) => w.contains(day_in_period),
61 None => day_in_period >= 0 && day_in_period < self.period_days,
62 }
63 }
64}
65
66#[derive(Debug, Clone)]
70pub struct MultiSegmentActiveWindow {
71 pub by_source: std::collections::HashMap<String, Vec<ActiveWindow>>,
72 pub period_days: i64,
73}
74
75impl MultiSegmentActiveWindow {
76 pub fn build_from_prior<R: rand::Rng>(
80 sources: &[String],
81 period_days: i64,
82 prior: &crate::distributions::behavioral_priors::ActiveSegmentsPrior,
83 mut fallback_lifetime: impl FnMut(&mut R) -> i64,
84 rng: &mut R,
85 ) -> Self {
86 use rand::RngExt;
87 let mut by_source: std::collections::HashMap<String, Vec<ActiveWindow>> =
88 std::collections::HashMap::new();
89 for src in sources {
90 let segments = match prior.by_source.get(src) {
91 Some(summary) => place_segments_from_prior(summary, period_days, rng),
92 None => {
93 let life = fallback_lifetime(rng).min(period_days).max(0);
94 let max_start = (period_days - life).max(0);
95 let start = if max_start == 0 {
96 0
97 } else {
98 rng.random_range(0..=max_start)
99 };
100 vec![ActiveWindow {
101 start_day: start,
102 end_day: start + life,
103 }]
104 }
105 };
106 by_source.insert(src.clone(), segments);
107 }
108 Self {
109 by_source,
110 period_days,
111 }
112 }
113
114 pub fn is_active(&self, source: &str, day: i64) -> bool {
115 match self.by_source.get(source) {
116 Some(segments) => segments.iter().any(|w| w.contains(day)),
117 None => day >= 0 && day < self.period_days,
118 }
119 }
120}
121
122fn place_segments_from_prior<R: rand::Rng>(
123 summary: &crate::distributions::behavioral_priors::SourceSegmentSummary,
124 period_days: i64,
125 rng: &mut R,
126) -> Vec<ActiveWindow> {
127 use rand::RngExt;
128 let n_segments = summary.segment_count_histogram.sample_bucket(rng).max(1) as usize;
129 let mut lengths: Vec<i64> = Vec::with_capacity(n_segments);
131 let mut gaps: Vec<i64> = Vec::with_capacity(n_segments.saturating_sub(1));
132 for _ in 0..n_segments {
133 lengths.push(summary.segment_length_histogram.sample_bucket(rng).max(1) as i64);
134 }
135 for _ in 0..n_segments.saturating_sub(1) {
136 gaps.push(summary.gap_length_histogram.sample_bucket(rng).max(1) as i64);
137 }
138 let total_span: i64 = lengths.iter().sum::<i64>() + gaps.iter().sum::<i64>();
140 let max_start = (period_days - total_span).max(0);
141 let mut cursor = if max_start == 0 {
142 0
143 } else {
144 rng.random_range(0..=max_start)
145 };
146 let mut windows = Vec::with_capacity(n_segments);
147 for (idx, len) in lengths.iter().enumerate() {
148 if idx > 0 {
149 cursor += gaps[idx - 1];
150 }
151 let end = (cursor + len).min(period_days);
152 windows.push(ActiveWindow {
153 start_day: cursor,
154 end_day: end,
155 });
156 cursor = end;
157 }
158 windows
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Both endpoints of a window count as inside; their outer neighbours do not.
    #[test]
    fn active_window_contains_known_range() {
        let window = ActiveWindow {
            start_day: 10,
            end_day: 20,
        };
        for inside in [10, 15, 20] {
            assert!(window.contains(inside));
        }
        for outside in [9, 21] {
            assert!(!window.contains(outside));
        }
    }

    /// Every source gets exactly one window, sized by the sampler and kept
    /// within the period bounds.
    #[test]
    fn build_assigns_one_window_per_source() {
        let sources = ["KR", "RE"].map(String::from);
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let built =
            SourceActiveWindow::build(&sources, 365, |r| r.random_range(30..=180), &mut rng);
        assert_eq!(built.by_source.len(), 2);
        for window in built.by_source.values() {
            assert!((30..=180).contains(&window.length_days()));
            assert!(window.start_day >= 0);
            assert!(window.end_day <= 365);
        }
    }

    /// A source without a window is active on `0..period_days` and nowhere else.
    #[test]
    fn is_active_unknown_source_full_period() {
        let empty = SourceActiveWindow {
            by_source: HashMap::new(),
            period_days: 100,
        };
        for (day, expected) in [(0, true), (99, true), (100, false), (-1, false)] {
            assert_eq!(empty.is_active("UNKNOWN", day), expected);
        }
    }

    /// Segments placed from a prior are non-empty, few, and well-formed.
    #[test]
    fn multi_segment_active_window_respects_prior() {
        use crate::distributions::behavioral_priors::{
            ActiveSegmentsPrior, LineCountHistogram, SourceSegmentSummary,
            ACTIVE_LIFETIME_DAY_BUCKETS, SEGMENT_COUNT_BUCKETS, SEGMENT_GAP_BUCKETS,
        };

        let (segment_count_histogram, _) = LineCountHistogram::build(&[3], SEGMENT_COUNT_BUCKETS);
        let (segment_length_histogram, _) =
            LineCountHistogram::build(&[30, 30, 30], ACTIVE_LIFETIME_DAY_BUCKETS);
        let (gap_length_histogram, _) = LineCountHistogram::build(&[14, 14], SEGMENT_GAP_BUCKETS);

        let summary = SourceSegmentSummary {
            segment_count_histogram,
            segment_length_histogram,
            gap_length_histogram,
        };
        let prior = ActiveSegmentsPrior {
            by_source: std::collections::BTreeMap::from([("A".to_string(), summary)]),
        };

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let built = MultiSegmentActiveWindow::build_from_prior(
            &["A".to_string()],
            365,
            &prior,
            |r| r.random_range(30..=180),
            &mut rng,
        );

        let placed = &built.by_source["A"];
        assert!(!placed.is_empty(), "should place at least one segment");
        assert!(
            placed.len() <= 5,
            "got {} segments (expected ~3 ± noise, ≥1 covered by is_empty assert above)",
            placed.len()
        );
        for window in placed {
            assert!(window.start_day >= 0);
            assert!(window.end_day <= 365);
            assert!(window.end_day >= window.start_day);
        }
    }

    /// A source without segments is active on `0..period_days`.
    #[test]
    fn multi_segment_is_active_handles_unknown_source() {
        let empty = MultiSegmentActiveWindow {
            by_source: HashMap::new(),
            period_days: 100,
        };
        for (day, expected) in [(0, true), (99, true), (100, false)] {
            assert_eq!(empty.is_active("UNKNOWN", day), expected);
        }
    }
}