Skip to main content

datasynth_generators/hr/
time_entry_generator.rs

1//! Time entry generator for the Hire-to-Retire (H2R) process.
2//!
3//! Generates daily time entries for employees across business days in a period,
4//! including regular hours, overtime, PTO, and sick leave with approval statuses.
5
6use chrono::{Datelike, NaiveDate};
7use datasynth_config::schema::TimeAttendanceConfig;
8use datasynth_core::distributions::TemporalContext;
9use datasynth_core::models::{TimeApprovalStatus, TimeEntry};
10use datasynth_core::utils::seeded_rng;
11use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use std::collections::HashMap;
15use std::sync::Arc;
16use tracing::debug;
17
/// Probability (3%) that an employee's entry for a business day is recorded
/// as PTO. The PTO roll is checked first, so it takes precedence over the
/// sick-leave roll.
const DEFAULT_PTO_RATE: f64 = 0.03;

/// Probability threshold (1%) for the sick-leave roll. The roll only decides
/// the entry type when the PTO roll has not already claimed the day.
const DEFAULT_SICK_RATE: f64 = 0.01;
23
24/// Generates [`TimeEntry`] records for employees across business days in a period.
25pub struct TimeEntryGenerator {
26    rng: ChaCha8Rng,
27    uuid_factory: DeterministicUuidFactory,
28    /// Pool of real employee IDs for approved_by references.
29    employee_ids_pool: Vec<String>,
30    /// Pool of real cost center IDs.
31    cost_center_ids_pool: Vec<String>,
32    /// Mapping of employee_id → employee_name for denormalization (DS-011).
33    employee_names: HashMap<String, String>,
34    /// v3.4.2+ temporal context — when set, `collect_business_days` filters
35    /// out holidays (not just weekends) and `submitted_at` lag snaps to
36    /// business days. `None` preserves legacy weekday-only behavior.
37    temporal_context: Option<Arc<TemporalContext>>,
38}
39
40impl TimeEntryGenerator {
41    /// Create a new time entry generator.
42    pub fn new(seed: u64) -> Self {
43        Self {
44            rng: seeded_rng(seed, 0),
45            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::TimeEntry),
46            employee_ids_pool: Vec::new(),
47            cost_center_ids_pool: Vec::new(),
48            employee_names: HashMap::new(),
49            temporal_context: None,
50        }
51    }
52
53    /// Set the shared [`TemporalContext`] so business-day collection excludes
54    /// holidays (not just weekends) and `submitted_at` lag days snap forward
55    /// to the next business day.
56    pub fn set_temporal_context(&mut self, ctx: Arc<TemporalContext>) {
57        self.temporal_context = Some(ctx);
58    }
59
60    /// Builder variant of [`Self::set_temporal_context`].
61    pub fn with_temporal_context(mut self, ctx: Arc<TemporalContext>) -> Self {
62        self.temporal_context = Some(ctx);
63        self
64    }
65
66    /// Set ID pools for cross-reference coherence.
67    ///
68    /// When pools are non-empty, the generator selects `approved_by` from
69    /// `employee_ids` and `cost_center` from `cost_center_ids` instead of
70    /// fabricating placeholder IDs.
71    pub fn with_pools(mut self, employee_ids: Vec<String>, cost_center_ids: Vec<String>) -> Self {
72        self.employee_ids_pool = employee_ids;
73        self.cost_center_ids_pool = cost_center_ids;
74        self
75    }
76
77    /// Set the employee name mapping for denormalization (DS-011).
78    ///
79    /// Maps employee IDs to their display names so that generated time entries
80    /// include the employee name for graph export convenience.
81    pub fn with_employee_names(mut self, names: HashMap<String, String>) -> Self {
82        self.employee_names = names;
83        self
84    }
85
86    /// Generate time entries for a set of employees over a date range.
87    ///
88    /// # Arguments
89    ///
90    /// * `employee_ids` - Slice of employee identifiers
91    /// * `period_start` - Start of the period (inclusive)
92    /// * `period_end` - End of the period (inclusive)
93    /// * `config` - Time and attendance configuration
94    pub fn generate(
95        &mut self,
96        employee_ids: &[String],
97        period_start: NaiveDate,
98        period_end: NaiveDate,
99        config: &TimeAttendanceConfig,
100    ) -> Vec<TimeEntry> {
101        debug!(employee_count = employee_ids.len(), %period_start, %period_end, "Generating time entries");
102        let mut entries = Vec::new();
103        let business_days = self.collect_business_days(period_start, period_end);
104
105        let overtime_rate = config.overtime_rate;
106
107        for employee_id in employee_ids {
108            for &day in &business_days {
109                let entry = self.generate_entry(employee_id, day, overtime_rate);
110                entries.push(entry);
111            }
112        }
113
114        entries
115    }
116
117    /// Collect all business days within the given date range.
118    ///
119    /// When a [`TemporalContext`] is configured, holidays are also excluded
120    /// (the context's full `is_business_day` check runs). Otherwise, only
121    /// weekends are filtered (legacy pre-v3.4.2 behavior).
122    fn collect_business_days(&self, start: NaiveDate, end: NaiveDate) -> Vec<NaiveDate> {
123        let mut days = Vec::new();
124        let mut current = start;
125        while current <= end {
126            let is_business = match &self.temporal_context {
127                Some(ctx) => ctx.is_business_day(current),
128                None => {
129                    let weekday = current.weekday();
130                    weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun
131                }
132            };
133            if is_business {
134                days.push(current);
135            }
136            current += chrono::Duration::days(1);
137        }
138        days
139    }
140
141    /// Snap a date to the next business day when a [`TemporalContext`] is
142    /// present; otherwise return it unchanged.
143    fn snap_to_business_day(&self, date: NaiveDate) -> NaiveDate {
144        match &self.temporal_context {
145            Some(ctx) => ctx.adjust_to_business_day(date),
146            None => date,
147        }
148    }
149
150    /// Generate a single time entry for an employee on a given day.
151    fn generate_entry(
152        &mut self,
153        employee_id: &str,
154        date: NaiveDate,
155        overtime_rate: f64,
156    ) -> TimeEntry {
157        let entry_id = self.uuid_factory.next().to_string();
158
159        // Determine entry type: PTO, sick, or regular working day
160        let pto_roll: f64 = self.rng.random();
161        let sick_roll: f64 = self.rng.random();
162
163        let (hours_regular, hours_overtime, hours_pto, hours_sick) = if pto_roll < DEFAULT_PTO_RATE
164        {
165            // PTO day: 8 hours PTO, no work
166            (0.0, 0.0, 8.0, 0.0)
167        } else if sick_roll < DEFAULT_SICK_RATE {
168            // Sick day: 8 hours sick leave, no work
169            (0.0, 0.0, 0.0, 8.0)
170        } else {
171            // Regular working day
172            let regular = 8.0;
173            let overtime = if self.rng.random_bool(overtime_rate) {
174                self.rng.random_range(1.0..=4.0)
175            } else {
176                0.0
177            };
178            (regular, overtime, 0.0, 0.0)
179        };
180
181        // Project assignment: ~60% of entries have a project
182        let project_id = if self.rng.random_bool(0.60) {
183            Some(format!("PROJ-{:04}", self.rng.random_range(1..=50)))
184        } else {
185            None
186        };
187
188        // Cost center: ~70% of entries have a cost center
189        let cost_center = if self.rng.random_bool(0.70) {
190            if !self.cost_center_ids_pool.is_empty() {
191                let idx = self.rng.random_range(0..self.cost_center_ids_pool.len());
192                Some(self.cost_center_ids_pool[idx].clone())
193            } else {
194                Some(format!("CC-{:03}", self.rng.random_range(100..=500)))
195            }
196        } else {
197            None
198        };
199
200        // Description based on entry type
201        let description = if hours_pto > 0.0 {
202            Some("Paid time off".to_string())
203        } else if hours_sick > 0.0 {
204            Some("Sick leave".to_string())
205        } else if hours_overtime > 0.0 {
206            Some("Regular work + overtime".to_string())
207        } else {
208            None
209        };
210
211        // Approval status: 90% approved, 5% pending, 5% rejected
212        let status_roll: f64 = self.rng.random();
213        let approval_status = if status_roll < 0.90 {
214            TimeApprovalStatus::Approved
215        } else if status_roll < 0.95 {
216            TimeApprovalStatus::Pending
217        } else {
218            TimeApprovalStatus::Rejected
219        };
220
221        let approved_by = if approval_status == TimeApprovalStatus::Approved {
222            if !self.employee_ids_pool.is_empty() {
223                let idx = self.rng.random_range(0..self.employee_ids_pool.len());
224                Some(self.employee_ids_pool[idx].clone())
225            } else {
226                Some(format!("MGR-{:04}", self.rng.random_range(1..=100)))
227            }
228        } else {
229            None
230        };
231
232        let submitted_at =
233            if approval_status != TimeApprovalStatus::Pending || self.rng.random_bool(0.5) {
234                // Most entries are submitted on the day or the next day
235                let lag = self.rng.random_range(0..=2);
236                let raw = date + chrono::Duration::days(lag);
237                Some(self.snap_to_business_day(raw))
238            } else {
239                None
240            };
241
242        TimeEntry {
243            entry_id,
244            employee_id: employee_id.to_string(),
245            date,
246            hours_regular,
247            hours_overtime,
248            hours_pto,
249            hours_sick,
250            project_id,
251            cost_center,
252            description,
253            approval_status,
254            approved_by,
255            submitted_at,
256            employee_name: self.employee_names.get(employee_id).cloned(),
257            billable: self.rng.random_bool(0.70),
258        }
259    }
260}
261
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Three fixed employee IDs shared by the small-sample tests.
    fn test_employee_ids() -> Vec<String> {
        vec![
            "EMP-001".to_string(),
            "EMP-002".to_string(),
            "EMP-003".to_string(),
        ]
    }

    /// One entry per employee per weekday, each with hours and valid IDs.
    #[test]
    fn test_basic_time_entry_generation() {
        // `gen` is a reserved keyword in the Rust 2024 edition, so the
        // generator bindings in this module avoid that name.
        let mut generator = TimeEntryGenerator::new(42);
        let employees = test_employee_ids();
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 1, 31).unwrap();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, period_start, period_end, &config);

        // January 2024 has 23 weekdays (no temporal context, so holidays are
        // not excluded), 3 employees => 69 entries
        assert!(!entries.is_empty());
        assert_eq!(entries.len(), 23 * 3);

        for entry in &entries {
            assert!(!entry.entry_id.is_empty());
            assert!(!entry.employee_id.is_empty());
            // Each day should have some hours
            let total =
                entry.hours_regular + entry.hours_overtime + entry.hours_pto + entry.hours_sick;
            assert!(total > 0.0, "Entry should have some hours recorded");
            // No weekend entries
            let weekday = entry.date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Should not generate weekend entries"
            );
        }
    }

    /// Two generators built from the same seed must emit identical entries.
    #[test]
    fn test_deterministic_time_entries() {
        let employees = test_employee_ids();
        let period_start = NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();
        let config = TimeAttendanceConfig::default();

        let mut first = TimeEntryGenerator::new(42);
        let entries1 = first.generate(&employees, period_start, period_end, &config);

        let mut second = TimeEntryGenerator::new(42);
        let entries2 = second.generate(&employees, period_start, period_end, &config);

        assert_eq!(entries1.len(), entries2.len());
        for (a, b) in entries1.iter().zip(entries2.iter()) {
            assert_eq!(a.entry_id, b.entry_id);
            assert_eq!(a.employee_id, b.employee_id);
            assert_eq!(a.date, b.date);
            assert_eq!(a.hours_regular, b.hours_regular);
            assert_eq!(a.hours_overtime, b.hours_overtime);
            assert_eq!(a.approval_status, b.approval_status);
            // Cover the remaining RNG-driven fields so that a change in draw
            // order anywhere in the entry is caught.
            assert_eq!(a.hours_pto, b.hours_pto);
            assert_eq!(a.hours_sick, b.hours_sick);
            assert_eq!(a.project_id, b.project_id);
            assert_eq!(a.cost_center, b.cost_center);
            assert_eq!(a.approved_by, b.approved_by);
            assert_eq!(a.submitted_at, b.submitted_at);
            assert_eq!(a.billable, b.billable);
        }
    }

    /// Approved entries dominate; pending and rejected entries both occur.
    #[test]
    fn test_approval_status_distribution() {
        let mut generator = TimeEntryGenerator::new(99);
        // Use more employees for a larger sample
        let employees: Vec<String> = (1..=20).map(|i| format!("EMP-{:04}", i)).collect();
        let period_start = NaiveDate::from_ymd_opt(2024, 6, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 6, 30).unwrap();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, period_start, period_end, &config);

        let approved_count = entries
            .iter()
            .filter(|e| e.approval_status == TimeApprovalStatus::Approved)
            .count();
        let pending_count = entries
            .iter()
            .filter(|e| e.approval_status == TimeApprovalStatus::Pending)
            .count();
        let rejected_count = entries
            .iter()
            .filter(|e| e.approval_status == TimeApprovalStatus::Rejected)
            .count();

        let total = entries.len() as f64;
        // Approved should be dominant (~90%)
        assert!(
            (approved_count as f64 / total) > 0.80,
            "Expected >80% approved, got {:.1}%",
            approved_count as f64 / total * 100.0
        );
        // Pending and rejected should exist
        assert!(pending_count > 0, "Expected at least some pending entries");
        assert!(
            rejected_count > 0,
            "Expected at least some rejected entries"
        );
    }
}