Skip to main content

datasynth_generators/hr/
time_entry_generator.rs

1//! Time entry generator for the Hire-to-Retire (H2R) process.
2//!
3//! Generates daily time entries for employees across business days in a period,
4//! including regular hours, overtime, PTO, and sick leave with approval statuses.
5
6use chrono::{Datelike, NaiveDate};
7use datasynth_config::schema::TimeAttendanceConfig;
8use datasynth_core::models::{TimeApprovalStatus, TimeEntry};
9use datasynth_core::utils::seeded_rng;
10use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13use std::collections::HashMap;
14use tracing::debug;
15
/// Probability that an employee's entry for a given business day is a PTO day.
///
/// Applied per employee per day; the PTO roll is checked before the sick roll.
const DEFAULT_PTO_RATE: f64 = 0.03;

/// Probability threshold for the sick-leave roll on a given business day.
///
/// Only consulted when the PTO roll did not already claim the day.
const DEFAULT_SICK_RATE: f64 = 0.01;
21
22/// Generates [`TimeEntry`] records for employees across business days in a period.
23pub struct TimeEntryGenerator {
24    rng: ChaCha8Rng,
25    uuid_factory: DeterministicUuidFactory,
26    /// Pool of real employee IDs for approved_by references.
27    employee_ids_pool: Vec<String>,
28    /// Pool of real cost center IDs.
29    cost_center_ids_pool: Vec<String>,
30    /// Mapping of employee_id → employee_name for denormalization (DS-011).
31    employee_names: HashMap<String, String>,
32}
33
34impl TimeEntryGenerator {
35    /// Create a new time entry generator.
36    pub fn new(seed: u64) -> Self {
37        Self {
38            rng: seeded_rng(seed, 0),
39            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::TimeEntry),
40            employee_ids_pool: Vec::new(),
41            cost_center_ids_pool: Vec::new(),
42            employee_names: HashMap::new(),
43        }
44    }
45
46    /// Set ID pools for cross-reference coherence.
47    ///
48    /// When pools are non-empty, the generator selects `approved_by` from
49    /// `employee_ids` and `cost_center` from `cost_center_ids` instead of
50    /// fabricating placeholder IDs.
51    pub fn with_pools(mut self, employee_ids: Vec<String>, cost_center_ids: Vec<String>) -> Self {
52        self.employee_ids_pool = employee_ids;
53        self.cost_center_ids_pool = cost_center_ids;
54        self
55    }
56
57    /// Set the employee name mapping for denormalization (DS-011).
58    ///
59    /// Maps employee IDs to their display names so that generated time entries
60    /// include the employee name for graph export convenience.
61    pub fn with_employee_names(mut self, names: HashMap<String, String>) -> Self {
62        self.employee_names = names;
63        self
64    }
65
66    /// Generate time entries for a set of employees over a date range.
67    ///
68    /// # Arguments
69    ///
70    /// * `employee_ids` - Slice of employee identifiers
71    /// * `period_start` - Start of the period (inclusive)
72    /// * `period_end` - End of the period (inclusive)
73    /// * `config` - Time and attendance configuration
74    pub fn generate(
75        &mut self,
76        employee_ids: &[String],
77        period_start: NaiveDate,
78        period_end: NaiveDate,
79        config: &TimeAttendanceConfig,
80    ) -> Vec<TimeEntry> {
81        debug!(employee_count = employee_ids.len(), %period_start, %period_end, "Generating time entries");
82        let mut entries = Vec::new();
83        let business_days = self.collect_business_days(period_start, period_end);
84
85        let overtime_rate = config.overtime_rate;
86
87        for employee_id in employee_ids {
88            for &day in &business_days {
89                let entry = self.generate_entry(employee_id, day, overtime_rate);
90                entries.push(entry);
91            }
92        }
93
94        entries
95    }
96
97    /// Collect all business days (Mon-Fri) within the given date range.
98    fn collect_business_days(&self, start: NaiveDate, end: NaiveDate) -> Vec<NaiveDate> {
99        let mut days = Vec::new();
100        let mut current = start;
101        while current <= end {
102            let weekday = current.weekday();
103            if weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun {
104                days.push(current);
105            }
106            current += chrono::Duration::days(1);
107        }
108        days
109    }
110
111    /// Generate a single time entry for an employee on a given day.
112    fn generate_entry(
113        &mut self,
114        employee_id: &str,
115        date: NaiveDate,
116        overtime_rate: f64,
117    ) -> TimeEntry {
118        let entry_id = self.uuid_factory.next().to_string();
119
120        // Determine entry type: PTO, sick, or regular working day
121        let pto_roll: f64 = self.rng.random();
122        let sick_roll: f64 = self.rng.random();
123
124        let (hours_regular, hours_overtime, hours_pto, hours_sick) = if pto_roll < DEFAULT_PTO_RATE
125        {
126            // PTO day: 8 hours PTO, no work
127            (0.0, 0.0, 8.0, 0.0)
128        } else if sick_roll < DEFAULT_SICK_RATE {
129            // Sick day: 8 hours sick leave, no work
130            (0.0, 0.0, 0.0, 8.0)
131        } else {
132            // Regular working day
133            let regular = 8.0;
134            let overtime = if self.rng.random_bool(overtime_rate) {
135                self.rng.random_range(1.0..=4.0)
136            } else {
137                0.0
138            };
139            (regular, overtime, 0.0, 0.0)
140        };
141
142        // Project assignment: ~60% of entries have a project
143        let project_id = if self.rng.random_bool(0.60) {
144            Some(format!("PROJ-{:04}", self.rng.random_range(1..=50)))
145        } else {
146            None
147        };
148
149        // Cost center: ~70% of entries have a cost center
150        let cost_center = if self.rng.random_bool(0.70) {
151            if !self.cost_center_ids_pool.is_empty() {
152                let idx = self.rng.random_range(0..self.cost_center_ids_pool.len());
153                Some(self.cost_center_ids_pool[idx].clone())
154            } else {
155                Some(format!("CC-{:03}", self.rng.random_range(100..=500)))
156            }
157        } else {
158            None
159        };
160
161        // Description based on entry type
162        let description = if hours_pto > 0.0 {
163            Some("Paid time off".to_string())
164        } else if hours_sick > 0.0 {
165            Some("Sick leave".to_string())
166        } else if hours_overtime > 0.0 {
167            Some("Regular work + overtime".to_string())
168        } else {
169            None
170        };
171
172        // Approval status: 90% approved, 5% pending, 5% rejected
173        let status_roll: f64 = self.rng.random();
174        let approval_status = if status_roll < 0.90 {
175            TimeApprovalStatus::Approved
176        } else if status_roll < 0.95 {
177            TimeApprovalStatus::Pending
178        } else {
179            TimeApprovalStatus::Rejected
180        };
181
182        let approved_by = if approval_status == TimeApprovalStatus::Approved {
183            if !self.employee_ids_pool.is_empty() {
184                let idx = self.rng.random_range(0..self.employee_ids_pool.len());
185                Some(self.employee_ids_pool[idx].clone())
186            } else {
187                Some(format!("MGR-{:04}", self.rng.random_range(1..=100)))
188            }
189        } else {
190            None
191        };
192
193        let submitted_at =
194            if approval_status != TimeApprovalStatus::Pending || self.rng.random_bool(0.5) {
195                // Most entries are submitted on the day or the next day
196                let lag = self.rng.random_range(0..=2);
197                Some(date + chrono::Duration::days(lag))
198            } else {
199                None
200            };
201
202        TimeEntry {
203            entry_id,
204            employee_id: employee_id.to_string(),
205            date,
206            hours_regular,
207            hours_overtime,
208            hours_pto,
209            hours_sick,
210            project_id,
211            cost_center,
212            description,
213            approval_status,
214            approved_by,
215            submitted_at,
216            employee_name: self.employee_names.get(employee_id).cloned(),
217            billable: self.rng.random_bool(0.70),
218        }
219    }
220}
221
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Three fixed employee IDs shared by the small-sample tests.
    fn test_employee_ids() -> Vec<String> {
        ["EMP-001", "EMP-002", "EMP-003"]
            .iter()
            .map(|id| id.to_string())
            .collect()
    }

    /// Build a calendar date, panicking on an invalid year/month/day.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// A full month yields one well-formed weekday entry per employee per business day.
    #[test]
    fn test_basic_time_entry_generation() {
        let employees = test_employee_ids();
        let config = TimeAttendanceConfig::default();
        let mut generator = TimeEntryGenerator::new(42);

        let entries =
            generator.generate(&employees, ymd(2024, 1, 1), ymd(2024, 1, 31), &config);

        // January 2024 has 23 business days, 3 employees => 69 entries
        assert!(!entries.is_empty());
        assert_eq!(entries.len(), 23 * 3);

        for entry in &entries {
            assert!(!entry.entry_id.is_empty());
            assert!(!entry.employee_id.is_empty());

            // Each day should have some hours
            let total =
                entry.hours_regular + entry.hours_overtime + entry.hours_pto + entry.hours_sick;
            assert!(total > 0.0, "Entry should have some hours recorded");

            // No weekend entries
            let on_weekend = matches!(
                entry.date.weekday(),
                chrono::Weekday::Sat | chrono::Weekday::Sun
            );
            assert!(!on_weekend, "Should not generate weekend entries");
        }
    }

    /// Two generators built from the same seed produce matching entries.
    #[test]
    fn test_deterministic_time_entries() {
        let employees = test_employee_ids();
        let config = TimeAttendanceConfig::default();
        let period_start = ymd(2024, 3, 1);
        let period_end = ymd(2024, 3, 31);

        // Each call builds a fresh generator from the same seed.
        let run = || {
            let mut generator = TimeEntryGenerator::new(42);
            generator.generate(&employees, period_start, period_end, &config)
        };
        let entries1 = run();
        let entries2 = run();

        assert_eq!(entries1.len(), entries2.len());
        for (first, second) in entries1.iter().zip(entries2.iter()) {
            assert_eq!(first.entry_id, second.entry_id);
            assert_eq!(first.employee_id, second.employee_id);
            assert_eq!(first.date, second.date);
            assert_eq!(first.hours_regular, second.hours_regular);
            assert_eq!(first.hours_overtime, second.hours_overtime);
            assert_eq!(first.approval_status, second.approval_status);
        }
    }

    /// Approved entries dominate, while pending and rejected ones still occur.
    #[test]
    fn test_approval_status_distribution() {
        // Use more employees for a larger sample
        let employees: Vec<String> = (1..=20).map(|i| format!("EMP-{:04}", i)).collect();
        let config = TimeAttendanceConfig::default();
        let mut generator = TimeEntryGenerator::new(99);

        let entries =
            generator.generate(&employees, ymd(2024, 6, 1), ymd(2024, 6, 30), &config);

        let count_with = |status: TimeApprovalStatus| {
            entries
                .iter()
                .filter(|entry| entry.approval_status == status)
                .count()
        };
        let approved_count = count_with(TimeApprovalStatus::Approved);
        let pending_count = count_with(TimeApprovalStatus::Pending);
        let rejected_count = count_with(TimeApprovalStatus::Rejected);

        let total = entries.len() as f64;
        let approved_share = approved_count as f64 / total;

        // Approved should be dominant (~90%)
        assert!(
            approved_share > 0.80,
            "Expected >80% approved, got {:.1}%",
            approved_share * 100.0
        );
        // Pending and rejected should exist
        assert!(pending_count > 0, "Expected at least some pending entries");
        assert!(
            rejected_count > 0,
            "Expected at least some rejected entries"
        );
    }
}