Skip to main content

datasynth_generators/hr/
time_entry_generator.rs

//! Time entry generator for the Hire-to-Retire (H2R) process.
//!
//! Generates daily time entries for employees across business days in a period,
//! including regular hours, overtime, PTO, and sick leave with approval statuses.
5
6use chrono::{Datelike, NaiveDate};
7use datasynth_config::schema::TimeAttendanceConfig;
8use datasynth_core::models::{TimeApprovalStatus, TimeEntry};
9use datasynth_core::utils::seeded_rng;
10use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13use tracing::debug;
14
/// Chance (3%) that a generated business-day entry is a PTO day rather than a working day.
const DEFAULT_PTO_RATE: f64 = 0.03;

/// Chance (1%) that a generated entry is a sick day; only consulted when the PTO roll for
/// the same entry did not hit, so PTO takes precedence over sick leave.
const DEFAULT_SICK_RATE: f64 = 0.01;
20
21/// Generates [`TimeEntry`] records for employees across business days in a period.
22pub struct TimeEntryGenerator {
23    rng: ChaCha8Rng,
24    uuid_factory: DeterministicUuidFactory,
25    /// Pool of real employee IDs for approved_by references.
26    employee_ids_pool: Vec<String>,
27    /// Pool of real cost center IDs.
28    cost_center_ids_pool: Vec<String>,
29}
30
31impl TimeEntryGenerator {
32    /// Create a new time entry generator.
33    pub fn new(seed: u64) -> Self {
34        Self {
35            rng: seeded_rng(seed, 0),
36            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::TimeEntry),
37            employee_ids_pool: Vec::new(),
38            cost_center_ids_pool: Vec::new(),
39        }
40    }
41
42    /// Set ID pools for cross-reference coherence.
43    ///
44    /// When pools are non-empty, the generator selects `approved_by` from
45    /// `employee_ids` and `cost_center` from `cost_center_ids` instead of
46    /// fabricating placeholder IDs.
47    pub fn with_pools(mut self, employee_ids: Vec<String>, cost_center_ids: Vec<String>) -> Self {
48        self.employee_ids_pool = employee_ids;
49        self.cost_center_ids_pool = cost_center_ids;
50        self
51    }
52
53    /// Generate time entries for a set of employees over a date range.
54    ///
55    /// # Arguments
56    ///
57    /// * `employee_ids` - Slice of employee identifiers
58    /// * `period_start` - Start of the period (inclusive)
59    /// * `period_end` - End of the period (inclusive)
60    /// * `config` - Time and attendance configuration
61    pub fn generate(
62        &mut self,
63        employee_ids: &[String],
64        period_start: NaiveDate,
65        period_end: NaiveDate,
66        config: &TimeAttendanceConfig,
67    ) -> Vec<TimeEntry> {
68        debug!(employee_count = employee_ids.len(), %period_start, %period_end, "Generating time entries");
69        let mut entries = Vec::new();
70        let business_days = self.collect_business_days(period_start, period_end);
71
72        let overtime_rate = config.overtime_rate;
73
74        for employee_id in employee_ids {
75            for &day in &business_days {
76                let entry = self.generate_entry(employee_id, day, overtime_rate);
77                entries.push(entry);
78            }
79        }
80
81        entries
82    }
83
84    /// Collect all business days (Mon-Fri) within the given date range.
85    fn collect_business_days(&self, start: NaiveDate, end: NaiveDate) -> Vec<NaiveDate> {
86        let mut days = Vec::new();
87        let mut current = start;
88        while current <= end {
89            let weekday = current.weekday();
90            if weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun {
91                days.push(current);
92            }
93            current += chrono::Duration::days(1);
94        }
95        days
96    }
97
98    /// Generate a single time entry for an employee on a given day.
99    fn generate_entry(
100        &mut self,
101        employee_id: &str,
102        date: NaiveDate,
103        overtime_rate: f64,
104    ) -> TimeEntry {
105        let entry_id = self.uuid_factory.next().to_string();
106
107        // Determine entry type: PTO, sick, or regular working day
108        let pto_roll: f64 = self.rng.gen();
109        let sick_roll: f64 = self.rng.gen();
110
111        let (hours_regular, hours_overtime, hours_pto, hours_sick) = if pto_roll < DEFAULT_PTO_RATE
112        {
113            // PTO day: 8 hours PTO, no work
114            (0.0, 0.0, 8.0, 0.0)
115        } else if sick_roll < DEFAULT_SICK_RATE {
116            // Sick day: 8 hours sick leave, no work
117            (0.0, 0.0, 0.0, 8.0)
118        } else {
119            // Regular working day
120            let regular = 8.0;
121            let overtime = if self.rng.gen_bool(overtime_rate) {
122                self.rng.gen_range(1.0..=4.0)
123            } else {
124                0.0
125            };
126            (regular, overtime, 0.0, 0.0)
127        };
128
129        // Project assignment: ~60% of entries have a project
130        let project_id = if self.rng.gen_bool(0.60) {
131            Some(format!("PROJ-{:04}", self.rng.gen_range(1..=50)))
132        } else {
133            None
134        };
135
136        // Cost center: ~70% of entries have a cost center
137        let cost_center = if self.rng.gen_bool(0.70) {
138            if !self.cost_center_ids_pool.is_empty() {
139                let idx = self.rng.gen_range(0..self.cost_center_ids_pool.len());
140                Some(self.cost_center_ids_pool[idx].clone())
141            } else {
142                Some(format!("CC-{:03}", self.rng.gen_range(100..=500)))
143            }
144        } else {
145            None
146        };
147
148        // Description based on entry type
149        let description = if hours_pto > 0.0 {
150            Some("Paid time off".to_string())
151        } else if hours_sick > 0.0 {
152            Some("Sick leave".to_string())
153        } else if hours_overtime > 0.0 {
154            Some("Regular work + overtime".to_string())
155        } else {
156            None
157        };
158
159        // Approval status: 90% approved, 5% pending, 5% rejected
160        let status_roll: f64 = self.rng.gen();
161        let approval_status = if status_roll < 0.90 {
162            TimeApprovalStatus::Approved
163        } else if status_roll < 0.95 {
164            TimeApprovalStatus::Pending
165        } else {
166            TimeApprovalStatus::Rejected
167        };
168
169        let approved_by = if approval_status == TimeApprovalStatus::Approved {
170            if !self.employee_ids_pool.is_empty() {
171                let idx = self.rng.gen_range(0..self.employee_ids_pool.len());
172                Some(self.employee_ids_pool[idx].clone())
173            } else {
174                Some(format!("MGR-{:04}", self.rng.gen_range(1..=100)))
175            }
176        } else {
177            None
178        };
179
180        let submitted_at =
181            if approval_status != TimeApprovalStatus::Pending || self.rng.gen_bool(0.5) {
182                // Most entries are submitted on the day or the next day
183                let lag = self.rng.gen_range(0..=2);
184                Some(date + chrono::Duration::days(lag))
185            } else {
186                None
187            };
188
189        TimeEntry {
190            entry_id,
191            employee_id: employee_id.to_string(),
192            date,
193            hours_regular,
194            hours_overtime,
195            hours_pto,
196            hours_sick,
197            project_id,
198            cost_center,
199            description,
200            approval_status,
201            approved_by,
202            submitted_at,
203        }
204    }
205}
206
#[cfg(test)]
// `unwrap` is acceptable here: every unwrapped value is a hard-coded valid calendar date.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Three fixed employee IDs shared by the small-sample tests.
    fn test_employee_ids() -> Vec<String> {
        vec![
            "EMP-001".to_string(),
            "EMP-002".to_string(),
            "EMP-003".to_string(),
        ]
    }

    /// Shorthand for building a known-valid date.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    #[test]
    fn test_basic_time_entry_generation() {
        let mut generator = TimeEntryGenerator::new(42);
        let employees = test_employee_ids();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, ymd(2024, 1, 1), ymd(2024, 1, 31), &config);

        // January 2024 has 23 business days, 3 employees => 69 entries
        assert!(!entries.is_empty());
        assert_eq!(entries.len(), 23 * 3);

        for entry in &entries {
            assert!(!entry.entry_id.is_empty());
            assert!(!entry.employee_id.is_empty());
            // Each day should have some hours
            let total =
                entry.hours_regular + entry.hours_overtime + entry.hours_pto + entry.hours_sick;
            assert!(total > 0.0, "Entry should have some hours recorded");
            // No weekend entries
            let weekday = entry.date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Should not generate weekend entries"
            );
        }
    }

    #[test]
    fn test_deterministic_time_entries() {
        let employees = test_employee_ids();
        let period_start = ymd(2024, 3, 1);
        let period_end = ymd(2024, 3, 31);
        let config = TimeAttendanceConfig::default();

        let mut gen1 = TimeEntryGenerator::new(42);
        let entries1 = gen1.generate(&employees, period_start, period_end, &config);

        let mut gen2 = TimeEntryGenerator::new(42);
        let entries2 = gen2.generate(&employees, period_start, period_end, &config);

        assert_eq!(entries1.len(), entries2.len());
        // Compare every field so a nondeterministic draw anywhere in the entry is caught.
        for (a, b) in entries1.iter().zip(entries2.iter()) {
            assert_eq!(a.entry_id, b.entry_id);
            assert_eq!(a.employee_id, b.employee_id);
            assert_eq!(a.date, b.date);
            assert_eq!(a.hours_regular, b.hours_regular);
            assert_eq!(a.hours_overtime, b.hours_overtime);
            assert_eq!(a.hours_pto, b.hours_pto);
            assert_eq!(a.hours_sick, b.hours_sick);
            assert_eq!(a.project_id, b.project_id);
            assert_eq!(a.cost_center, b.cost_center);
            assert_eq!(a.description, b.description);
            assert_eq!(a.approval_status, b.approval_status);
            assert_eq!(a.approved_by, b.approved_by);
            assert_eq!(a.submitted_at, b.submitted_at);
        }
    }

    #[test]
    fn test_approval_status_distribution() {
        let mut generator = TimeEntryGenerator::new(99);
        // Use more employees for a larger sample
        let employees: Vec<String> = (1..=20).map(|i| format!("EMP-{:04}", i)).collect();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, ymd(2024, 6, 1), ymd(2024, 6, 30), &config);

        let count_with = |status: TimeApprovalStatus| {
            entries
                .iter()
                .filter(|e| e.approval_status == status)
                .count()
        };
        let approved_count = count_with(TimeApprovalStatus::Approved);
        let pending_count = count_with(TimeApprovalStatus::Pending);
        let rejected_count = count_with(TimeApprovalStatus::Rejected);

        let total = entries.len() as f64;
        // Approved should be dominant (~90%)
        assert!(
            (approved_count as f64 / total) > 0.80,
            "Expected >80% approved, got {:.1}%",
            approved_count as f64 / total * 100.0
        );
        // Pending and rejected should exist
        assert!(pending_count > 0, "Expected at least some pending entries");
        assert!(
            rejected_count > 0,
            "Expected at least some rejected entries"
        );
    }

    #[test]
    fn test_pools_supply_cross_references() {
        let approvers = vec!["EMP-900".to_string(), "EMP-901".to_string()];
        let cost_centers = vec!["CC-FIN".to_string(), "CC-OPS".to_string()];
        let mut generator =
            TimeEntryGenerator::new(7).with_pools(approvers.clone(), cost_centers.clone());
        let employees = test_employee_ids();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, ymd(2024, 2, 1), ymd(2024, 2, 29), &config);
        assert!(!entries.is_empty());

        for entry in &entries {
            // An approver is recorded exactly when the entry is approved.
            assert_eq!(
                entry.approved_by.is_some(),
                entry.approval_status == TimeApprovalStatus::Approved
            );
            if let Some(approver) = &entry.approved_by {
                assert!(approvers.contains(approver), "approver must come from the pool");
            }
            if let Some(cost_center) = &entry.cost_center {
                assert!(
                    cost_centers.contains(cost_center),
                    "cost center must come from the pool"
                );
            }
        }
    }

    #[test]
    fn test_ranges_without_business_days_yield_nothing() {
        let mut generator = TimeEntryGenerator::new(1);
        let employees = test_employee_ids();
        let config = TimeAttendanceConfig::default();

        // Inverted range
        let inverted = generator.generate(&employees, ymd(2024, 1, 31), ymd(2024, 1, 1), &config);
        assert!(inverted.is_empty());

        // 2024-01-06 and 2024-01-07 are a Saturday and a Sunday
        let weekend = generator.generate(&employees, ymd(2024, 1, 6), ymd(2024, 1, 7), &config);
        assert!(weekend.is_empty());

        // No employees
        let nobody = generator.generate(&[], ymd(2024, 1, 1), ymd(2024, 1, 31), &config);
        assert!(nobody.is_empty());
    }
}