Skip to main content

datasynth_generators/hr/
time_entry_generator.rs

//! Time entry generator for the Hire-to-Retire (H2R) process.
//!
//! Generates daily time entries for employees across business days in a period,
//! including regular hours, overtime, PTO, and sick leave with approval statuses.
5
6use chrono::{Datelike, NaiveDate};
7use datasynth_config::schema::TimeAttendanceConfig;
8use datasynth_core::models::{TimeApprovalStatus, TimeEntry};
9use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
10use rand::prelude::*;
11use rand_chacha::ChaCha8Rng;
12
/// Default PTO rate: the probability that an employee takes paid time off on a
/// given business day (0.03, i.e. 3%).
const DEFAULT_PTO_RATE: f64 = 0.03;

/// Default sick leave rate: the probability threshold for recording a sick day
/// on a given business day (0.01, i.e. 1%).
const DEFAULT_SICK_RATE: f64 = 0.01;
18
19/// Generates [`TimeEntry`] records for employees across business days in a period.
20pub struct TimeEntryGenerator {
21    rng: ChaCha8Rng,
22    uuid_factory: DeterministicUuidFactory,
23}
24
25impl TimeEntryGenerator {
26    /// Create a new time entry generator.
27    pub fn new(seed: u64) -> Self {
28        Self {
29            rng: ChaCha8Rng::seed_from_u64(seed),
30            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::TimeEntry),
31        }
32    }
33
34    /// Generate time entries for a set of employees over a date range.
35    ///
36    /// # Arguments
37    ///
38    /// * `employee_ids` - Slice of employee identifiers
39    /// * `period_start` - Start of the period (inclusive)
40    /// * `period_end` - End of the period (inclusive)
41    /// * `config` - Time and attendance configuration
42    pub fn generate(
43        &mut self,
44        employee_ids: &[String],
45        period_start: NaiveDate,
46        period_end: NaiveDate,
47        config: &TimeAttendanceConfig,
48    ) -> Vec<TimeEntry> {
49        let mut entries = Vec::new();
50        let business_days = self.collect_business_days(period_start, period_end);
51
52        let overtime_rate = config.overtime_rate;
53
54        for employee_id in employee_ids {
55            for &day in &business_days {
56                let entry = self.generate_entry(employee_id, day, overtime_rate);
57                entries.push(entry);
58            }
59        }
60
61        entries
62    }
63
64    /// Collect all business days (Mon-Fri) within the given date range.
65    fn collect_business_days(&self, start: NaiveDate, end: NaiveDate) -> Vec<NaiveDate> {
66        let mut days = Vec::new();
67        let mut current = start;
68        while current <= end {
69            let weekday = current.weekday();
70            if weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun {
71                days.push(current);
72            }
73            current += chrono::Duration::days(1);
74        }
75        days
76    }
77
78    /// Generate a single time entry for an employee on a given day.
79    fn generate_entry(
80        &mut self,
81        employee_id: &str,
82        date: NaiveDate,
83        overtime_rate: f64,
84    ) -> TimeEntry {
85        let entry_id = self.uuid_factory.next().to_string();
86
87        // Determine entry type: PTO, sick, or regular working day
88        let pto_roll: f64 = self.rng.gen();
89        let sick_roll: f64 = self.rng.gen();
90
91        let (hours_regular, hours_overtime, hours_pto, hours_sick) = if pto_roll < DEFAULT_PTO_RATE
92        {
93            // PTO day: 8 hours PTO, no work
94            (0.0, 0.0, 8.0, 0.0)
95        } else if sick_roll < DEFAULT_SICK_RATE {
96            // Sick day: 8 hours sick leave, no work
97            (0.0, 0.0, 0.0, 8.0)
98        } else {
99            // Regular working day
100            let regular = 8.0;
101            let overtime = if self.rng.gen_bool(overtime_rate) {
102                self.rng.gen_range(1.0..=4.0)
103            } else {
104                0.0
105            };
106            (regular, overtime, 0.0, 0.0)
107        };
108
109        // Project assignment: ~60% of entries have a project
110        let project_id = if self.rng.gen_bool(0.60) {
111            Some(format!("PROJ-{:04}", self.rng.gen_range(1..=50)))
112        } else {
113            None
114        };
115
116        // Cost center: ~70% of entries have a cost center
117        let cost_center = if self.rng.gen_bool(0.70) {
118            Some(format!("CC-{:03}", self.rng.gen_range(100..=500)))
119        } else {
120            None
121        };
122
123        // Description based on entry type
124        let description = if hours_pto > 0.0 {
125            Some("Paid time off".to_string())
126        } else if hours_sick > 0.0 {
127            Some("Sick leave".to_string())
128        } else if hours_overtime > 0.0 {
129            Some("Regular work + overtime".to_string())
130        } else {
131            None
132        };
133
134        // Approval status: 90% approved, 5% pending, 5% rejected
135        let status_roll: f64 = self.rng.gen();
136        let approval_status = if status_roll < 0.90 {
137            TimeApprovalStatus::Approved
138        } else if status_roll < 0.95 {
139            TimeApprovalStatus::Pending
140        } else {
141            TimeApprovalStatus::Rejected
142        };
143
144        let approved_by = if approval_status == TimeApprovalStatus::Approved {
145            Some(format!("MGR-{:04}", self.rng.gen_range(1..=100)))
146        } else {
147            None
148        };
149
150        let submitted_at =
151            if approval_status != TimeApprovalStatus::Pending || self.rng.gen_bool(0.5) {
152                // Most entries are submitted on the day or the next day
153                let lag = self.rng.gen_range(0..=2);
154                Some(date + chrono::Duration::days(lag))
155            } else {
156                None
157            };
158
159        TimeEntry {
160            entry_id,
161            employee_id: employee_id.to_string(),
162            date,
163            hours_regular,
164            hours_overtime,
165            hours_pto,
166            hours_sick,
167            project_id,
168            cost_center,
169            description,
170            approval_status,
171            approved_by,
172            submitted_at,
173        }
174    }
175}
176
// Unit tests for `TimeEntryGenerator`: entry counts and weekday filtering,
// seed determinism, and the approval status mix.
#[cfg(test)]
// `unwrap` is acceptable here: the dates below are hard-coded and valid.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Three fixed employee identifiers shared by the smaller tests.
    fn test_employee_ids() -> Vec<String> {
        vec![
            "EMP-001".to_string(),
            "EMP-002".to_string(),
            "EMP-003".to_string(),
        ]
    }

    /// One entry per employee per business day, each with hours and no weekends.
    #[test]
    fn test_basic_time_entry_generation() {
        let mut generator = TimeEntryGenerator::new(42);
        let employees = test_employee_ids();
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 1, 31).unwrap();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, period_start, period_end, &config);

        // January 2024 has 23 business days, 3 employees => 69 entries
        assert_eq!(entries.len(), 23 * 3);

        for entry in &entries {
            assert!(!entry.entry_id.is_empty());
            assert!(!entry.employee_id.is_empty());
            // Each day should have some hours
            let total =
                entry.hours_regular + entry.hours_overtime + entry.hours_pto + entry.hours_sick;
            assert!(total > 0.0, "Entry should have some hours recorded");
            // No weekend entries
            let weekday = entry.date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Should not generate weekend entries"
            );
        }
    }

    /// Two generators with the same seed and inputs must agree on every field.
    #[test]
    fn test_deterministic_time_entries() {
        let employees = test_employee_ids();
        let period_start = NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();
        let config = TimeAttendanceConfig::default();

        let mut first = TimeEntryGenerator::new(42);
        let entries1 = first.generate(&employees, period_start, period_end, &config);

        let mut second = TimeEntryGenerator::new(42);
        let entries2 = second.generate(&employees, period_start, period_end, &config);

        assert_eq!(entries1.len(), entries2.len());
        for (a, b) in entries1.iter().zip(entries2.iter()) {
            assert_eq!(a.entry_id, b.entry_id);
            assert_eq!(a.employee_id, b.employee_id);
            assert_eq!(a.date, b.date);
            assert_eq!(a.hours_regular, b.hours_regular);
            assert_eq!(a.hours_overtime, b.hours_overtime);
            assert_eq!(a.hours_pto, b.hours_pto);
            assert_eq!(a.hours_sick, b.hours_sick);
            assert_eq!(a.project_id, b.project_id);
            assert_eq!(a.cost_center, b.cost_center);
            assert_eq!(a.description, b.description);
            assert_eq!(a.approval_status, b.approval_status);
            assert_eq!(a.approved_by, b.approved_by);
            assert_eq!(a.submitted_at, b.submitted_at);
        }
    }

    /// Approved entries dominate, and pending and rejected entries both occur.
    #[test]
    fn test_approval_status_distribution() {
        let mut generator = TimeEntryGenerator::new(99);
        // Use more employees for a larger sample
        let employees: Vec<String> = (1..=20).map(|i| format!("EMP-{i:04}")).collect();
        let period_start = NaiveDate::from_ymd_opt(2024, 6, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 6, 30).unwrap();
        let config = TimeAttendanceConfig::default();

        let entries = generator.generate(&employees, period_start, period_end, &config);

        let count_with = |status: TimeApprovalStatus| {
            entries
                .iter()
                .filter(|e| e.approval_status == status)
                .count()
        };
        let approved_count = count_with(TimeApprovalStatus::Approved);
        let pending_count = count_with(TimeApprovalStatus::Pending);
        let rejected_count = count_with(TimeApprovalStatus::Rejected);

        // The generator only ever emits these three statuses.
        assert_eq!(
            approved_count + pending_count + rejected_count,
            entries.len()
        );

        let total = entries.len() as f64;
        // Approved should be dominant (~90%)
        assert!(
            (approved_count as f64 / total) > 0.80,
            "Expected >80% approved, got {:.1}%",
            approved_count as f64 / total * 100.0
        );
        // Pending and rejected should exist
        assert!(pending_count > 0, "Expected at least some pending entries");
        assert!(
            rejected_count > 0,
            "Expected at least some rejected entries"
        );

        // An approver is recorded exactly when the entry is approved.
        for entry in &entries {
            assert_eq!(
                entry.approved_by.is_some(),
                entry.approval_status == TimeApprovalStatus::Approved,
                "approved_by should be set only for approved entries"
            );
        }
    }
}