// Source file: datasynth_eval/coherence/hr_payroll.rs

//! HR/Payroll evaluator.
//!
//! Validates payroll arithmetic coherence including gross-to-net calculations,
//! component sums, run totals, time entry mapping, and expense report consistency.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8
9/// Thresholds for HR/payroll evaluation.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct HrPayrollThresholds {
12    /// Minimum calculation accuracy (arithmetic should be near-exact).
13    pub min_calculation_accuracy: f64,
14    /// Tolerance for floating-point comparisons.
15    pub tolerance: f64,
16}
17
18impl Default for HrPayrollThresholds {
19    fn default() -> Self {
20        Self {
21            min_calculation_accuracy: 0.999,
22            tolerance: 0.01,
23        }
24    }
25}
26
/// One employee's payroll figures, supplied as input to the payroll checks.
#[derive(Debug, Clone)]
pub struct PayrollLineItemData {
    /// Identifier of the employee this line belongs to.
    pub employee_id: String,
    /// Gross pay for the line.
    pub gross_pay: f64,
    /// Base salary portion of gross pay.
    pub base_pay: f64,
    /// Overtime portion of gross pay.
    pub overtime_pay: f64,
    /// Bonus portion of gross pay.
    pub bonus_pay: f64,
    /// Net pay for the line.
    pub net_pay: f64,
    /// Reported sum of all deductions.
    pub total_deductions: f64,
    /// Tax portion of the deductions.
    pub tax_deduction: f64,
    /// Social security portion of the deductions.
    pub social_security: f64,
    /// Health insurance portion of the deductions.
    pub health_insurance: f64,
    /// Retirement contribution portion of the deductions.
    pub retirement: f64,
    /// Any remaining deductions not covered by the fields above.
    pub other_deductions: f64,
}
55
56/// Payroll run data for validation.
57#[derive(Debug, Clone)]
58pub struct PayrollRunData {
59    /// Run identifier.
60    pub run_id: String,
61    /// Reported total net pay for the run.
62    pub total_net_pay: f64,
63    /// Line items in this run.
64    pub line_items: Vec<PayrollLineItemData>,
65}
66
/// Hours an employee logged through time entries.
#[derive(Debug, Clone)]
pub struct TimeEntryData {
    /// Identifier of the employee the hours belong to.
    pub employee_id: String,
    /// Hours summed over the employee's time entries for the period.
    pub total_hours: f64,
}
75
/// Hours recorded for an employee on the payroll side.
#[derive(Debug, Clone)]
pub struct PayrollHoursData {
    /// Identifier of the employee the hours belong to.
    pub employee_id: String,
    /// Hours as they appear in the payroll records.
    pub payroll_hours: f64,
}
84
/// Summary of a single expense report, supplied as input to the expense checks.
#[derive(Debug, Clone)]
pub struct ExpenseReportData {
    /// Identifier of the report.
    pub report_id: String,
    /// Total amount the report states for itself.
    pub total_amount: f64,
    /// Amount obtained by adding up the report's line items.
    pub line_items_sum: f64,
    /// `true` when the report has been approved.
    pub is_approved: bool,
    /// `true` when an approver is assigned to the report.
    pub has_approver: bool,
}
99
100/// Results of HR/payroll evaluation.
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct HrPayrollEvaluation {
103    /// Gross-to-net accuracy: fraction of line items where net = gross - deductions.
104    pub gross_to_net_accuracy: f64,
105    /// Component sum accuracy: fraction where gross = base + OT + bonus.
106    pub component_sum_accuracy: f64,
107    /// Deduction sum accuracy: fraction where total_deductions = tax + SS + health + retirement + other.
108    pub deduction_sum_accuracy: f64,
109    /// Run sum accuracy: fraction where run total = SUM(line_items.net_pay).
110    pub run_sum_accuracy: f64,
111    /// Time-to-payroll mapping rate: fraction of employees with matching hours.
112    pub time_to_payroll_mapping_rate: f64,
113    /// Expense line item sum accuracy: fraction where report total = SUM(line_items).
114    pub expense_line_item_sum_accuracy: f64,
115    /// Expense approval consistency: fraction of approved reports with approver.
116    pub expense_approval_consistency: f64,
117    /// Total line items checked.
118    pub total_line_items: usize,
119    /// Total runs checked.
120    pub total_runs: usize,
121    /// Overall pass/fail.
122    pub passes: bool,
123    /// Issues found.
124    pub issues: Vec<String>,
125}
126
127/// Evaluator for HR/payroll coherence.
128pub struct HrPayrollEvaluator {
129    thresholds: HrPayrollThresholds,
130}
131
132impl HrPayrollEvaluator {
133    /// Create a new evaluator with default thresholds.
134    pub fn new() -> Self {
135        Self {
136            thresholds: HrPayrollThresholds::default(),
137        }
138    }
139
140    /// Create with custom thresholds.
141    pub fn with_thresholds(thresholds: HrPayrollThresholds) -> Self {
142        Self { thresholds }
143    }
144
145    /// Evaluate payroll data.
146    pub fn evaluate(
147        &self,
148        runs: &[PayrollRunData],
149        time_entries: &[TimeEntryData],
150        payroll_hours: &[PayrollHoursData],
151        expense_reports: &[ExpenseReportData],
152    ) -> EvalResult<HrPayrollEvaluation> {
153        let mut issues = Vec::new();
154        let tol = self.thresholds.tolerance;
155
156        // Collect all line items
157        let all_items: Vec<&PayrollLineItemData> =
158            runs.iter().flat_map(|r| r.line_items.iter()).collect();
159        let total_line_items = all_items.len();
160
161        // 1. Gross-to-net: net = gross - deductions
162        let gross_to_net_ok = all_items
163            .iter()
164            .filter(|li| (li.net_pay - (li.gross_pay - li.total_deductions)).abs() <= tol)
165            .count();
166        let gross_to_net_accuracy = if total_line_items > 0 {
167            gross_to_net_ok as f64 / total_line_items as f64
168        } else {
169            1.0
170        };
171
172        // 2. Component sums: gross = base + OT + bonus
173        let component_ok = all_items
174            .iter()
175            .filter(|li| {
176                (li.gross_pay - (li.base_pay + li.overtime_pay + li.bonus_pay)).abs() <= tol
177            })
178            .count();
179        let component_sum_accuracy = if total_line_items > 0 {
180            component_ok as f64 / total_line_items as f64
181        } else {
182            1.0
183        };
184
185        // 3. Deduction sums
186        let deduction_ok = all_items
187            .iter()
188            .filter(|li| {
189                let computed = li.tax_deduction
190                    + li.social_security
191                    + li.health_insurance
192                    + li.retirement
193                    + li.other_deductions;
194                (li.total_deductions - computed).abs() <= tol
195            })
196            .count();
197        let deduction_sum_accuracy = if total_line_items > 0 {
198            deduction_ok as f64 / total_line_items as f64
199        } else {
200            1.0
201        };
202
203        // 4. Run totals
204        let total_runs = runs.len();
205        let run_ok = runs
206            .iter()
207            .filter(|run| {
208                let computed_total: f64 = run.line_items.iter().map(|li| li.net_pay).sum();
209                (run.total_net_pay - computed_total).abs() <= tol
210            })
211            .count();
212        let run_sum_accuracy = if total_runs > 0 {
213            run_ok as f64 / total_runs as f64
214        } else {
215            1.0
216        };
217
218        // 5. Time entry mapping
219        let time_map: std::collections::HashMap<&str, f64> = time_entries
220            .iter()
221            .map(|te| (te.employee_id.as_str(), te.total_hours))
222            .collect();
223        let mapped_count = payroll_hours
224            .iter()
225            .filter(|ph| {
226                time_map
227                    .get(ph.employee_id.as_str())
228                    .map(|&hours| (hours - ph.payroll_hours).abs() <= 1.0)
229                    .unwrap_or(false)
230            })
231            .count();
232        let time_to_payroll_mapping_rate = if payroll_hours.is_empty() {
233            1.0
234        } else {
235            mapped_count as f64 / payroll_hours.len() as f64
236        };
237
238        // 6. Expense reports
239        let expense_sum_ok = expense_reports
240            .iter()
241            .filter(|er| (er.total_amount - er.line_items_sum).abs() <= tol)
242            .count();
243        let expense_line_item_sum_accuracy = if expense_reports.is_empty() {
244            1.0
245        } else {
246            expense_sum_ok as f64 / expense_reports.len() as f64
247        };
248
249        let approved_reports: Vec<&ExpenseReportData> =
250            expense_reports.iter().filter(|er| er.is_approved).collect();
251        let approval_consistent = approved_reports.iter().filter(|er| er.has_approver).count();
252        let expense_approval_consistency = if approved_reports.is_empty() {
253            1.0
254        } else {
255            approval_consistent as f64 / approved_reports.len() as f64
256        };
257
258        // Check thresholds
259        let min_acc = self.thresholds.min_calculation_accuracy;
260        if gross_to_net_accuracy < min_acc {
261            issues.push(format!(
262                "Gross-to-net accuracy {:.4} < {:.4}",
263                gross_to_net_accuracy, min_acc
264            ));
265        }
266        if component_sum_accuracy < min_acc {
267            issues.push(format!(
268                "Component sum accuracy {:.4} < {:.4}",
269                component_sum_accuracy, min_acc
270            ));
271        }
272        if deduction_sum_accuracy < min_acc {
273            issues.push(format!(
274                "Deduction sum accuracy {:.4} < {:.4}",
275                deduction_sum_accuracy, min_acc
276            ));
277        }
278        if run_sum_accuracy < min_acc {
279            issues.push(format!(
280                "Run sum accuracy {:.4} < {:.4}",
281                run_sum_accuracy, min_acc
282            ));
283        }
284
285        let passes = issues.is_empty();
286
287        Ok(HrPayrollEvaluation {
288            gross_to_net_accuracy,
289            component_sum_accuracy,
290            deduction_sum_accuracy,
291            run_sum_accuracy,
292            time_to_payroll_mapping_rate,
293            expense_line_item_sum_accuracy,
294            expense_approval_consistency,
295            total_line_items,
296            total_runs,
297            passes,
298            issues,
299        })
300    }
301}
302
303impl Default for HrPayrollEvaluator {
304    fn default() -> Self {
305        Self::new()
306    }
307}
308
#[cfg(test)]
// `unwrap` is fine in tests: a failed evaluation should abort the test with a panic.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A line item whose gross, deduction and net figures are all consistent.
    fn valid_line_item() -> PayrollLineItemData {
        PayrollLineItemData {
            employee_id: "EMP001".to_string(),
            gross_pay: 5000.0,
            base_pay: 4000.0,
            overtime_pay: 500.0,
            bonus_pay: 500.0,
            net_pay: 3500.0,
            total_deductions: 1500.0,
            tax_deduction: 800.0,
            social_security: 300.0,
            health_insurance: 200.0,
            retirement: 150.0,
            other_deductions: 50.0,
        }
    }

    /// Wraps one line item in a single run that reports `total_net_pay`.
    fn single_run(item: PayrollLineItemData, total_net_pay: f64) -> Vec<PayrollRunData> {
        vec![PayrollRunData {
            run_id: "PR001".to_string(),
            total_net_pay,
            line_items: vec![item],
        }]
    }

    #[test]
    fn test_valid_payroll() {
        let evaluator = HrPayrollEvaluator::new();
        let runs = single_run(valid_line_item(), 3500.0);

        let result = evaluator.evaluate(&runs, &[], &[], &[]).unwrap();
        assert!(result.passes);
        assert!(result.issues.is_empty());
        assert_eq!(result.gross_to_net_accuracy, 1.0);
        assert_eq!(result.component_sum_accuracy, 1.0);
        assert_eq!(result.deduction_sum_accuracy, 1.0);
        assert_eq!(result.run_sum_accuracy, 1.0);
        assert_eq!(result.total_line_items, 1);
        assert_eq!(result.total_runs, 1);
    }

    #[test]
    fn test_broken_gross_to_net() {
        let evaluator = HrPayrollEvaluator::new();
        let mut item = valid_line_item();
        item.net_pay = 4000.0; // Wrong: should be 3500

        let runs = single_run(item, 4000.0);

        let result = evaluator.evaluate(&runs, &[], &[], &[]).unwrap();
        assert!(!result.passes);
        assert!(result.gross_to_net_accuracy < 1.0);
    }

    #[test]
    fn test_broken_deduction_sum() {
        let evaluator = HrPayrollEvaluator::new();
        let mut item = valid_line_item();
        item.other_deductions = 100.0; // Components now add up to 1550, not 1500

        let runs = single_run(item, 3500.0);

        let result = evaluator.evaluate(&runs, &[], &[], &[]).unwrap();
        assert!(!result.passes);
        assert_eq!(result.deduction_sum_accuracy, 0.0);
        // The other payroll checks are unaffected by the deduction breakdown.
        assert_eq!(result.gross_to_net_accuracy, 1.0);
        assert_eq!(result.component_sum_accuracy, 1.0);
        assert_eq!(result.run_sum_accuracy, 1.0);
    }

    #[test]
    fn test_run_total_mismatch() {
        let evaluator = HrPayrollEvaluator::new();
        let runs = single_run(valid_line_item(), 3600.0); // Line items sum to 3500

        let result = evaluator.evaluate(&runs, &[], &[], &[]).unwrap();
        assert!(!result.passes);
        assert_eq!(result.run_sum_accuracy, 0.0);
        assert_eq!(result.gross_to_net_accuracy, 1.0);
        assert_eq!(result.issues.len(), 1);
        assert!(result.issues[0].starts_with("Run sum accuracy"));
    }

    #[test]
    fn test_time_to_payroll_mapping() {
        let evaluator = HrPayrollEvaluator::new();
        let time_entries = vec![
            TimeEntryData {
                employee_id: "EMP001".to_string(),
                total_hours: 160.0,
            },
            TimeEntryData {
                employee_id: "EMP002".to_string(),
                total_hours: 80.0,
            },
            TimeEntryData {
                employee_id: "EMP004".to_string(),
                total_hours: 40.0,
            },
        ];
        let payroll_hours = vec![
            PayrollHoursData {
                employee_id: "EMP001".to_string(),
                payroll_hours: 160.5, // Within the one-hour allowance
            },
            PayrollHoursData {
                employee_id: "EMP002".to_string(),
                payroll_hours: 100.0, // Off by 20 hours
            },
            PayrollHoursData {
                employee_id: "EMP003".to_string(),
                payroll_hours: 40.0, // No time entries at all
            },
            PayrollHoursData {
                employee_id: "EMP004".to_string(),
                payroll_hours: 40.0, // Exact match
            },
        ];

        let result = evaluator
            .evaluate(&[], &time_entries, &payroll_hours, &[])
            .unwrap();
        assert_eq!(result.time_to_payroll_mapping_rate, 0.5);
    }

    #[test]
    fn test_empty_data() {
        let evaluator = HrPayrollEvaluator::new();
        let result = evaluator.evaluate(&[], &[], &[], &[]).unwrap();
        assert!(result.passes);
        assert_eq!(result.total_line_items, 0);
        assert_eq!(result.total_runs, 0);
        assert_eq!(result.time_to_payroll_mapping_rate, 1.0);
    }

    #[test]
    fn test_expense_report_consistency() {
        let evaluator = HrPayrollEvaluator::new();
        let expenses = vec![
            ExpenseReportData {
                report_id: "ER001".to_string(),
                total_amount: 500.0,
                line_items_sum: 500.0,
                is_approved: true,
                has_approver: true,
            },
            ExpenseReportData {
                report_id: "ER002".to_string(),
                total_amount: 300.0,
                line_items_sum: 300.0,
                is_approved: true,
                has_approver: false, // Approved but no approver
            },
        ];

        let result = evaluator.evaluate(&[], &[], &[], &expenses).unwrap();
        assert_eq!(result.expense_line_item_sum_accuracy, 1.0);
        assert_eq!(result.expense_approval_consistency, 0.5);
    }
}
395}