Skip to main content

datasynth_generators/
it_controls_generator.rs

1//! IT Controls generator — access logs and change management records.
2//!
3//! Generates realistic IT access logs and change management records for
4//! ITGC (IT General Controls) testing, supporting ISA 315, ISA 330,
5//! and SOX 404 audit procedures.
6
7use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime};
8use datasynth_core::models::{AccessLog, ChangeManagementRecord};
9use datasynth_core::utils::seeded_rng;
10use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13
14// ---------------------------------------------------------------------------
15// Constants
16// ---------------------------------------------------------------------------
17
/// Access-log action names paired with cumulative selection thresholds.
///
/// `pick_action` draws a uniform value in `[0, 1)` and takes the first entry
/// whose threshold exceeds it, so an action's individual share is its
/// threshold minus the previous entry's threshold.
const ACCESS_ACTIONS: &[(&str, f64)] = &[
    ("login", 0.60),            // 60% share
    ("logout", 0.85),           // 25% share
    ("failed_login", 0.90),     // 5% share
    ("privilege_change", 0.95), // 5% share
    ("data_export", 1.00),      // 5% share
];
26
/// Change-management record types paired with cumulative selection thresholds.
///
/// `pick_change_type` draws a uniform value in `[0, 1)` and takes the first
/// entry whose threshold exceeds it, so a type's individual share is its
/// threshold minus the previous entry's threshold.
const CHANGE_TYPES: &[(&str, f64)] = &[
    ("config_change", 0.30),   // 30% share
    ("code_deployment", 0.55), // 25% share
    ("patch", 0.75),           // 20% share
    ("access_change", 0.90),   // 15% share
    ("emergency_fix", 1.00),   // 10% share
];
35
/// Description pool for `config_change` records.
///
/// `pick_description` also falls back to this pool for any change type it
/// does not recognise.
const CONFIG_CHANGE_DESCRIPTIONS: &[&str] = &[
    "Updated firewall rules for DMZ",
    "Modified database connection pool settings",
    "Changed application timeout parameters",
    "Updated email relay configuration",
    "Modified backup retention policy",
    "Adjusted logging verbosity levels",
    "Changed SSL/TLS certificate configuration",
    "Updated LDAP authentication settings",
];
47
/// Description pool for `code_deployment` records.
const CODE_DEPLOYMENT_DESCRIPTIONS: &[&str] = &[
    "Deployed financial reporting module v2.3",
    "Released hotfix for invoice processing",
    "Deployed updated reconciliation engine",
    "Released new user interface components",
    "Deployed API gateway update",
    "Released batch processing optimization",
    "Deployed security patch for web application",
    "Released data migration scripts",
];
58
/// Description pool for `patch` records.
const PATCH_DESCRIPTIONS: &[&str] = &[
    "Applied OS security patch KB-2024-001",
    "Updated database server to latest patch level",
    "Applied middleware security update",
    "Patched web server vulnerability CVE-2024-1234",
    "Applied ERP kernel update",
    "Updated antivirus definitions",
    "Applied network firmware update",
    "Patched authentication module vulnerability",
];
69
/// Description pool for `access_change` records.
const ACCESS_CHANGE_DESCRIPTIONS: &[&str] = &[
    "Granted read access to financial reports",
    "Revoked terminated employee access",
    "Modified role assignment for department transfer",
    "Added privileged access for system maintenance",
    "Updated service account permissions",
    "Removed legacy admin access rights",
    "Granted vendor portal access",
    "Modified segregation of duties profile",
];
80
/// Description pool for `emergency_fix` records.
const EMERGENCY_FIX_DESCRIPTIONS: &[&str] = &[
    "Emergency fix for production outage",
    "Critical security vulnerability remediation",
    "Emergency database recovery procedure",
    "Urgent fix for data corruption issue",
    "Emergency patch for authentication bypass",
    "Critical fix for payment processing failure",
    "Emergency rollback of failed deployment",
    "Urgent fix for regulatory reporting deadline",
];
91
/// Test-evidence reference templates.
///
/// The literal text `{:04}` is a placeholder, not a live format specifier:
/// `generate_change_records` substitutes it via `str::replace` with a
/// zero-padded four-digit evidence number.
const TEST_EVIDENCE_TEMPLATES: &[&str] = &[
    "UAT sign-off document ref: UAT-2024-{:04}",
    "Regression test suite passed: TS-{:04}",
    "Integration test report: ITR-{:04}",
    "Performance test results: PTR-{:04}",
    "Security scan report: SEC-{:04}",
    "User acceptance testing completed: UAT-{:04}",
];
100
101// ---------------------------------------------------------------------------
102// Generator
103// ---------------------------------------------------------------------------
104
105/// Generates [`AccessLog`] and [`ChangeManagementRecord`] entries for ITGC testing.
106pub struct ItControlsGenerator {
107    rng: ChaCha8Rng,
108    uuid_factory: DeterministicUuidFactory,
109}
110
111impl ItControlsGenerator {
112    /// Create a new generator with the given seed.
113    pub fn new(seed: u64) -> Self {
114        Self {
115            rng: seeded_rng(seed, 0),
116            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::ItControls),
117        }
118    }
119
120    /// Generate IT access logs for the given employees and systems.
121    ///
122    /// Produces 10-30 log entries per employee per month with realistic
123    /// distributions:
124    /// - Actions: login (60%), logout (25%), failed_login (5%),
125    ///   privilege_change (5%), data_export (5%)
126    /// - 80% of events during business hours (8am-6pm)
127    /// - Failed logins clustered in brute-force patterns (3-5 consecutive)
128    /// - IP addresses from internal 10.0.0.0/8 range
129    pub fn generate_access_logs(
130        &mut self,
131        employee_ids: &[(String, String)], // (id, name) pairs
132        systems: &[String],
133        start_date: NaiveDate,
134        period_months: u32,
135    ) -> Vec<AccessLog> {
136        if employee_ids.is_empty() || systems.is_empty() {
137            return Vec::new();
138        }
139
140        let mut logs = Vec::new();
141
142        for month_offset in 0..period_months {
143            let year = start_date.year() + (start_date.month0() + month_offset) as i32 / 12;
144            let month = (start_date.month0() + month_offset) % 12 + 1;
145            let days_in_month = days_in_month(year, month);
146
147            for (user_id, user_name) in employee_ids {
148                let log_count = self.rng.random_range(10u32..=30);
149                // Assign a consistent primary system and IP for this employee
150                let primary_system = &systems[self.rng.random_range(0..systems.len())];
151                let ip_address = self.generate_ip();
152
153                // Decide whether this employee gets a failed login cluster this month
154                let has_failed_cluster = self.rng.random_bool(0.08);
155                let cluster_day = if has_failed_cluster {
156                    self.rng.random_range(1..=days_in_month)
157                } else {
158                    1 // unused
159                };
160
161                for i in 0..log_count {
162                    let day = self.rng.random_range(1..=days_in_month);
163                    let (hour, minute, second) = self.generate_time();
164
165                    let Some(date) = NaiveDate::from_ymd_opt(year, month, day) else {
166                        continue;
167                    };
168                    let Some(time) = NaiveTime::from_hms_opt(hour, minute, second) else {
169                        continue;
170                    };
171                    let timestamp = NaiveDateTime::new(date, time);
172
173                    let (action, success) = self.pick_action();
174                    let system = if self.rng.random_bool(0.7) {
175                        primary_system.clone()
176                    } else {
177                        systems[self.rng.random_range(0..systems.len())].clone()
178                    };
179
180                    let session_duration = if action == "logout" {
181                        Some(self.rng.random_range(5u32..=480))
182                    } else {
183                        None
184                    };
185
186                    logs.push(AccessLog {
187                        log_id: self.uuid_factory.next(),
188                        timestamp,
189                        user_id: user_id.clone(),
190                        user_name: user_name.clone(),
191                        system,
192                        action,
193                        success,
194                        ip_address: ip_address.clone(),
195                        session_duration_minutes: session_duration,
196                    });
197
198                    // Insert failed login cluster if applicable
199                    if has_failed_cluster && i == 0 {
200                        let cluster_size = self.rng.random_range(3u32..=5);
201                        let Some(cluster_date) = NaiveDate::from_ymd_opt(year, month, cluster_day)
202                        else {
203                            continue;
204                        };
205
206                        for j in 0..cluster_size {
207                            let cluster_minute = self.rng.random_range(0u32..=2);
208                            let cluster_second = self.rng.random_range(0u32..=59);
209                            let cluster_hour = self.rng.random_range(1u32..=5); // off-hours
210                            let Some(ct) = NaiveTime::from_hms_opt(
211                                cluster_hour,
212                                cluster_minute + j,
213                                cluster_second,
214                            ) else {
215                                continue;
216                            };
217
218                            logs.push(AccessLog {
219                                log_id: self.uuid_factory.next(),
220                                timestamp: NaiveDateTime::new(cluster_date, ct),
221                                user_id: user_id.clone(),
222                                user_name: user_name.clone(),
223                                system: primary_system.clone(),
224                                action: "failed_login".to_string(),
225                                success: false,
226                                ip_address: self.generate_ip(), // different IP = external attacker
227                                session_duration_minutes: None,
228                            });
229                        }
230                    }
231                }
232            }
233        }
234
235        // Sort chronologically
236        logs.sort_by_key(|l| l.timestamp);
237        logs
238    }
239
240    /// Generate change management records for the given systems and period.
241    ///
242    /// Produces 5-15 changes per month with realistic distributions:
243    /// - Types: config_change (30%), code_deployment (25%), patch (20%),
244    ///   access_change (15%), emergency_fix (10%)
245    /// - 90% have approval (10% gap = ITGC finding)
246    /// - 85% tested before deployment
247    /// - 95% have rollback plans
248    /// - Emergency fixes: lower approval/testing rates (realistic weakness)
249    pub fn generate_change_records(
250        &mut self,
251        employee_ids: &[(String, String)],
252        systems: &[String],
253        start_date: NaiveDate,
254        period_months: u32,
255    ) -> Vec<ChangeManagementRecord> {
256        if employee_ids.is_empty() || systems.is_empty() {
257            return Vec::new();
258        }
259
260        let mut records = Vec::new();
261
262        for month_offset in 0..period_months {
263            let year = start_date.year() + (start_date.month0() + month_offset) as i32 / 12;
264            let month = (start_date.month0() + month_offset) % 12 + 1;
265            let days_in_month = days_in_month(year, month);
266
267            let changes_this_month = self.rng.random_range(5u32..=15);
268
269            for _ in 0..changes_this_month {
270                let change_type = self.pick_change_type();
271                let system = &systems[self.rng.random_range(0..systems.len())];
272                let description = self.pick_description(&change_type);
273
274                let requester_idx = self.rng.random_range(0..employee_ids.len());
275                let requested_by = employee_ids[requester_idx].1.clone();
276
277                // Pick implementer (different from requester when possible)
278                let implementer_idx = if employee_ids.len() > 1 {
279                    let mut idx = self.rng.random_range(0..employee_ids.len());
280                    if idx == requester_idx {
281                        idx = (idx + 1) % employee_ids.len();
282                    }
283                    idx
284                } else {
285                    0
286                };
287                let implemented_by = employee_ids[implementer_idx].1.clone();
288
289                // Approval: emergency fixes have ~30% approval, others ~95%
290                let is_emergency = change_type == "emergency_fix";
291                let has_approval = if is_emergency {
292                    self.rng.random_bool(0.30)
293                } else {
294                    self.rng.random_bool(0.95)
295                };
296
297                let approved_by = if has_approval {
298                    // Pick approver (different from requester and implementer when possible)
299                    let mut approver_idx = self.rng.random_range(0..employee_ids.len());
300                    if employee_ids.len() > 2 {
301                        while approver_idx == requester_idx || approver_idx == implementer_idx {
302                            approver_idx = self.rng.random_range(0..employee_ids.len());
303                        }
304                    }
305                    Some(employee_ids[approver_idx].1.clone())
306                } else {
307                    None
308                };
309
310                // Testing: emergency fixes have ~20% testing, others ~90%
311                let tested = if is_emergency {
312                    self.rng.random_bool(0.20)
313                } else {
314                    self.rng.random_bool(0.90)
315                };
316
317                let test_evidence = if tested {
318                    let evidence_num = self.rng.random_range(1u32..=9999);
319                    let template = TEST_EVIDENCE_TEMPLATES
320                        [self.rng.random_range(0..TEST_EVIDENCE_TEMPLATES.len())];
321                    Some(template.replace("{:04}", &format!("{:04}", evidence_num)))
322                } else {
323                    None
324                };
325
326                // Rollback plan: emergency fixes have ~50%, others ~98%
327                let rollback_plan = if is_emergency {
328                    self.rng.random_bool(0.50)
329                } else {
330                    self.rng.random_bool(0.98)
331                };
332
333                // Request date: random day in the month
334                let request_day = self.rng.random_range(1..=days_in_month);
335                let request_hour = self.rng.random_range(8u32..=17);
336                let request_minute = self.rng.random_range(0u32..=59);
337                let Some(request_date_d) = NaiveDate::from_ymd_opt(year, month, request_day) else {
338                    continue;
339                };
340                let Some(request_time) = NaiveTime::from_hms_opt(request_hour, request_minute, 0)
341                else {
342                    continue;
343                };
344                let request_date = NaiveDateTime::new(request_date_d, request_time);
345
346                // Implementation date: 0-14 days after request
347                // Emergency fixes: 0-1 days; others: 1-14 days
348                let impl_lag_days = if is_emergency {
349                    self.rng.random_range(0i64..=1)
350                } else {
351                    self.rng.random_range(1i64..=14)
352                };
353                let impl_date_d = request_date_d + chrono::Duration::days(impl_lag_days);
354                let impl_hour = self.rng.random_range(8u32..=22);
355                let impl_minute = self.rng.random_range(0u32..=59);
356                let Some(impl_time) = NaiveTime::from_hms_opt(impl_hour, impl_minute, 0) else {
357                    continue;
358                };
359                let implementation_date = NaiveDateTime::new(impl_date_d, impl_time);
360
361                records.push(ChangeManagementRecord {
362                    change_id: self.uuid_factory.next(),
363                    system: system.clone(),
364                    change_type,
365                    description,
366                    requested_by,
367                    approved_by,
368                    implemented_by,
369                    request_date,
370                    implementation_date,
371                    tested,
372                    test_evidence,
373                    rollback_plan,
374                });
375            }
376        }
377
378        // Sort by request date
379        records.sort_by_key(|r| r.request_date);
380        records
381    }
382
383    // -----------------------------------------------------------------------
384    // Helpers
385    // -----------------------------------------------------------------------
386
387    /// Pick an action based on weighted distribution.
388    fn pick_action(&mut self) -> (String, bool) {
389        let r: f64 = self.rng.random_range(0.0..1.0);
390        for &(action, threshold) in ACCESS_ACTIONS {
391            if r < threshold {
392                let success = action != "failed_login";
393                return (action.to_string(), success);
394            }
395        }
396        ("login".to_string(), true)
397    }
398
399    /// Pick a change type based on weighted distribution.
400    fn pick_change_type(&mut self) -> String {
401        let r: f64 = self.rng.random_range(0.0..1.0);
402        for &(ct, threshold) in CHANGE_TYPES {
403            if r < threshold {
404                return ct.to_string();
405            }
406        }
407        "config_change".to_string()
408    }
409
410    /// Pick a description template for a given change type.
411    fn pick_description(&mut self, change_type: &str) -> String {
412        let pool = match change_type {
413            "config_change" => CONFIG_CHANGE_DESCRIPTIONS,
414            "code_deployment" => CODE_DEPLOYMENT_DESCRIPTIONS,
415            "patch" => PATCH_DESCRIPTIONS,
416            "access_change" => ACCESS_CHANGE_DESCRIPTIONS,
417            "emergency_fix" => EMERGENCY_FIX_DESCRIPTIONS,
418            _ => CONFIG_CHANGE_DESCRIPTIONS,
419        };
420        pool.choose(&mut self.rng)
421            .map(|s| s.to_string())
422            .unwrap_or_else(|| "System change".to_string())
423    }
424
425    /// Generate a timestamp hour/minute/second with 80% business hours bias.
426    fn generate_time(&mut self) -> (u32, u32, u32) {
427        let is_business_hours = self.rng.random_bool(0.80);
428        let hour = if is_business_hours {
429            self.rng.random_range(8u32..=17)
430        } else {
431            // Off-hours: 0-7 or 18-23
432            if self.rng.random_bool(0.5) {
433                self.rng.random_range(0u32..=7)
434            } else {
435                self.rng.random_range(18u32..=23)
436            }
437        };
438        let minute = self.rng.random_range(0u32..=59);
439        let second = self.rng.random_range(0u32..=59);
440        (hour, minute, second)
441    }
442
443    /// Generate an IP address in the 10.0.0.0/8 range.
444    fn generate_ip(&mut self) -> String {
445        format!(
446            "10.{}.{}.{}",
447            self.rng.random_range(0u8..=255),
448            self.rng.random_range(0u8..=255),
449            self.rng.random_range(1u8..=254),
450        )
451    }
452}
453
454/// Return the number of days in the given month.
455fn days_in_month(year: i32, month: u32) -> u32 {
456    // Get the first day of the next month, then subtract one day
457    let (next_year, next_month) = if month == 12 {
458        (year + 1, 1)
459    } else {
460        (year, month + 1)
461    };
462    NaiveDate::from_ymd_opt(next_year, next_month, 1)
463        .and_then(|d| d.pred_opt())
464        .map(|d| d.day())
465        .unwrap_or(28)
466}
467
468// ---------------------------------------------------------------------------
469// Tests
470// ---------------------------------------------------------------------------
471
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use chrono::Timelike;

    /// Ten `(id, name)` pairs: `EMP-0001`/`Employee 1` through `EMP-0010`/`Employee 10`.
    fn sample_employees() -> Vec<(String, String)> {
        let mut staff = Vec::with_capacity(10);
        for n in 1..=10 {
            staff.push((format!("EMP-{:04}", n), format!("Employee {}", n)));
        }
        staff
    }

    /// Four representative system names.
    fn sample_systems() -> Vec<String> {
        ["SAP-FI", "Active Directory", "Oracle-HR", "ServiceNow"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }

    /// First day of the period used by every test.
    fn period_start() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 1).unwrap()
    }

    /// Access logs for the sample data over `months` months, seed 42.
    fn access_logs_for(months: u32) -> Vec<AccessLog> {
        ItControlsGenerator::new(42).generate_access_logs(
            &sample_employees(),
            &sample_systems(),
            period_start(),
            months,
        )
    }

    /// Change records for the sample data over `months` months, seed 42.
    fn change_records_for(months: u32) -> Vec<ChangeManagementRecord> {
        ItControlsGenerator::new(42).generate_change_records(
            &sample_employees(),
            &sample_systems(),
            period_start(),
            months,
        )
    }

    /// Share of `records` that carry an approver.
    fn approval_rate(records: &[&ChangeManagementRecord]) -> f64 {
        let approved = records.iter().filter(|r| r.approved_by.is_some()).count();
        approved as f64 / records.len() as f64
    }

    #[test]
    fn test_access_logs_generated() {
        let logs = access_logs_for(3);
        assert!(!logs.is_empty(), "should produce access logs");
        for entry in &logs {
            assert!(!entry.user_id.is_empty());
            assert!(!entry.user_name.is_empty());
            assert!(!entry.system.is_empty());
            assert!(!entry.action.is_empty());
            assert!(!entry.ip_address.is_empty());
            assert!(entry.ip_address.starts_with("10."));
        }
    }

    #[test]
    fn test_access_log_business_hours() {
        let logs = access_logs_for(6);
        let in_business_hours = logs
            .iter()
            .filter(|entry| matches!(entry.timestamp.time().hour(), 8..=17))
            .count();
        let ratio = in_business_hours as f64 / logs.len() as f64;
        assert!(
            ratio > 0.70,
            "expected >70% business hours, got {:.1}%",
            ratio * 100.0
        );
    }

    #[test]
    fn test_failed_login_rate() {
        let logs = access_logs_for(6);
        let failed = logs
            .iter()
            .filter(|entry| entry.action == "failed_login")
            .count();
        let rate = failed as f64 / logs.len() as f64;
        assert!(
            (0.02..=0.15).contains(&rate),
            "expected 2-15% failed login rate, got {:.1}%",
            rate * 100.0
        );
    }

    #[test]
    fn test_access_log_references_employees() {
        let employees = sample_employees();
        let known_ids: std::collections::HashSet<&str> =
            employees.iter().map(|(id, _)| id.as_str()).collect();

        let logs = ItControlsGenerator::new(42).generate_access_logs(
            &employees,
            &sample_systems(),
            period_start(),
            3,
        );

        for entry in &logs {
            assert!(
                known_ids.contains(entry.user_id.as_str()),
                "user_id {} should come from employee input",
                entry.user_id
            );
        }
    }

    #[test]
    fn test_change_records_generated() {
        let records = change_records_for(3);
        assert!(!records.is_empty(), "should produce change records");
        for record in &records {
            assert!(!record.system.is_empty());
            assert!(!record.change_type.is_empty());
            assert!(!record.description.is_empty());
            assert!(!record.requested_by.is_empty());
            assert!(!record.implemented_by.is_empty());
        }
    }

    #[test]
    fn test_change_approval_rate() {
        let records = change_records_for(12);
        let approved = records
            .iter()
            .filter(|record| record.approved_by.is_some())
            .count();
        let rate = approved as f64 / records.len() as f64;
        // Overall rate should be ~85-95% (mix of emergency and normal)
        assert!(
            rate > 0.75 && rate < 0.99,
            "expected ~85-95% approval rate, got {:.1}%",
            rate * 100.0
        );
    }

    #[test]
    fn test_emergency_fixes_unapproved() {
        let records = change_records_for(24);
        let (emergency, non_emergency): (Vec<_>, Vec<_>) = records
            .iter()
            .partition(|record| record.change_type == "emergency_fix");

        // The comparison is only meaningful when both groups have members.
        if emergency.is_empty() || non_emergency.is_empty() {
            return;
        }

        let emergency_approval_rate = approval_rate(&emergency);
        let non_emergency_approval_rate = approval_rate(&non_emergency);

        assert!(
            emergency_approval_rate < non_emergency_approval_rate,
            "emergency fixes ({:.0}%) should have lower approval rate than normal changes ({:.0}%)",
            emergency_approval_rate * 100.0,
            non_emergency_approval_rate * 100.0
        );
    }

    #[test]
    fn test_change_dates_ordered() {
        for record in &change_records_for(6) {
            // implementation_date should be on or after request_date (comparing date portion)
            assert!(
                record.implementation_date.date() >= record.request_date.date(),
                "implementation date {} should be >= request date {} for change {}",
                record.implementation_date,
                record.request_date,
                record.change_id
            );
        }
    }
}