Skip to main content

datasynth_generators/disruption/
mod.rs

1//! Operational disruption modeling.
2//!
3//! Models realistic operational disruptions that can be injected into generated data:
4//! - System outages (missing data windows)
5//! - Migration artifacts (format changes, dual-running periods)
6//! - Process changes (workflow shifts, policy changes)
7//! - Data recovery patterns (backfill, catch-up processing)
8
9use chrono::NaiveDate;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13/// Types of operational disruptions.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15pub enum DisruptionType {
16    /// System outage causing missing data
17    SystemOutage(OutageConfig),
18    /// System migration with format changes
19    SystemMigration(MigrationConfig),
20    /// Process or policy change
21    ProcessChange(ProcessChangeConfig),
22    /// Data recovery or backfill
23    DataRecovery(RecoveryConfig),
24    /// Regulatory compliance change
25    RegulatoryChange(RegulatoryConfig),
26}
27
28/// Configuration for a system outage.
29#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
30pub struct OutageConfig {
31    /// Start of the outage
32    pub start_date: NaiveDate,
33    /// End of the outage
34    pub end_date: NaiveDate,
35    /// Affected systems/modules
36    pub affected_systems: Vec<String>,
37    /// Whether data was completely lost vs just delayed
38    pub data_loss: bool,
39    /// Recovery mode (if not complete loss)
40    pub recovery_mode: Option<RecoveryMode>,
41    /// Outage cause for labeling
42    pub cause: OutageCause,
43}
44
45/// Cause of an outage.
46#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
47pub enum OutageCause {
48    /// Planned maintenance
49    PlannedMaintenance,
50    /// Unplanned system failure
51    SystemFailure,
52    /// Network connectivity issues
53    NetworkOutage,
54    /// Database issues
55    DatabaseFailure,
56    /// Third-party service unavailable
57    VendorOutage,
58    /// Security incident
59    SecurityIncident,
60    /// Natural disaster
61    Disaster,
62}
63
64/// How data was recovered after an outage.
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
66pub enum RecoveryMode {
67    /// Transactions processed after recovery with original dates
68    BackdatedRecovery,
69    /// Transactions processed with recovery date
70    CurrentDateRecovery,
71    /// Mix of both approaches
72    MixedRecovery,
73    /// Manual journal entries to reconcile
74    ManualReconciliation,
75}
76
77/// Configuration for a system migration.
78#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
79pub struct MigrationConfig {
80    /// Migration go-live date
81    pub go_live_date: NaiveDate,
82    /// Dual-running period start (before go-live)
83    pub dual_run_start: Option<NaiveDate>,
84    /// Dual-running period end (after go-live)
85    pub dual_run_end: Option<NaiveDate>,
86    /// Source system name
87    pub source_system: String,
88    /// Target system name
89    pub target_system: String,
90    /// Format changes applied
91    pub format_changes: Vec<FormatChange>,
92    /// Account mapping changes
93    pub account_remapping: HashMap<String, String>,
94    /// Data quality issues during migration
95    pub migration_issues: Vec<MigrationIssue>,
96}
97
98/// Types of format changes during migration.
99#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
100pub enum FormatChange {
101    /// Date format change (e.g., MM/DD/YYYY to YYYY-MM-DD)
102    DateFormat {
103        old_format: String,
104        new_format: String,
105    },
106    /// Amount precision change
107    AmountPrecision { old_decimals: u8, new_decimals: u8 },
108    /// Currency code format
109    CurrencyCode {
110        old_format: String,
111        new_format: String,
112    },
113    /// Account number format
114    AccountFormat {
115        old_pattern: String,
116        new_pattern: String,
117    },
118    /// Reference number format
119    ReferenceFormat {
120        old_pattern: String,
121        new_pattern: String,
122    },
123    /// Text encoding change
124    TextEncoding {
125        old_encoding: String,
126        new_encoding: String,
127    },
128    /// Field length change
129    FieldLength {
130        field: String,
131        old_length: usize,
132        new_length: usize,
133    },
134}
135
136/// Issues that can occur during migration.
137#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
138pub enum MigrationIssue {
139    /// Duplicate records created
140    DuplicateRecords { affected_count: usize },
141    /// Missing records not migrated
142    MissingRecords { affected_count: usize },
143    /// Truncated data
144    TruncatedData {
145        field: String,
146        affected_count: usize,
147    },
148    /// Encoding corruption
149    EncodingCorruption { affected_count: usize },
150    /// Mismatched balances
151    BalanceMismatch { variance: f64 },
152    /// Orphaned references
153    OrphanedReferences { affected_count: usize },
154}
155
156/// Configuration for process changes.
157#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
158pub struct ProcessChangeConfig {
159    /// Effective date of the change
160    pub effective_date: NaiveDate,
161    /// Type of process change
162    pub change_type: ProcessChangeType,
163    /// Transition period length in days
164    pub transition_days: u32,
165    /// Whether retroactive changes were applied
166    pub retroactive: bool,
167}
168
169/// Types of process changes.
170#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
171pub enum ProcessChangeType {
172    /// Approval threshold change
173    ApprovalThreshold {
174        old_threshold: f64,
175        new_threshold: f64,
176    },
177    /// New approval level added
178    NewApprovalLevel { level_name: String, threshold: f64 },
179    /// Approval level removed
180    RemovedApprovalLevel { level_name: String },
181    /// Segregation of duties change
182    SodPolicyChange {
183        new_conflicts: Vec<(String, String)>,
184    },
185    /// Account posting rules change
186    PostingRuleChange { affected_accounts: Vec<String> },
187    /// Vendor management change
188    VendorPolicyChange { policy_name: String },
189    /// Period close procedure change
190    CloseProcessChange {
191        old_close_day: u8,
192        new_close_day: u8,
193    },
194    /// Document retention change
195    RetentionPolicyChange { old_years: u8, new_years: u8 },
196}
197
198/// Configuration for data recovery scenarios.
199#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
200pub struct RecoveryConfig {
201    /// When recovery started
202    pub recovery_start: NaiveDate,
203    /// When recovery completed
204    pub recovery_end: NaiveDate,
205    /// Period being recovered
206    pub affected_period_start: NaiveDate,
207    /// Period being recovered end
208    pub affected_period_end: NaiveDate,
209    /// Recovery approach
210    pub recovery_type: RecoveryType,
211    /// Quality of recovered data
212    pub data_quality: RecoveredDataQuality,
213}
214
215/// Types of data recovery.
216#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
217pub enum RecoveryType {
218    /// Full backup restoration
219    BackupRestore,
220    /// Reconstruction from source documents
221    SourceReconstruction,
222    /// Interface file reprocessing
223    InterfaceReplay,
224    /// Manual entry from paper records
225    ManualReentry,
226    /// Partial recovery with estimates
227    PartialWithEstimates,
228}
229
230/// Quality level of recovered data.
231#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
232pub enum RecoveredDataQuality {
233    /// Complete and accurate
234    Complete,
235    /// Minor discrepancies
236    MinorDiscrepancies,
237    /// Estimated values used
238    EstimatedValues,
239    /// Significant gaps remain
240    PartialRecovery,
241}
242
243/// Configuration for regulatory changes.
244#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
245pub struct RegulatoryConfig {
246    /// Effective date
247    pub effective_date: NaiveDate,
248    /// Regulation name
249    pub regulation_name: String,
250    /// Type of regulatory change
251    pub change_type: RegulatoryChangeType,
252    /// Grace period in days
253    pub grace_period_days: u32,
254}
255
256/// Types of regulatory changes.
257#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
258pub enum RegulatoryChangeType {
259    /// New reporting requirement
260    NewReporting { report_name: String },
261    /// Changed chart of accounts structure
262    CoaRestructure,
263    /// New tax rules
264    TaxChange { jurisdiction: String },
265    /// Revenue recognition change
266    RevenueRecognition,
267    /// Lease accounting change
268    LeaseAccounting,
269    /// Data privacy requirement
270    DataPrivacy { regulation: String },
271}
272
273/// A disruption event with timing and effects.
274#[derive(Debug, Clone, Serialize, Deserialize)]
275pub struct DisruptionEvent {
276    /// Unique identifier
277    pub event_id: String,
278    /// Type of disruption
279    pub disruption_type: DisruptionType,
280    /// Detailed description
281    pub description: String,
282    /// Impact severity (1-5)
283    pub severity: u8,
284    /// Affected company codes
285    pub affected_companies: Vec<String>,
286    /// Labels for ML training
287    pub labels: HashMap<String, String>,
288}
289
290/// Manages disruption scenarios for data generation.
291pub struct DisruptionManager {
292    /// Active disruption events
293    events: Vec<DisruptionEvent>,
294    /// Event counter for ID generation
295    event_counter: u64,
296}
297
298impl DisruptionManager {
299    /// Create a new disruption manager.
300    pub fn new() -> Self {
301        Self {
302            events: Vec::new(),
303            event_counter: 0,
304        }
305    }
306
307    /// Add a disruption event.
308    pub fn add_event(
309        &mut self,
310        disruption_type: DisruptionType,
311        description: &str,
312        severity: u8,
313        affected_companies: Vec<String>,
314    ) -> String {
315        self.event_counter += 1;
316        let event_id = format!("DISRUPT-{:06}", self.event_counter);
317
318        let labels = self.generate_labels(&disruption_type);
319
320        let event = DisruptionEvent {
321            event_id: event_id.clone(),
322            disruption_type,
323            description: description.to_string(),
324            severity,
325            affected_companies,
326            labels,
327        };
328
329        self.events.push(event);
330        event_id
331    }
332
333    /// Generate ML labels for a disruption type.
334    fn generate_labels(&self, disruption_type: &DisruptionType) -> HashMap<String, String> {
335        let mut labels = HashMap::new();
336
337        match disruption_type {
338            DisruptionType::SystemOutage(config) => {
339                labels.insert("disruption_category".to_string(), "outage".to_string());
340                labels.insert("cause".to_string(), format!("{:?}", config.cause));
341                labels.insert("data_loss".to_string(), config.data_loss.to_string());
342            }
343            DisruptionType::SystemMigration(config) => {
344                labels.insert("disruption_category".to_string(), "migration".to_string());
345                labels.insert("source_system".to_string(), config.source_system.clone());
346                labels.insert("target_system".to_string(), config.target_system.clone());
347            }
348            DisruptionType::ProcessChange(config) => {
349                labels.insert(
350                    "disruption_category".to_string(),
351                    "process_change".to_string(),
352                );
353                labels.insert(
354                    "change_type".to_string(),
355                    format!("{:?}", config.change_type),
356                );
357                labels.insert("retroactive".to_string(), config.retroactive.to_string());
358            }
359            DisruptionType::DataRecovery(config) => {
360                labels.insert("disruption_category".to_string(), "recovery".to_string());
361                labels.insert(
362                    "recovery_type".to_string(),
363                    format!("{:?}", config.recovery_type),
364                );
365                labels.insert(
366                    "data_quality".to_string(),
367                    format!("{:?}", config.data_quality),
368                );
369            }
370            DisruptionType::RegulatoryChange(config) => {
371                labels.insert("disruption_category".to_string(), "regulatory".to_string());
372                labels.insert("regulation".to_string(), config.regulation_name.clone());
373                labels.insert(
374                    "change_type".to_string(),
375                    format!("{:?}", config.change_type),
376                );
377            }
378        }
379
380        labels
381    }
382
383    /// Check if a date falls within any outage period.
384    pub fn is_in_outage(&self, date: NaiveDate, company_code: &str) -> Option<&DisruptionEvent> {
385        self.events.iter().find(|event| {
386            if !event.affected_companies.contains(&company_code.to_string())
387                && !event.affected_companies.is_empty()
388            {
389                return false;
390            }
391
392            match &event.disruption_type {
393                DisruptionType::SystemOutage(config) => {
394                    date >= config.start_date && date <= config.end_date
395                }
396                _ => false,
397            }
398        })
399    }
400
401    /// Check if a date is in a migration dual-run period.
402    pub fn is_in_dual_run(&self, date: NaiveDate, company_code: &str) -> Option<&DisruptionEvent> {
403        self.events.iter().find(|event| {
404            if !event.affected_companies.contains(&company_code.to_string())
405                && !event.affected_companies.is_empty()
406            {
407                return false;
408            }
409
410            match &event.disruption_type {
411                DisruptionType::SystemMigration(config) => {
412                    let start = config.dual_run_start.unwrap_or(config.go_live_date);
413                    let end = config.dual_run_end.unwrap_or(config.go_live_date);
414                    date >= start && date <= end
415                }
416                _ => false,
417            }
418        })
419    }
420
421    /// Get format changes applicable to a date.
422    pub fn get_format_changes(&self, date: NaiveDate, company_code: &str) -> Vec<&FormatChange> {
423        let mut changes = Vec::new();
424
425        for event in &self.events {
426            if !event.affected_companies.contains(&company_code.to_string())
427                && !event.affected_companies.is_empty()
428            {
429                continue;
430            }
431
432            if let DisruptionType::SystemMigration(config) = &event.disruption_type {
433                if date >= config.go_live_date {
434                    changes.extend(config.format_changes.iter());
435                }
436            }
437        }
438
439        changes
440    }
441
442    /// Get active process changes for a date.
443    pub fn get_active_process_changes(
444        &self,
445        date: NaiveDate,
446        company_code: &str,
447    ) -> Vec<&ProcessChangeConfig> {
448        self.events
449            .iter()
450            .filter(|event| {
451                event.affected_companies.contains(&company_code.to_string())
452                    || event.affected_companies.is_empty()
453            })
454            .filter_map(|event| match &event.disruption_type {
455                DisruptionType::ProcessChange(config) if date >= config.effective_date => {
456                    Some(config)
457                }
458                _ => None,
459            })
460            .collect()
461    }
462
463    /// Check if a date is in a recovery period.
464    pub fn is_in_recovery(&self, date: NaiveDate, company_code: &str) -> Option<&DisruptionEvent> {
465        self.events.iter().find(|event| {
466            if !event.affected_companies.contains(&company_code.to_string())
467                && !event.affected_companies.is_empty()
468            {
469                return false;
470            }
471
472            match &event.disruption_type {
473                DisruptionType::DataRecovery(config) => {
474                    date >= config.recovery_start && date <= config.recovery_end
475                }
476                _ => false,
477            }
478        })
479    }
480
481    /// Get all events.
482    pub fn events(&self) -> &[DisruptionEvent] {
483        &self.events
484    }
485
486    /// Get events affecting a specific company.
487    pub fn events_for_company(&self, company_code: &str) -> Vec<&DisruptionEvent> {
488        self.events
489            .iter()
490            .filter(|e| {
491                e.affected_companies.contains(&company_code.to_string())
492                    || e.affected_companies.is_empty()
493            })
494            .collect()
495    }
496}
497
498impl Default for DisruptionManager {
499    fn default() -> Self {
500        Self::new()
501    }
502}
503
504/// Effects that a disruption can have on generated data.
505#[derive(Debug, Clone, Default)]
506pub struct DisruptionEffect {
507    /// Skip generating data for this date
508    pub skip_generation: bool,
509    /// Apply format transformation
510    pub format_transform: Option<FormatChange>,
511    /// Add recovery/backfill markers
512    pub add_recovery_markers: bool,
513    /// Duplicate to secondary system
514    pub duplicate_to_system: Option<String>,
515    /// Apply process rule changes
516    pub process_changes: Vec<ProcessChangeType>,
517    /// Labels to add to generated records
518    pub labels: HashMap<String, String>,
519}
520
521/// Apply disruption effects to determine how data should be generated.
522pub fn compute_disruption_effect(
523    manager: &DisruptionManager,
524    date: NaiveDate,
525    company_code: &str,
526) -> DisruptionEffect {
527    let mut effect = DisruptionEffect::default();
528
529    // Check for outage
530    if let Some(outage_event) = manager.is_in_outage(date, company_code) {
531        if let DisruptionType::SystemOutage(config) = &outage_event.disruption_type {
532            if config.data_loss {
533                effect.skip_generation = true;
534            } else {
535                effect.add_recovery_markers = true;
536            }
537            effect
538                .labels
539                .insert("outage_event".to_string(), outage_event.event_id.clone());
540        }
541    }
542
543    // Check for dual-run
544    if let Some(migration_event) = manager.is_in_dual_run(date, company_code) {
545        if let DisruptionType::SystemMigration(config) = &migration_event.disruption_type {
546            effect.duplicate_to_system = Some(config.target_system.clone());
547            effect.labels.insert(
548                "migration_event".to_string(),
549                migration_event.event_id.clone(),
550            );
551        }
552    }
553
554    // Check for format changes
555    let format_changes = manager.get_format_changes(date, company_code);
556    if let Some(first_change) = format_changes.first() {
557        effect.format_transform = Some((*first_change).clone());
558    }
559
560    // Check for process changes
561    for process_change in manager.get_active_process_changes(date, company_code) {
562        effect
563            .process_changes
564            .push(process_change.change_type.clone());
565    }
566
567    // Check for recovery period
568    if let Some(recovery_event) = manager.is_in_recovery(date, company_code) {
569        effect.add_recovery_markers = true;
570        effect.labels.insert(
571            "recovery_event".to_string(),
572            recovery_event.event_id.clone(),
573        );
574    }
575
576    effect
577}
578
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a date from literals that are known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// An outage is reported only inside its window and only for the
    /// company it was registered for.
    #[test]
    fn test_outage_detection() {
        let mut mgr = DisruptionManager::new();
        mgr.add_event(
            DisruptionType::SystemOutage(OutageConfig {
                start_date: ymd(2024, 3, 15),
                end_date: ymd(2024, 3, 17),
                affected_systems: vec!["GL".to_string()],
                data_loss: false,
                recovery_mode: Some(RecoveryMode::BackdatedRecovery),
                cause: OutageCause::SystemFailure,
            }),
            "GL system outage",
            3,
            vec!["1000".to_string()],
        );

        let in_outage =
            |date: NaiveDate, company: &str| mgr.is_in_outage(date, company).is_some();

        // Inside the outage window.
        assert!(in_outage(ymd(2024, 3, 16), "1000"));
        // The day before the outage starts.
        assert!(!in_outage(ymd(2024, 3, 14), "1000"));
        // Inside the window, but for a company that is not affected.
        assert!(!in_outage(ymd(2024, 3, 16), "2000"));
    }

    /// A migration registered for all companies reports a dual-run only
    /// between its dual-run bounds.
    #[test]
    fn test_migration_dual_run() {
        let mut mgr = DisruptionManager::new();
        mgr.add_event(
            DisruptionType::SystemMigration(MigrationConfig {
                go_live_date: ymd(2024, 7, 1),
                dual_run_start: Some(ymd(2024, 6, 15)),
                dual_run_end: Some(ymd(2024, 7, 15)),
                source_system: "Legacy".to_string(),
                target_system: "S4HANA".to_string(),
                format_changes: vec![FormatChange::DateFormat {
                    old_format: "MM/DD/YYYY".to_string(),
                    new_format: "YYYY-MM-DD".to_string(),
                }],
                account_remapping: HashMap::new(),
                migration_issues: Vec::new(),
            }),
            "S/4HANA migration",
            4,
            Vec::new(), // an empty list means all companies
        );

        let in_dual_run = |date: NaiveDate| mgr.is_in_dual_run(date, "1000").is_some();

        // Inside the dual-run period.
        assert!(in_dual_run(ymd(2024, 6, 20)));
        // After the dual-run period has ended.
        assert!(!in_dual_run(ymd(2024, 7, 20)));
    }

    /// A process change is active after its effective date and inactive
    /// before it.
    #[test]
    fn test_process_change() {
        let mut mgr = DisruptionManager::new();
        mgr.add_event(
            DisruptionType::ProcessChange(ProcessChangeConfig {
                effective_date: ymd(2024, 4, 1),
                change_type: ProcessChangeType::ApprovalThreshold {
                    old_threshold: 10000.0,
                    new_threshold: 5000.0,
                },
                transition_days: 30,
                retroactive: false,
            }),
            "Lower approval threshold",
            2,
            vec!["1000".to_string()],
        );

        let active_count =
            |date: NaiveDate| mgr.get_active_process_changes(date, "1000").len();

        // After the effective date.
        assert_eq!(active_count(ymd(2024, 5, 1)), 1);
        // Before the effective date.
        assert_eq!(active_count(ymd(2024, 3, 1)), 0);
    }

    /// An outage with data loss suppresses generation and labels the effect
    /// with the outage event.
    #[test]
    fn test_compute_disruption_effect() {
        let mut mgr = DisruptionManager::new();
        mgr.add_event(
            DisruptionType::SystemOutage(OutageConfig {
                start_date: ymd(2024, 3, 15),
                end_date: ymd(2024, 3, 17),
                affected_systems: vec!["GL".to_string()],
                data_loss: true,
                recovery_mode: None,
                cause: OutageCause::SystemFailure,
            }),
            "GL system outage with data loss",
            5,
            vec!["1000".to_string()],
        );

        let effect = compute_disruption_effect(&mgr, ymd(2024, 3, 16), "1000");

        assert!(effect.skip_generation);
        assert!(effect.labels.contains_key("outage_event"));
    }
}