datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
11/// Root configuration for the synthetic data generator.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct GeneratorConfig {
14    /// Global settings
15    pub global: GlobalConfig,
16    /// Company configuration
17    pub companies: Vec<CompanyConfig>,
18    /// Chart of Accounts configuration
19    pub chart_of_accounts: ChartOfAccountsConfig,
20    /// Transaction generation settings
21    #[serde(default)]
22    pub transactions: TransactionConfig,
23    /// Output configuration
24    pub output: OutputConfig,
25    /// Fraud simulation settings
26    #[serde(default)]
27    pub fraud: FraudConfig,
28    /// Data quality variation settings
29    #[serde(default)]
30    pub data_quality: DataQualitySchemaConfig,
31    /// Internal Controls System settings
32    #[serde(default)]
33    pub internal_controls: InternalControlsConfig,
34    /// Business process mix
35    #[serde(default)]
36    pub business_processes: BusinessProcessConfig,
37    /// User persona distribution
38    #[serde(default)]
39    pub user_personas: UserPersonaConfig,
40    /// Template configuration for realistic data
41    #[serde(default)]
42    pub templates: TemplateConfig,
43    /// Approval workflow configuration
44    #[serde(default)]
45    pub approval: ApprovalConfig,
46    /// Department structure configuration
47    #[serde(default)]
48    pub departments: DepartmentConfig,
49    /// Master data generation settings
50    #[serde(default)]
51    pub master_data: MasterDataConfig,
52    /// Document flow generation settings
53    #[serde(default)]
54    pub document_flows: DocumentFlowConfig,
55    /// Intercompany transaction settings
56    #[serde(default)]
57    pub intercompany: IntercompanyConfig,
58    /// Balance and trial balance settings
59    #[serde(default)]
60    pub balance: BalanceConfig,
61    /// OCPM (Object-Centric Process Mining) settings
62    #[serde(default)]
63    pub ocpm: OcpmConfig,
64    /// Audit engagement and workpaper generation settings
65    #[serde(default)]
66    pub audit: AuditGenerationConfig,
67    /// Banking KYC/AML transaction generation settings
68    #[serde(default)]
69    pub banking: datasynth_banking::BankingConfig,
70    /// Scenario configuration for metadata and tagging (Phase 1.3)
71    #[serde(default)]
72    pub scenario: ScenarioConfig,
73    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
74    #[serde(default)]
75    pub temporal: TemporalDriftConfig,
76    /// Graph export configuration for accounting network export
77    #[serde(default)]
78    pub graph_export: GraphExportConfig,
79}
80
81/// Graph export configuration for accounting network and ML training exports.
82///
83/// This section enables exporting generated data as graphs for:
84/// - Network reconstruction algorithms
85/// - Graph neural network training
86/// - Neo4j graph database import
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct GraphExportConfig {
89    /// Enable graph export.
90    #[serde(default)]
91    pub enabled: bool,
92
93    /// Graph types to generate.
94    #[serde(default = "default_graph_types")]
95    pub graph_types: Vec<GraphTypeConfig>,
96
97    /// Export formats to generate.
98    #[serde(default = "default_graph_formats")]
99    pub formats: Vec<GraphExportFormat>,
100
101    /// Train split ratio for ML datasets.
102    #[serde(default = "default_train_ratio")]
103    pub train_ratio: f64,
104
105    /// Validation split ratio for ML datasets.
106    #[serde(default = "default_val_ratio")]
107    pub validation_ratio: f64,
108
109    /// Random seed for train/val/test splits.
110    #[serde(default)]
111    pub split_seed: Option<u64>,
112
113    /// Output subdirectory for graph exports (relative to output directory).
114    #[serde(default = "default_graph_subdir")]
115    pub output_subdirectory: String,
116}
117
118fn default_graph_types() -> Vec<GraphTypeConfig> {
119    vec![GraphTypeConfig::default()]
120}
121
122fn default_graph_formats() -> Vec<GraphExportFormat> {
123    vec![GraphExportFormat::PytorchGeometric]
124}
125
/// Serde default for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const TRAIN_RATIO: f64 = 0.7;
    TRAIN_RATIO
}

/// Serde default for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const VALIDATION_RATIO: f64 = 0.15;
    VALIDATION_RATIO
}

/// Serde default for `GraphExportConfig::output_subdirectory`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
137
138impl Default for GraphExportConfig {
139    fn default() -> Self {
140        Self {
141            enabled: false,
142            graph_types: default_graph_types(),
143            formats: default_graph_formats(),
144            train_ratio: 0.7,
145            validation_ratio: 0.15,
146            split_seed: None,
147            output_subdirectory: "graphs".to_string(),
148        }
149    }
150}
151
152/// Configuration for a specific graph type to export.
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct GraphTypeConfig {
155    /// Name identifier for this graph configuration.
156    #[serde(default = "default_graph_name")]
157    pub name: String,
158
159    /// Whether to aggregate parallel edges between the same nodes.
160    #[serde(default)]
161    pub aggregate_edges: bool,
162
163    /// Minimum edge weight to include (filters out small transactions).
164    #[serde(default)]
165    pub min_edge_weight: f64,
166
167    /// Whether to include document nodes (creates hub-and-spoke structure).
168    #[serde(default)]
169    pub include_document_nodes: bool,
170}
171
/// Serde default for `GraphTypeConfig::name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
175
176impl Default for GraphTypeConfig {
177    fn default() -> Self {
178        Self {
179            name: "accounting_network".to_string(),
180            aggregate_edges: false,
181            min_edge_weight: 0.0,
182            include_document_nodes: false,
183        }
184    }
185}
186
187/// Export format for graph data.
188#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
189#[serde(rename_all = "snake_case")]
190pub enum GraphExportFormat {
191    /// PyTorch Geometric format (.npy files + metadata.json).
192    PytorchGeometric,
193    /// Neo4j format (CSV files + Cypher import scripts).
194    Neo4j,
195    /// Deep Graph Library format.
196    Dgl,
197}
198
199/// Scenario configuration for metadata, tagging, and ML training setup.
200///
201/// This section enables tracking the purpose and characteristics of a generation run.
202#[derive(Debug, Clone, Default, Serialize, Deserialize)]
203pub struct ScenarioConfig {
204    /// Tags for categorizing and filtering datasets.
205    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
206    #[serde(default)]
207    pub tags: Vec<String>,
208
209    /// Data quality profile preset.
210    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
211    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
212    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
213    #[serde(default)]
214    pub profile: Option<String>,
215
216    /// Human-readable description of the scenario purpose.
217    #[serde(default)]
218    pub description: Option<String>,
219
220    /// Whether this run is for ML training (enables balanced labeling).
221    #[serde(default)]
222    pub ml_training: bool,
223
224    /// Target anomaly class balance for ML training.
225    /// If set, anomalies will be injected to achieve this ratio.
226    #[serde(default)]
227    pub target_anomaly_ratio: Option<f64>,
228
229    /// Custom metadata key-value pairs.
230    #[serde(default)]
231    pub metadata: std::collections::HashMap<String, String>,
232}
233
234/// Temporal drift configuration for simulating distribution changes over time.
235///
236/// This enables generation of data that shows realistic temporal evolution,
237/// useful for training drift detection models and testing temporal robustness.
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct TemporalDriftConfig {
240    /// Enable temporal drift simulation.
241    #[serde(default)]
242    pub enabled: bool,
243
244    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
245    /// Simulates gradual inflation or business growth.
246    #[serde(default = "default_amount_drift")]
247    pub amount_mean_drift: f64,
248
249    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
250    /// Simulates increasing volatility over time.
251    #[serde(default)]
252    pub amount_variance_drift: f64,
253
254    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
255    /// Simulates increasing fraud attempts or degrading controls.
256    #[serde(default)]
257    pub anomaly_rate_drift: f64,
258
259    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
260    /// Higher values cause more rapid distribution shifts.
261    #[serde(default = "default_concept_drift")]
262    pub concept_drift_rate: f64,
263
264    /// Sudden drift events - probability of a sudden distribution shift in any period.
265    #[serde(default)]
266    pub sudden_drift_probability: f64,
267
268    /// Magnitude of sudden drift events when they occur (multiplier).
269    #[serde(default = "default_sudden_drift_magnitude")]
270    pub sudden_drift_magnitude: f64,
271
272    /// Seasonal drift - enable cyclic patterns that repeat annually.
273    #[serde(default)]
274    pub seasonal_drift: bool,
275
276    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
277    #[serde(default)]
278    pub drift_start_period: u32,
279
280    /// Drift type: "gradual", "sudden", "recurring", "mixed"
281    #[serde(default = "default_drift_type")]
282    pub drift_type: DriftType,
283}
284
/// Serde default for `TemporalDriftConfig::amount_mean_drift`.
fn default_amount_drift() -> f64 {
    const AMOUNT_MEAN_DRIFT: f64 = 0.02;
    AMOUNT_MEAN_DRIFT
}

/// Serde default for `TemporalDriftConfig::concept_drift_rate`.
fn default_concept_drift() -> f64 {
    const CONCEPT_DRIFT_RATE: f64 = 0.01;
    CONCEPT_DRIFT_RATE
}

/// Serde default for `TemporalDriftConfig::sudden_drift_magnitude`.
fn default_sudden_drift_magnitude() -> f64 {
    const SUDDEN_DRIFT_MAGNITUDE: f64 = 2.0;
    SUDDEN_DRIFT_MAGNITUDE
}
296
297fn default_drift_type() -> DriftType {
298    DriftType::Gradual
299}
300
301impl Default for TemporalDriftConfig {
302    fn default() -> Self {
303        Self {
304            enabled: false,
305            amount_mean_drift: 0.02,
306            amount_variance_drift: 0.0,
307            anomaly_rate_drift: 0.0,
308            concept_drift_rate: 0.01,
309            sudden_drift_probability: 0.0,
310            sudden_drift_magnitude: 2.0,
311            seasonal_drift: false,
312            drift_start_period: 0,
313            drift_type: DriftType::Gradual,
314        }
315    }
316}
317
318impl TemporalDriftConfig {
319    /// Convert to core DriftConfig for use in generators.
320    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
321        datasynth_core::distributions::DriftConfig {
322            enabled: self.enabled,
323            amount_mean_drift: self.amount_mean_drift,
324            amount_variance_drift: self.amount_variance_drift,
325            anomaly_rate_drift: self.anomaly_rate_drift,
326            concept_drift_rate: self.concept_drift_rate,
327            sudden_drift_probability: self.sudden_drift_probability,
328            sudden_drift_magnitude: self.sudden_drift_magnitude,
329            seasonal_drift: self.seasonal_drift,
330            drift_start_period: self.drift_start_period,
331            drift_type: match self.drift_type {
332                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
333                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
334                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
335                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
336            },
337        }
338    }
339}
340
341/// Types of temporal drift patterns.
342#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
343#[serde(rename_all = "snake_case")]
344pub enum DriftType {
345    /// Gradual, continuous drift over time (like inflation).
346    #[default]
347    Gradual,
348    /// Sudden, point-in-time shifts (like policy changes).
349    Sudden,
350    /// Recurring patterns that cycle (like seasonal variations).
351    Recurring,
352    /// Combination of gradual background drift with occasional sudden shifts.
353    Mixed,
354}
355
356/// Global configuration settings.
357#[derive(Debug, Clone, Serialize, Deserialize)]
358pub struct GlobalConfig {
359    /// Random seed for reproducibility
360    pub seed: Option<u64>,
361    /// Industry sector
362    pub industry: IndustrySector,
363    /// Simulation start date (YYYY-MM-DD)
364    pub start_date: String,
365    /// Simulation period in months
366    pub period_months: u32,
367    /// Base currency for group reporting
368    #[serde(default = "default_currency")]
369    pub group_currency: String,
370    /// Enable parallel generation
371    #[serde(default = "default_true")]
372    pub parallel: bool,
373    /// Number of worker threads (0 = auto-detect)
374    #[serde(default)]
375    pub worker_threads: usize,
376    /// Memory limit in MB (0 = unlimited)
377    #[serde(default)]
378    pub memory_limit_mb: usize,
379}
380
/// Serde default for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}

/// Serde default for boolean options that are on unless switched off.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}
387
388/// Company code configuration.
389#[derive(Debug, Clone, Serialize, Deserialize)]
390pub struct CompanyConfig {
391    /// Company code identifier
392    pub code: String,
393    /// Company name
394    pub name: String,
395    /// Local currency (ISO 4217)
396    pub currency: String,
397    /// Country code (ISO 3166-1 alpha-2)
398    pub country: String,
399    /// Fiscal year variant
400    #[serde(default = "default_fiscal_variant")]
401    pub fiscal_year_variant: String,
402    /// Transaction volume per year
403    pub annual_transaction_volume: TransactionVolume,
404    /// Company-specific transaction weight
405    #[serde(default = "default_weight")]
406    pub volume_weight: f64,
407}
408
/// Serde default for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}

/// Serde default for `CompanyConfig::volume_weight`.
fn default_weight() -> f64 {
    const NEUTRAL_WEIGHT: f64 = 1.0;
    NEUTRAL_WEIGHT
}
415
416/// Transaction volume presets.
417#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
418#[serde(rename_all = "snake_case")]
419pub enum TransactionVolume {
420    /// 10,000 transactions per year
421    TenK,
422    /// 100,000 transactions per year
423    HundredK,
424    /// 1,000,000 transactions per year
425    OneM,
426    /// 10,000,000 transactions per year
427    TenM,
428    /// 100,000,000 transactions per year
429    HundredM,
430    /// Custom count
431    Custom(u64),
432}
433
434impl TransactionVolume {
435    /// Get the transaction count.
436    pub fn count(&self) -> u64 {
437        match self {
438            Self::TenK => 10_000,
439            Self::HundredK => 100_000,
440            Self::OneM => 1_000_000,
441            Self::TenM => 10_000_000,
442            Self::HundredM => 100_000_000,
443            Self::Custom(n) => *n,
444        }
445    }
446}
447
448/// Chart of Accounts configuration.
449#[derive(Debug, Clone, Serialize, Deserialize)]
450pub struct ChartOfAccountsConfig {
451    /// CoA complexity level
452    pub complexity: CoAComplexity,
453    /// Use industry-specific accounts
454    #[serde(default = "default_true")]
455    pub industry_specific: bool,
456    /// Custom account definitions file
457    pub custom_accounts: Option<PathBuf>,
458    /// Minimum hierarchy depth
459    #[serde(default = "default_min_depth")]
460    pub min_hierarchy_depth: u8,
461    /// Maximum hierarchy depth
462    #[serde(default = "default_max_depth")]
463    pub max_hierarchy_depth: u8,
464}
465
/// Serde default for `ChartOfAccountsConfig::min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    const MIN_DEPTH: u8 = 2;
    MIN_DEPTH
}

/// Serde default for `ChartOfAccountsConfig::max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    const MAX_DEPTH: u8 = 5;
    MAX_DEPTH
}
472
473impl Default for ChartOfAccountsConfig {
474    fn default() -> Self {
475        Self {
476            complexity: CoAComplexity::Small,
477            industry_specific: true,
478            custom_accounts: None,
479            min_hierarchy_depth: default_min_depth(),
480            max_hierarchy_depth: default_max_depth(),
481        }
482    }
483}
484
485/// Transaction generation configuration.
486#[derive(Debug, Clone, Serialize, Deserialize, Default)]
487pub struct TransactionConfig {
488    /// Line item distribution
489    #[serde(default)]
490    pub line_item_distribution: LineItemDistributionConfig,
491    /// Debit/credit balance distribution
492    #[serde(default)]
493    pub debit_credit_distribution: DebitCreditDistributionConfig,
494    /// Even/odd line count distribution
495    #[serde(default)]
496    pub even_odd_distribution: EvenOddDistributionConfig,
497    /// Transaction source distribution
498    #[serde(default)]
499    pub source_distribution: SourceDistribution,
500    /// Seasonality configuration
501    #[serde(default)]
502    pub seasonality: SeasonalityConfig,
503    /// Amount distribution
504    #[serde(default)]
505    pub amounts: AmountDistributionConfig,
506    /// Benford's Law compliance configuration
507    #[serde(default)]
508    pub benford: BenfordConfig,
509}
510
511/// Benford's Law compliance configuration.
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct BenfordConfig {
514    /// Enable Benford's Law compliance for amount generation
515    #[serde(default = "default_true")]
516    pub enabled: bool,
517    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
518    #[serde(default = "default_benford_tolerance")]
519    pub tolerance: f64,
520    /// Transaction sources exempt from Benford's Law (fixed amounts)
521    #[serde(default)]
522    pub exempt_sources: Vec<BenfordExemption>,
523}
524
/// Serde default for `BenfordConfig::tolerance`.
fn default_benford_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.05;
    TOLERANCE
}
528
529impl Default for BenfordConfig {
530    fn default() -> Self {
531        Self {
532            enabled: true,
533            tolerance: default_benford_tolerance(),
534            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
535        }
536    }
537}
538
539/// Types of transactions exempt from Benford's Law.
540#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
541#[serde(rename_all = "snake_case")]
542pub enum BenfordExemption {
543    /// Recurring fixed amounts (rent, subscriptions)
544    Recurring,
545    /// Payroll (standardized salaries)
546    Payroll,
547    /// Fixed fees and charges
548    FixedFees,
549    /// Round number purchases (often legitimate)
550    RoundAmounts,
551}
552
553/// Distribution of transaction sources.
554#[derive(Debug, Clone, Serialize, Deserialize)]
555pub struct SourceDistribution {
556    /// Manual entries percentage
557    pub manual: f64,
558    /// Automated system entries
559    pub automated: f64,
560    /// Recurring entries
561    pub recurring: f64,
562    /// Adjustment entries
563    pub adjustment: f64,
564}
565
566impl Default for SourceDistribution {
567    fn default() -> Self {
568        Self {
569            manual: 0.20,
570            automated: 0.70,
571            recurring: 0.07,
572            adjustment: 0.03,
573        }
574    }
575}
576
577/// Output configuration.
578#[derive(Debug, Clone, Serialize, Deserialize)]
579pub struct OutputConfig {
580    /// Output mode
581    #[serde(default)]
582    pub mode: OutputMode,
583    /// Output directory
584    pub output_directory: PathBuf,
585    /// File formats to generate
586    #[serde(default = "default_formats")]
587    pub formats: Vec<FileFormat>,
588    /// Compression settings
589    #[serde(default)]
590    pub compression: CompressionConfig,
591    /// Batch size for writes
592    #[serde(default = "default_batch_size")]
593    pub batch_size: usize,
594    /// Include ACDOCA format
595    #[serde(default = "default_true")]
596    pub include_acdoca: bool,
597    /// Include BSEG format
598    #[serde(default)]
599    pub include_bseg: bool,
600    /// Partition by fiscal period
601    #[serde(default = "default_true")]
602    pub partition_by_period: bool,
603    /// Partition by company code
604    #[serde(default)]
605    pub partition_by_company: bool,
606}
607
608fn default_formats() -> Vec<FileFormat> {
609    vec![FileFormat::Parquet]
610}
/// Serde default for `OutputConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
614
615impl Default for OutputConfig {
616    fn default() -> Self {
617        Self {
618            mode: OutputMode::FlatFile,
619            output_directory: PathBuf::from("./output"),
620            formats: default_formats(),
621            compression: CompressionConfig::default(),
622            batch_size: default_batch_size(),
623            include_acdoca: true,
624            include_bseg: false,
625            partition_by_period: true,
626            partition_by_company: false,
627        }
628    }
629}
630
631/// Output mode.
632#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
633#[serde(rename_all = "snake_case")]
634pub enum OutputMode {
635    /// Stream records as generated
636    Streaming,
637    /// Write to flat files
638    #[default]
639    FlatFile,
640    /// Both streaming and flat file
641    Both,
642}
643
644/// Supported file formats.
645#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
646#[serde(rename_all = "snake_case")]
647pub enum FileFormat {
648    Csv,
649    Parquet,
650    Json,
651    JsonLines,
652}
653
654/// Compression configuration.
655#[derive(Debug, Clone, Serialize, Deserialize)]
656pub struct CompressionConfig {
657    /// Enable compression
658    #[serde(default = "default_true")]
659    pub enabled: bool,
660    /// Compression algorithm
661    #[serde(default)]
662    pub algorithm: CompressionAlgorithm,
663    /// Compression level (1-9)
664    #[serde(default = "default_compression_level")]
665    pub level: u8,
666}
667
/// Serde default for `CompressionConfig::level`.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
671
672impl Default for CompressionConfig {
673    fn default() -> Self {
674        Self {
675            enabled: true,
676            algorithm: CompressionAlgorithm::default(),
677            level: default_compression_level(),
678        }
679    }
680}
681
682/// Compression algorithms.
683#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
684#[serde(rename_all = "snake_case")]
685pub enum CompressionAlgorithm {
686    Gzip,
687    #[default]
688    Zstd,
689    Lz4,
690    Snappy,
691}
692
693/// Fraud simulation configuration.
694#[derive(Debug, Clone, Serialize, Deserialize)]
695pub struct FraudConfig {
696    /// Enable fraud scenario generation
697    #[serde(default)]
698    pub enabled: bool,
699    /// Overall fraud rate (0.0 to 1.0)
700    #[serde(default = "default_fraud_rate")]
701    pub fraud_rate: f64,
702    /// Fraud type distribution
703    #[serde(default)]
704    pub fraud_type_distribution: FraudTypeDistribution,
705    /// Enable fraud clustering
706    #[serde(default)]
707    pub clustering_enabled: bool,
708    /// Clustering factor
709    #[serde(default = "default_clustering_factor")]
710    pub clustering_factor: f64,
711    /// Approval thresholds for threshold-adjacent fraud pattern
712    #[serde(default = "default_approval_thresholds")]
713    pub approval_thresholds: Vec<f64>,
714}
715
/// Serde default for `FraudConfig::approval_thresholds`.
fn default_approval_thresholds() -> Vec<f64> {
    const THRESHOLDS: [f64; 6] = [1_000.0, 5_000.0, 10_000.0, 25_000.0, 50_000.0, 100_000.0];
    THRESHOLDS.to_vec()
}

/// Serde default for `FraudConfig::fraud_rate`.
fn default_fraud_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.005;
    FRAUD_RATE
}

/// Serde default for `FraudConfig::clustering_factor`.
fn default_clustering_factor() -> f64 {
    const CLUSTERING_FACTOR: f64 = 3.0;
    CLUSTERING_FACTOR
}
726
727impl Default for FraudConfig {
728    fn default() -> Self {
729        Self {
730            enabled: false,
731            fraud_rate: default_fraud_rate(),
732            fraud_type_distribution: FraudTypeDistribution::default(),
733            clustering_enabled: false,
734            clustering_factor: default_clustering_factor(),
735            approval_thresholds: default_approval_thresholds(),
736        }
737    }
738}
739
740/// Distribution of fraud types.
741#[derive(Debug, Clone, Serialize, Deserialize)]
742pub struct FraudTypeDistribution {
743    pub suspense_account_abuse: f64,
744    pub fictitious_transaction: f64,
745    pub revenue_manipulation: f64,
746    pub expense_capitalization: f64,
747    pub split_transaction: f64,
748    pub timing_anomaly: f64,
749    pub unauthorized_access: f64,
750    pub duplicate_payment: f64,
751}
752
753impl Default for FraudTypeDistribution {
754    fn default() -> Self {
755        Self {
756            suspense_account_abuse: 0.25,
757            fictitious_transaction: 0.15,
758            revenue_manipulation: 0.10,
759            expense_capitalization: 0.10,
760            split_transaction: 0.15,
761            timing_anomaly: 0.10,
762            unauthorized_access: 0.10,
763            duplicate_payment: 0.05,
764        }
765    }
766}
767
768/// Internal Controls System (ICS) configuration.
769#[derive(Debug, Clone, Serialize, Deserialize)]
770pub struct InternalControlsConfig {
771    /// Enable internal controls system
772    #[serde(default)]
773    pub enabled: bool,
774    /// Rate at which controls result in exceptions (0.0 - 1.0)
775    #[serde(default = "default_exception_rate")]
776    pub exception_rate: f64,
777    /// Rate at which SoD violations occur (0.0 - 1.0)
778    #[serde(default = "default_sod_violation_rate")]
779    pub sod_violation_rate: f64,
780    /// Export control master data to separate files
781    #[serde(default = "default_true")]
782    pub export_control_master_data: bool,
783    /// SOX materiality threshold for marking transactions as SOX-relevant
784    #[serde(default = "default_sox_materiality_threshold")]
785    pub sox_materiality_threshold: f64,
786}
787
/// Serde default for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    const EXCEPTION_RATE: f64 = 0.02;
    EXCEPTION_RATE
}

/// Serde default for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    const SOD_VIOLATION_RATE: f64 = 0.01;
    SOD_VIOLATION_RATE
}

/// Serde default for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    const SOX_THRESHOLD: f64 = 10_000.0;
    SOX_THRESHOLD
}
799
800impl Default for InternalControlsConfig {
801    fn default() -> Self {
802        Self {
803            enabled: false,
804            exception_rate: default_exception_rate(),
805            sod_violation_rate: default_sod_violation_rate(),
806            export_control_master_data: true,
807            sox_materiality_threshold: default_sox_materiality_threshold(),
808        }
809    }
810}
811
812/// Business process configuration.
813#[derive(Debug, Clone, Serialize, Deserialize)]
814pub struct BusinessProcessConfig {
815    /// Order-to-Cash weight
816    #[serde(default = "default_o2c")]
817    pub o2c_weight: f64,
818    /// Procure-to-Pay weight
819    #[serde(default = "default_p2p")]
820    pub p2p_weight: f64,
821    /// Record-to-Report weight
822    #[serde(default = "default_r2r")]
823    pub r2r_weight: f64,
824    /// Hire-to-Retire weight
825    #[serde(default = "default_h2r")]
826    pub h2r_weight: f64,
827    /// Acquire-to-Retire weight
828    #[serde(default = "default_a2r")]
829    pub a2r_weight: f64,
830}
831
/// Serde default for `BusinessProcessConfig::o2c_weight`.
fn default_o2c() -> f64 {
    const O2C_WEIGHT: f64 = 0.35;
    O2C_WEIGHT
}

/// Serde default for `BusinessProcessConfig::p2p_weight`.
fn default_p2p() -> f64 {
    const P2P_WEIGHT: f64 = 0.30;
    P2P_WEIGHT
}

/// Serde default for `BusinessProcessConfig::r2r_weight`.
fn default_r2r() -> f64 {
    const R2R_WEIGHT: f64 = 0.20;
    R2R_WEIGHT
}

/// Serde default for `BusinessProcessConfig::h2r_weight`.
fn default_h2r() -> f64 {
    const H2R_WEIGHT: f64 = 0.10;
    H2R_WEIGHT
}

/// Serde default for `BusinessProcessConfig::a2r_weight`.
fn default_a2r() -> f64 {
    const A2R_WEIGHT: f64 = 0.05;
    A2R_WEIGHT
}
847
848impl Default for BusinessProcessConfig {
849    fn default() -> Self {
850        Self {
851            o2c_weight: default_o2c(),
852            p2p_weight: default_p2p(),
853            r2r_weight: default_r2r(),
854            h2r_weight: default_h2r(),
855            a2r_weight: default_a2r(),
856        }
857    }
858}
859
860/// User persona configuration.
861#[derive(Debug, Clone, Serialize, Deserialize, Default)]
862pub struct UserPersonaConfig {
863    /// Distribution of user personas
864    #[serde(default)]
865    pub persona_distribution: PersonaDistribution,
866    /// Users per persona type
867    #[serde(default)]
868    pub users_per_persona: UsersPerPersona,
869}
870
871/// Distribution of user personas for transaction generation.
872#[derive(Debug, Clone, Serialize, Deserialize)]
873pub struct PersonaDistribution {
874    pub junior_accountant: f64,
875    pub senior_accountant: f64,
876    pub controller: f64,
877    pub manager: f64,
878    pub automated_system: f64,
879}
880
881impl Default for PersonaDistribution {
882    fn default() -> Self {
883        Self {
884            junior_accountant: 0.15,
885            senior_accountant: 0.15,
886            controller: 0.05,
887            manager: 0.05,
888            automated_system: 0.60,
889        }
890    }
891}
892
893/// Number of users per persona type.
894#[derive(Debug, Clone, Serialize, Deserialize)]
895pub struct UsersPerPersona {
896    pub junior_accountant: usize,
897    pub senior_accountant: usize,
898    pub controller: usize,
899    pub manager: usize,
900    pub automated_system: usize,
901}
902
903impl Default for UsersPerPersona {
904    fn default() -> Self {
905        Self {
906            junior_accountant: 10,
907            senior_accountant: 5,
908            controller: 2,
909            manager: 3,
910            automated_system: 20,
911        }
912    }
913}
914
915/// Template configuration for realistic data generation.
916#[derive(Debug, Clone, Serialize, Deserialize, Default)]
917pub struct TemplateConfig {
918    /// Name generation settings
919    #[serde(default)]
920    pub names: NameTemplateConfig,
921    /// Description generation settings
922    #[serde(default)]
923    pub descriptions: DescriptionTemplateConfig,
924    /// Reference number settings
925    #[serde(default)]
926    pub references: ReferenceTemplateConfig,
927}
928
929/// Name template configuration.
930#[derive(Debug, Clone, Serialize, Deserialize)]
931pub struct NameTemplateConfig {
932    /// Distribution of name cultures
933    #[serde(default)]
934    pub culture_distribution: CultureDistribution,
935    /// Email domain for generated users
936    #[serde(default = "default_email_domain")]
937    pub email_domain: String,
938    /// Generate realistic display names
939    #[serde(default = "default_true")]
940    pub generate_realistic_names: bool,
941}
942
/// Serde default for `NameTemplateConfig::email_domain`.
fn default_email_domain() -> String {
    String::from("company.com")
}
946
947impl Default for NameTemplateConfig {
948    fn default() -> Self {
949        Self {
950            culture_distribution: CultureDistribution::default(),
951            email_domain: default_email_domain(),
952            generate_realistic_names: true,
953        }
954    }
955}
956
957/// Distribution of name cultures for generation.
958#[derive(Debug, Clone, Serialize, Deserialize)]
959pub struct CultureDistribution {
960    pub western_us: f64,
961    pub hispanic: f64,
962    pub german: f64,
963    pub french: f64,
964    pub chinese: f64,
965    pub japanese: f64,
966    pub indian: f64,
967}
968
969impl Default for CultureDistribution {
970    fn default() -> Self {
971        Self {
972            western_us: 0.40,
973            hispanic: 0.20,
974            german: 0.10,
975            french: 0.05,
976            chinese: 0.10,
977            japanese: 0.05,
978            indian: 0.10,
979        }
980    }
981}
982
983/// Description template configuration.
984#[derive(Debug, Clone, Serialize, Deserialize)]
985pub struct DescriptionTemplateConfig {
986    /// Generate header text for journal entries
987    #[serde(default = "default_true")]
988    pub generate_header_text: bool,
989    /// Generate line text for journal entry lines
990    #[serde(default = "default_true")]
991    pub generate_line_text: bool,
992}
993
994impl Default for DescriptionTemplateConfig {
995    fn default() -> Self {
996        Self {
997            generate_header_text: true,
998            generate_line_text: true,
999        }
1000    }
1001}
1002
/// Reference number template configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceTemplateConfig {
    /// Generate reference numbers (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_references: bool,
    /// Invoice prefix (`"INV"` when omitted).
    #[serde(default = "default_invoice_prefix")]
    pub invoice_prefix: String,
    /// Purchase order prefix (`"PO"` when omitted).
    #[serde(default = "default_po_prefix")]
    pub po_prefix: String,
    /// Sales order prefix (`"SO"` when omitted).
    #[serde(default = "default_so_prefix")]
    pub so_prefix: String,
}
1019
/// Serde default for `ReferenceTemplateConfig::invoice_prefix`.
fn default_invoice_prefix() -> String {
    String::from("INV")
}
/// Serde default for `ReferenceTemplateConfig::po_prefix`.
fn default_po_prefix() -> String {
    String::from("PO")
}
/// Serde default for `ReferenceTemplateConfig::so_prefix`.
fn default_so_prefix() -> String {
    String::from("SO")
}
1029
1030impl Default for ReferenceTemplateConfig {
1031    fn default() -> Self {
1032        Self {
1033            generate_references: true,
1034            invoice_prefix: default_invoice_prefix(),
1035            po_prefix: default_po_prefix(),
1036            so_prefix: default_so_prefix(),
1037        }
1038    }
1039}
1040
1041/// Approval workflow configuration.
1042#[derive(Debug, Clone, Serialize, Deserialize)]
1043pub struct ApprovalConfig {
1044    /// Enable approval workflow generation
1045    #[serde(default)]
1046    pub enabled: bool,
1047    /// Threshold below which transactions are auto-approved
1048    #[serde(default = "default_auto_approve_threshold")]
1049    pub auto_approve_threshold: f64,
1050    /// Rate at which approvals are rejected (0.0 to 1.0)
1051    #[serde(default = "default_rejection_rate")]
1052    pub rejection_rate: f64,
1053    /// Rate at which approvals require revision (0.0 to 1.0)
1054    #[serde(default = "default_revision_rate")]
1055    pub revision_rate: f64,
1056    /// Average delay in hours for approval processing
1057    #[serde(default = "default_approval_delay_hours")]
1058    pub average_approval_delay_hours: f64,
1059    /// Approval chain thresholds
1060    #[serde(default)]
1061    pub thresholds: Vec<ApprovalThresholdConfig>,
1062}
1063
1064fn default_auto_approve_threshold() -> f64 {
1065    1000.0
1066}
1067fn default_rejection_rate() -> f64 {
1068    0.02
1069}
1070fn default_revision_rate() -> f64 {
1071    0.05
1072}
1073fn default_approval_delay_hours() -> f64 {
1074    4.0
1075}
1076
1077impl Default for ApprovalConfig {
1078    fn default() -> Self {
1079        Self {
1080            enabled: false,
1081            auto_approve_threshold: default_auto_approve_threshold(),
1082            rejection_rate: default_rejection_rate(),
1083            revision_rate: default_revision_rate(),
1084            average_approval_delay_hours: default_approval_delay_hours(),
1085            thresholds: vec![
1086                ApprovalThresholdConfig {
1087                    amount: 1000.0,
1088                    level: 1,
1089                    roles: vec!["senior_accountant".to_string()],
1090                },
1091                ApprovalThresholdConfig {
1092                    amount: 10000.0,
1093                    level: 2,
1094                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
1095                },
1096                ApprovalThresholdConfig {
1097                    amount: 100000.0,
1098                    level: 3,
1099                    roles: vec![
1100                        "senior_accountant".to_string(),
1101                        "controller".to_string(),
1102                        "manager".to_string(),
1103                    ],
1104                },
1105                ApprovalThresholdConfig {
1106                    amount: 500000.0,
1107                    level: 4,
1108                    roles: vec![
1109                        "senior_accountant".to_string(),
1110                        "controller".to_string(),
1111                        "manager".to_string(),
1112                        "executive".to_string(),
1113                    ],
1114                },
1115            ],
1116        }
1117    }
1118}
1119
/// Configuration for a single approval threshold.
///
/// None of the fields has a serde default, so every entry must spell out
/// `amount`, `level` and `roles`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalThresholdConfig {
    /// Amount threshold
    pub amount: f64,
    /// Approval level required
    pub level: u8,
    /// Roles that can approve at this level
    pub roles: Vec<String>,
}
1130
/// Department configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepartmentConfig {
    /// Enable department assignment (`false` when omitted).
    #[serde(default)]
    pub enabled: bool,
    /// Multiplier for department headcounts (`1.0` when omitted).
    #[serde(default = "default_headcount_multiplier")]
    pub headcount_multiplier: f64,
    /// Custom department definitions (optional; empty when omitted).
    #[serde(default)]
    pub custom_departments: Vec<CustomDepartmentConfig>,
}
1144
/// Serde default for `DepartmentConfig::headcount_multiplier`.
fn default_headcount_multiplier() -> f64 {
    const HEADCOUNT_MULTIPLIER: f64 = 1.0;
    HEADCOUNT_MULTIPLIER
}
1148
1149impl Default for DepartmentConfig {
1150    fn default() -> Self {
1151        Self {
1152            enabled: false,
1153            headcount_multiplier: default_headcount_multiplier(),
1154            custom_departments: Vec::new(),
1155        }
1156    }
1157}
1158
/// Custom department definition.
///
/// `code` and `name` are required; the remaining fields fall back to
/// `None` / an empty list when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomDepartmentConfig {
    /// Department code
    pub code: String,
    /// Department name
    pub name: String,
    /// Associated cost center
    #[serde(default)]
    pub cost_center: Option<String>,
    /// Primary business processes
    #[serde(default)]
    pub primary_processes: Vec<String>,
    /// Parent department code
    /// NOTE(review): presumably refers to another entry's `code` — confirm.
    #[serde(default)]
    pub parent_code: Option<String>,
}
1176
1177// ============================================================================
1178// Master Data Configuration
1179// ============================================================================
1180
/// Master data generation configuration.
///
/// `Default` is derived, so each section falls back to its own `Default`
/// implementation; the same happens per section during deserialization via
/// `#[serde(default)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MasterDataConfig {
    /// Vendor master data settings
    #[serde(default)]
    pub vendors: VendorMasterConfig,
    /// Customer master data settings
    #[serde(default)]
    pub customers: CustomerMasterConfig,
    /// Material master data settings
    #[serde(default)]
    pub materials: MaterialMasterConfig,
    /// Fixed asset master data settings
    #[serde(default)]
    pub fixed_assets: FixedAssetMasterConfig,
    /// Employee master data settings
    #[serde(default)]
    pub employees: EmployeeMasterConfig,
    /// Cost center master data settings
    #[serde(default)]
    pub cost_centers: CostCenterMasterConfig,
}
1203
/// Vendor master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VendorMasterConfig {
    /// Number of vendors to generate (`500` when omitted).
    #[serde(default = "default_vendor_count")]
    pub count: usize,
    /// Percentage of vendors that are intercompany (0.0 to 1.0; `0.05` when
    /// omitted).
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Payment terms distribution
    #[serde(default)]
    pub payment_terms_distribution: PaymentTermsDistribution,
    /// Vendor behavior distribution
    #[serde(default)]
    pub behavior_distribution: VendorBehaviorDistribution,
    /// Generate bank account details (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_bank_accounts: bool,
    /// Generate tax IDs (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_tax_ids: bool,
}
1226
/// Serde default for `VendorMasterConfig::count`.
fn default_vendor_count() -> usize {
    const VENDOR_COUNT: usize = 500;
    VENDOR_COUNT
}
1230
/// Serde default for the `intercompany_percent` field of both the vendor and
/// the customer master configuration.
fn default_intercompany_percent() -> f64 {
    const INTERCOMPANY_SHARE: f64 = 0.05;
    INTERCOMPANY_SHARE
}
1234
1235impl Default for VendorMasterConfig {
1236    fn default() -> Self {
1237        Self {
1238            count: default_vendor_count(),
1239            intercompany_percent: default_intercompany_percent(),
1240            payment_terms_distribution: PaymentTermsDistribution::default(),
1241            behavior_distribution: VendorBehaviorDistribution::default(),
1242            generate_bank_accounts: true,
1243            generate_tax_ids: true,
1244        }
1245    }
1246}
1247
/// Payment terms distribution for vendors.
///
/// The fields carry no serde defaults, so all six weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
/// NOTE(review): nothing here enforces that custom weights sum to 1.0 —
/// confirm against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDistribution {
    /// Net 30 days
    pub net_30: f64,
    /// Net 60 days
    pub net_60: f64,
    /// Net 90 days
    pub net_90: f64,
    /// 2% 10 Net 30 (early payment discount)
    pub two_ten_net_30: f64,
    /// Due on receipt
    pub due_on_receipt: f64,
    /// End of month
    pub end_of_month: f64,
}
1264
1265impl Default for PaymentTermsDistribution {
1266    fn default() -> Self {
1267        Self {
1268            net_30: 0.40,
1269            net_60: 0.20,
1270            net_90: 0.10,
1271            two_ten_net_30: 0.15,
1272            due_on_receipt: 0.05,
1273            end_of_month: 0.10,
1274        }
1275    }
1276}
1277
/// Vendor behavior distribution.
///
/// The fields carry no serde defaults, so all five weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VendorBehaviorDistribution {
    /// Reliable vendors (consistent delivery, quality)
    pub reliable: f64,
    /// Sometimes late vendors
    pub sometimes_late: f64,
    /// Inconsistent quality vendors
    pub inconsistent_quality: f64,
    /// Premium vendors (high quality, premium pricing)
    pub premium: f64,
    /// Budget vendors (lower quality, lower pricing)
    pub budget: f64,
}
1292
1293impl Default for VendorBehaviorDistribution {
1294    fn default() -> Self {
1295        Self {
1296            reliable: 0.50,
1297            sometimes_late: 0.20,
1298            inconsistent_quality: 0.10,
1299            premium: 0.10,
1300            budget: 0.10,
1301        }
1302    }
1303}
1304
/// Customer master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerMasterConfig {
    /// Number of customers to generate (`2000` when omitted).
    #[serde(default = "default_customer_count")]
    pub count: usize,
    /// Percentage of customers that are intercompany (0.0 to 1.0; `0.05`
    /// when omitted).
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Credit rating distribution
    #[serde(default)]
    pub credit_rating_distribution: CreditRatingDistribution,
    /// Payment behavior distribution
    #[serde(default)]
    pub payment_behavior_distribution: PaymentBehaviorDistribution,
    /// Generate credit limits based on rating (serde default:
    /// `default_true`).
    #[serde(default = "default_true")]
    pub generate_credit_limits: bool,
}
1324
/// Serde default for `CustomerMasterConfig::count`.
fn default_customer_count() -> usize {
    const CUSTOMER_COUNT: usize = 2000;
    CUSTOMER_COUNT
}
1328
1329impl Default for CustomerMasterConfig {
1330    fn default() -> Self {
1331        Self {
1332            count: default_customer_count(),
1333            intercompany_percent: default_intercompany_percent(),
1334            credit_rating_distribution: CreditRatingDistribution::default(),
1335            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
1336            generate_credit_limits: true,
1337        }
1338    }
1339}
1340
/// Credit rating distribution for customers.
///
/// The fields carry no serde defaults, so all seven weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditRatingDistribution {
    /// AAA rating
    pub aaa: f64,
    /// AA rating
    pub aa: f64,
    /// A rating
    pub a: f64,
    /// BBB rating
    pub bbb: f64,
    /// BB rating
    pub bb: f64,
    /// B rating
    pub b: f64,
    /// Below B rating
    pub below_b: f64,
}
1359
1360impl Default for CreditRatingDistribution {
1361    fn default() -> Self {
1362        Self {
1363            aaa: 0.05,
1364            aa: 0.10,
1365            a: 0.20,
1366            bbb: 0.30,
1367            bb: 0.20,
1368            b: 0.10,
1369            below_b: 0.05,
1370        }
1371    }
1372}
1373
/// Payment behavior distribution for customers.
///
/// The fields carry no serde defaults, so all five weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentBehaviorDistribution {
    /// Always pays early
    pub early_payer: f64,
    /// Pays on time
    pub on_time: f64,
    /// Occasionally late
    pub occasional_late: f64,
    /// Frequently late
    pub frequent_late: f64,
    /// Takes early payment discounts
    pub discount_taker: f64,
}
1388
1389impl Default for PaymentBehaviorDistribution {
1390    fn default() -> Self {
1391        Self {
1392            early_payer: 0.10,
1393            on_time: 0.50,
1394            occasional_late: 0.25,
1395            frequent_late: 0.10,
1396            discount_taker: 0.05,
1397        }
1398    }
1399}
1400
/// Material master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialMasterConfig {
    /// Number of materials to generate (`5000` when omitted).
    #[serde(default = "default_material_count")]
    pub count: usize,
    /// Material type distribution
    #[serde(default)]
    pub type_distribution: MaterialTypeDistribution,
    /// Valuation method distribution
    #[serde(default)]
    pub valuation_distribution: ValuationMethodDistribution,
    /// Percentage of materials with BOM (bill of materials); `0.20` when
    /// omitted.
    /// NOTE(review): the default suggests a 0.0–1.0 fraction rather than a
    /// whole-number percentage — confirm.
    #[serde(default = "default_bom_percent")]
    pub bom_percent: f64,
    /// Maximum BOM depth (`3` when omitted).
    #[serde(default = "default_max_bom_depth")]
    pub max_bom_depth: u8,
}
1420
/// Serde default for `MaterialMasterConfig::count`.
fn default_material_count() -> usize {
    const MATERIAL_COUNT: usize = 5000;
    MATERIAL_COUNT
}
1424
/// Serde default for `MaterialMasterConfig::bom_percent`.
fn default_bom_percent() -> f64 {
    const BOM_SHARE: f64 = 0.20;
    BOM_SHARE
}
1428
/// Serde default for `MaterialMasterConfig::max_bom_depth`.
fn default_max_bom_depth() -> u8 {
    const MAX_BOM_DEPTH: u8 = 3;
    MAX_BOM_DEPTH
}
1432
1433impl Default for MaterialMasterConfig {
1434    fn default() -> Self {
1435        Self {
1436            count: default_material_count(),
1437            type_distribution: MaterialTypeDistribution::default(),
1438            valuation_distribution: ValuationMethodDistribution::default(),
1439            bom_percent: default_bom_percent(),
1440            max_bom_depth: default_max_bom_depth(),
1441        }
1442    }
1443}
1444
/// Material type distribution.
///
/// The fields carry no serde defaults, so all six weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialTypeDistribution {
    /// Raw materials
    pub raw_material: f64,
    /// Semi-finished goods
    pub semi_finished: f64,
    /// Finished goods
    pub finished_good: f64,
    /// Trading goods (purchased for resale)
    pub trading_good: f64,
    /// Operating supplies
    pub operating_supply: f64,
    /// Services
    pub service: f64,
}
1461
1462impl Default for MaterialTypeDistribution {
1463    fn default() -> Self {
1464        Self {
1465            raw_material: 0.30,
1466            semi_finished: 0.15,
1467            finished_good: 0.25,
1468            trading_good: 0.15,
1469            operating_supply: 0.10,
1470            service: 0.05,
1471        }
1472    }
1473}
1474
/// Valuation method distribution for materials.
///
/// The fields carry no serde defaults, so all four weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuationMethodDistribution {
    /// Standard cost
    pub standard_cost: f64,
    /// Moving average
    pub moving_average: f64,
    /// FIFO (First In, First Out)
    pub fifo: f64,
    /// LIFO (Last In, First Out)
    pub lifo: f64,
}
1487
1488impl Default for ValuationMethodDistribution {
1489    fn default() -> Self {
1490        Self {
1491            standard_cost: 0.50,
1492            moving_average: 0.30,
1493            fifo: 0.15,
1494            lifo: 0.05,
1495        }
1496    }
1497}
1498
/// Fixed asset master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixedAssetMasterConfig {
    /// Number of fixed assets to generate (`800` when omitted).
    #[serde(default = "default_asset_count")]
    pub count: usize,
    /// Asset class distribution
    #[serde(default)]
    pub class_distribution: AssetClassDistribution,
    /// Depreciation method distribution
    #[serde(default)]
    pub depreciation_distribution: DepreciationMethodDistribution,
    /// Percentage of assets that are fully depreciated (`0.15` when
    /// omitted).
    /// NOTE(review): the default suggests a 0.0–1.0 fraction rather than a
    /// whole-number percentage — confirm.
    #[serde(default = "default_fully_depreciated_percent")]
    pub fully_depreciated_percent: f64,
    /// Generate acquisition history (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_acquisition_history: bool,
}
1518
/// Serde default for `FixedAssetMasterConfig::count`.
fn default_asset_count() -> usize {
    const ASSET_COUNT: usize = 800;
    ASSET_COUNT
}
1522
/// Serde default for `FixedAssetMasterConfig::fully_depreciated_percent`.
fn default_fully_depreciated_percent() -> f64 {
    const FULLY_DEPRECIATED_SHARE: f64 = 0.15;
    FULLY_DEPRECIATED_SHARE
}
1526
1527impl Default for FixedAssetMasterConfig {
1528    fn default() -> Self {
1529        Self {
1530            count: default_asset_count(),
1531            class_distribution: AssetClassDistribution::default(),
1532            depreciation_distribution: DepreciationMethodDistribution::default(),
1533            fully_depreciated_percent: default_fully_depreciated_percent(),
1534            generate_acquisition_history: true,
1535        }
1536    }
1537}
1538
/// Asset class distribution.
///
/// The fields carry no serde defaults, so all seven weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetClassDistribution {
    /// Buildings and structures
    pub buildings: f64,
    /// Machinery and equipment
    pub machinery: f64,
    /// Vehicles
    pub vehicles: f64,
    /// IT equipment
    pub it_equipment: f64,
    /// Furniture and fixtures
    pub furniture: f64,
    /// Land (non-depreciable)
    pub land: f64,
    /// Leasehold improvements
    pub leasehold: f64,
}
1557
1558impl Default for AssetClassDistribution {
1559    fn default() -> Self {
1560        Self {
1561            buildings: 0.15,
1562            machinery: 0.30,
1563            vehicles: 0.15,
1564            it_equipment: 0.20,
1565            furniture: 0.10,
1566            land: 0.05,
1567            leasehold: 0.05,
1568        }
1569    }
1570}
1571
/// Depreciation method distribution.
///
/// The fields carry no serde defaults, so all five weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepreciationMethodDistribution {
    /// Straight line
    pub straight_line: f64,
    /// Declining balance
    pub declining_balance: f64,
    /// Double declining balance
    pub double_declining: f64,
    /// Sum of years' digits
    pub sum_of_years: f64,
    /// Units of production
    pub units_of_production: f64,
}
1586
1587impl Default for DepreciationMethodDistribution {
1588    fn default() -> Self {
1589        Self {
1590            straight_line: 0.60,
1591            declining_balance: 0.20,
1592            double_declining: 0.10,
1593            sum_of_years: 0.05,
1594            units_of_production: 0.05,
1595        }
1596    }
1597}
1598
/// Employee master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeMasterConfig {
    /// Number of employees to generate (`1500` when omitted).
    #[serde(default = "default_employee_count")]
    pub count: usize,
    /// Generate organizational hierarchy (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth (`6` when omitted).
    #[serde(default = "default_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
    /// Average span of control (direct reports per manager); `5.0` when
    /// omitted.
    #[serde(default = "default_span_of_control")]
    pub average_span_of_control: f64,
    /// Approval limit distribution by job level
    #[serde(default)]
    pub approval_limits: ApprovalLimitDistribution,
    /// Department distribution
    #[serde(default)]
    pub department_distribution: EmployeeDepartmentDistribution,
}
1621
/// Serde default for `EmployeeMasterConfig::count`.
fn default_employee_count() -> usize {
    const EMPLOYEE_COUNT: usize = 1500;
    EMPLOYEE_COUNT
}
1625
/// Serde default for `EmployeeMasterConfig::max_hierarchy_depth`.
fn default_hierarchy_depth() -> u8 {
    const HIERARCHY_DEPTH: u8 = 6;
    HIERARCHY_DEPTH
}
1629
/// Serde default for `EmployeeMasterConfig::average_span_of_control`.
fn default_span_of_control() -> f64 {
    const SPAN_OF_CONTROL: f64 = 5.0;
    SPAN_OF_CONTROL
}
1633
1634impl Default for EmployeeMasterConfig {
1635    fn default() -> Self {
1636        Self {
1637            count: default_employee_count(),
1638            generate_hierarchy: true,
1639            max_hierarchy_depth: default_hierarchy_depth(),
1640            average_span_of_control: default_span_of_control(),
1641            approval_limits: ApprovalLimitDistribution::default(),
1642            department_distribution: EmployeeDepartmentDistribution::default(),
1643        }
1644    }
1645}
1646
/// Approval limit distribution by job level.
///
/// Unlike the weight distributions in this file, these fields hold amounts
/// rather than shares, and each one has its own serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalLimitDistribution {
    /// Staff level approval limit (`1000.0` when omitted).
    #[serde(default = "default_staff_limit")]
    pub staff: f64,
    /// Senior staff approval limit (`5000.0` when omitted).
    #[serde(default = "default_senior_limit")]
    pub senior: f64,
    /// Manager approval limit (`25000.0` when omitted).
    #[serde(default = "default_manager_limit")]
    pub manager: f64,
    /// Director approval limit (`100000.0` when omitted).
    #[serde(default = "default_director_limit")]
    pub director: f64,
    /// VP approval limit (`500000.0` when omitted).
    #[serde(default = "default_vp_limit")]
    pub vp: f64,
    /// Executive approval limit (`f64::INFINITY` when omitted).
    /// NOTE(review): JSON has no literal for infinity, so a defaulted value
    /// may not round-trip through JSON serialization — confirm for the output
    /// formats in use.
    #[serde(default = "default_executive_limit")]
    pub executive: f64,
}
1669
/// Serde default for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    const STAFF_LIMIT: f64 = 1000.0;
    STAFF_LIMIT
}
/// Serde default for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    const SENIOR_LIMIT: f64 = 5000.0;
    SENIOR_LIMIT
}
/// Serde default for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    const MANAGER_LIMIT: f64 = 25000.0;
    MANAGER_LIMIT
}
/// Serde default for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    const DIRECTOR_LIMIT: f64 = 100000.0;
    DIRECTOR_LIMIT
}
/// Serde default for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    const VP_LIMIT: f64 = 500000.0;
    VP_LIMIT
}
/// Serde default for `ApprovalLimitDistribution::executive`: no upper bound.
///
/// NOTE(review): JSON has no literal for infinity, so this value may not
/// round-trip through JSON serialization — confirm for the output formats in
/// use.
fn default_executive_limit() -> f64 {
    const UNLIMITED: f64 = f64::INFINITY;
    UNLIMITED
}
1688
1689impl Default for ApprovalLimitDistribution {
1690    fn default() -> Self {
1691        Self {
1692            staff: default_staff_limit(),
1693            senior: default_senior_limit(),
1694            manager: default_manager_limit(),
1695            director: default_director_limit(),
1696            vp: default_vp_limit(),
1697            executive: default_executive_limit(),
1698        }
1699    }
1700}
1701
/// Employee distribution across departments.
///
/// The fields carry no serde defaults, so all eight weights must be present
/// whenever this struct is deserialized explicitly. The built-in defaults sum
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeDepartmentDistribution {
    /// Finance and Accounting
    pub finance: f64,
    /// Procurement
    pub procurement: f64,
    /// Sales
    pub sales: f64,
    /// Warehouse and Logistics
    pub warehouse: f64,
    /// IT
    pub it: f64,
    /// Human Resources
    pub hr: f64,
    /// Operations
    pub operations: f64,
    /// Executive
    pub executive: f64,
}
1722
1723impl Default for EmployeeDepartmentDistribution {
1724    fn default() -> Self {
1725        Self {
1726            finance: 0.12,
1727            procurement: 0.10,
1728            sales: 0.25,
1729            warehouse: 0.15,
1730            it: 0.10,
1731            hr: 0.05,
1732            operations: 0.20,
1733            executive: 0.03,
1734        }
1735    }
1736}
1737
/// Cost center master data configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostCenterMasterConfig {
    /// Number of cost centers to generate (`50` when omitted).
    #[serde(default = "default_cost_center_count")]
    pub count: usize,
    /// Generate cost center hierarchy (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth (`3` when omitted).
    #[serde(default = "default_cc_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
}
1751
/// Serde default for `CostCenterMasterConfig::count`.
fn default_cost_center_count() -> usize {
    const COST_CENTER_COUNT: usize = 50;
    COST_CENTER_COUNT
}
1755
/// Serde default for `CostCenterMasterConfig::max_hierarchy_depth`.
fn default_cc_hierarchy_depth() -> u8 {
    const COST_CENTER_DEPTH: u8 = 3;
    COST_CENTER_DEPTH
}
1759
1760impl Default for CostCenterMasterConfig {
1761    fn default() -> Self {
1762        Self {
1763            count: default_cost_center_count(),
1764            generate_hierarchy: true,
1765            max_hierarchy_depth: default_cc_hierarchy_depth(),
1766        }
1767    }
1768}
1769
1770// ============================================================================
1771// Document Flow Configuration
1772// ============================================================================
1773
/// Document flow generation configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentFlowConfig {
    /// P2P (Procure-to-Pay) flow configuration
    #[serde(default)]
    pub p2p: P2PFlowConfig,
    /// O2C (Order-to-Cash) flow configuration
    #[serde(default)]
    pub o2c: O2CFlowConfig,
    /// Generate document reference chains (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub generate_document_references: bool,
    /// Export document flow graph (`false` when omitted).
    #[serde(default)]
    pub export_flow_graph: bool,
}
1790
1791impl Default for DocumentFlowConfig {
1792    fn default() -> Self {
1793        Self {
1794            p2p: P2PFlowConfig::default(),
1795            o2c: O2CFlowConfig::default(),
1796            generate_document_references: true,
1797            export_flow_graph: false,
1798        }
1799    }
1800}
1801
/// P2P (Procure-to-Pay) flow configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PFlowConfig {
    /// Enable P2P document flow generation (serde default: `default_true`).
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Three-way match success rate (PO-GR-Invoice); `0.95` when omitted.
    #[serde(default = "default_three_way_match_rate")]
    pub three_way_match_rate: f64,
    /// Rate of partial deliveries (`0.15` when omitted).
    #[serde(default = "default_partial_delivery_rate")]
    pub partial_delivery_rate: f64,
    /// Rate of price variances between PO and Invoice (`0.08` when omitted).
    #[serde(default = "default_price_variance_rate")]
    pub price_variance_rate: f64,
    /// Maximum price variance percentage (`0.05` when omitted).
    /// NOTE(review): the default suggests a 0.0–1.0 fraction (5%) rather
    /// than a whole-number percentage — confirm.
    #[serde(default = "default_max_price_variance")]
    pub max_price_variance_percent: f64,
    /// Rate of quantity variances between PO/GR and Invoice (`0.05` when
    /// omitted).
    #[serde(default = "default_quantity_variance_rate")]
    pub quantity_variance_rate: f64,
    /// Average days from PO to goods receipt (`14` when omitted).
    #[serde(default = "default_po_to_gr_days")]
    pub average_po_to_gr_days: u32,
    /// Average days from GR to invoice (`5` when omitted).
    #[serde(default = "default_gr_to_invoice_days")]
    pub average_gr_to_invoice_days: u32,
    /// Average days from invoice to payment (`30` when omitted).
    #[serde(default = "default_invoice_to_payment_days")]
    pub average_invoice_to_payment_days: u32,
    /// PO line count distribution
    #[serde(default)]
    pub line_count_distribution: DocumentLineCountDistribution,
    /// Payment behavior configuration
    #[serde(default)]
    pub payment_behavior: P2PPaymentBehaviorConfig,
}
1839
/// Serde default for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    const THREE_WAY_MATCH_RATE: f64 = 0.95;
    THREE_WAY_MATCH_RATE
}
1843
/// Serde default for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    const PARTIAL_DELIVERY_RATE: f64 = 0.15;
    PARTIAL_DELIVERY_RATE
}
1847
/// Serde default for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    const PRICE_VARIANCE_RATE: f64 = 0.08;
    PRICE_VARIANCE_RATE
}
1851
/// Serde default for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    const MAX_PRICE_VARIANCE: f64 = 0.05;
    MAX_PRICE_VARIANCE
}
1855
/// Serde default for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    const QUANTITY_VARIANCE_RATE: f64 = 0.05;
    QUANTITY_VARIANCE_RATE
}
1859
/// Serde default for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    const PO_TO_GR_DAYS: u32 = 14;
    PO_TO_GR_DAYS
}
1863
/// Serde default for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    const GR_TO_INVOICE_DAYS: u32 = 5;
    GR_TO_INVOICE_DAYS
}
1867
/// Serde default for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    const INVOICE_TO_PAYMENT_DAYS: u32 = 30;
    INVOICE_TO_PAYMENT_DAYS
}
1871
1872impl Default for P2PFlowConfig {
1873    fn default() -> Self {
1874        Self {
1875            enabled: true,
1876            three_way_match_rate: default_three_way_match_rate(),
1877            partial_delivery_rate: default_partial_delivery_rate(),
1878            price_variance_rate: default_price_variance_rate(),
1879            max_price_variance_percent: default_max_price_variance(),
1880            quantity_variance_rate: default_quantity_variance_rate(),
1881            average_po_to_gr_days: default_po_to_gr_days(),
1882            average_gr_to_invoice_days: default_gr_to_invoice_days(),
1883            average_invoice_to_payment_days: default_invoice_to_payment_days(),
1884            line_count_distribution: DocumentLineCountDistribution::default(),
1885            payment_behavior: P2PPaymentBehaviorConfig::default(),
1886        }
1887    }
1888}
1889
1890// ============================================================================
1891// P2P Payment Behavior Configuration
1892// ============================================================================
1893
/// P2P payment behavior configuration.
///
/// Every field carries a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PPaymentBehaviorConfig {
    /// Rate of late payments (beyond due date); `0.15` when omitted.
    #[serde(default = "default_p2p_late_payment_rate")]
    pub late_payment_rate: f64,
    /// Distribution of late payment days
    #[serde(default)]
    pub late_payment_days_distribution: LatePaymentDaysDistribution,
    /// Rate of partial payments (`0.05` when omitted).
    #[serde(default = "default_p2p_partial_payment_rate")]
    pub partial_payment_rate: f64,
    /// Rate of payment corrections (NSF, chargebacks, reversals); `0.02`
    /// when omitted.
    #[serde(default = "default_p2p_payment_correction_rate")]
    pub payment_correction_rate: f64,
}
1910
/// Serde default for `P2PPaymentBehaviorConfig::late_payment_rate`.
fn default_p2p_late_payment_rate() -> f64 {
    const LATE_PAYMENT_RATE: f64 = 0.15;
    LATE_PAYMENT_RATE
}
1914
/// Serde default for `P2PPaymentBehaviorConfig::partial_payment_rate`.
fn default_p2p_partial_payment_rate() -> f64 {
    const PARTIAL_PAYMENT_RATE: f64 = 0.05;
    PARTIAL_PAYMENT_RATE
}
1918
/// Serde default for `P2PPaymentBehaviorConfig::payment_correction_rate`.
fn default_p2p_payment_correction_rate() -> f64 {
    const PAYMENT_CORRECTION_RATE: f64 = 0.02;
    PAYMENT_CORRECTION_RATE
}
1922
1923impl Default for P2PPaymentBehaviorConfig {
1924    fn default() -> Self {
1925        Self {
1926            late_payment_rate: default_p2p_late_payment_rate(),
1927            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
1928            partial_payment_rate: default_p2p_partial_payment_rate(),
1929            payment_correction_rate: default_p2p_payment_correction_rate(),
1930        }
1931    }
1932}
1933
/// Distribution of late payment days for P2P.
///
/// Each bucket has its own serde default, and the five defaults sum to 1.0.
/// NOTE(review): because buckets default independently, a config that
/// overrides only some of them can yield weights that no longer sum to 1.0 —
/// confirm how the consumer handles that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatePaymentDaysDistribution {
    /// 1-7 days late (slightly late); `0.50` when omitted.
    #[serde(default = "default_slightly_late")]
    pub slightly_late_1_to_7: f64,
    /// 8-14 days late; `0.25` when omitted.
    #[serde(default = "default_late_8_14")]
    pub late_8_to_14: f64,
    /// 15-30 days late (very late); `0.15` when omitted.
    #[serde(default = "default_very_late")]
    pub very_late_15_to_30: f64,
    /// 31-60 days late (severely late); `0.07` when omitted.
    #[serde(default = "default_severely_late")]
    pub severely_late_31_to_60: f64,
    /// Over 60 days late (extremely late); `0.03` when omitted.
    #[serde(default = "default_extremely_late")]
    pub extremely_late_over_60: f64,
}
1953
/// Serde default for `LatePaymentDaysDistribution::slightly_late_1_to_7`.
fn default_slightly_late() -> f64 {
    const SLIGHTLY_LATE_SHARE: f64 = 0.50;
    SLIGHTLY_LATE_SHARE
}
1957
/// Serde default for `LatePaymentDaysDistribution::late_8_to_14`.
fn default_late_8_14() -> f64 {
    const LATE_8_TO_14_SHARE: f64 = 0.25;
    LATE_8_TO_14_SHARE
}
1961
/// Serde default for `LatePaymentDaysDistribution::very_late_15_to_30`.
fn default_very_late() -> f64 {
    const VERY_LATE_SHARE: f64 = 0.15;
    VERY_LATE_SHARE
}
1965
/// Serde default for `LatePaymentDaysDistribution::severely_late_31_to_60`.
fn default_severely_late() -> f64 {
    const SEVERELY_LATE_SHARE: f64 = 0.07;
    SEVERELY_LATE_SHARE
}
1969
/// Serde default for `LatePaymentDaysDistribution::extremely_late_over_60`.
fn default_extremely_late() -> f64 {
    const EXTREMELY_LATE_SHARE: f64 = 0.03;
    EXTREMELY_LATE_SHARE
}
1973
1974impl Default for LatePaymentDaysDistribution {
1975    fn default() -> Self {
1976        Self {
1977            slightly_late_1_to_7: default_slightly_late(),
1978            late_8_to_14: default_late_8_14(),
1979            very_late_15_to_30: default_very_late(),
1980            severely_late_31_to_60: default_severely_late(),
1981            extremely_late_over_60: default_extremely_late(),
1982        }
1983    }
1984}
1985
/// O2C (Order-to-Cash) flow configuration.
///
/// Every field is optional in the input: scalar fields fall back to their
/// `default_*` helper and nested sections fall back to their own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct O2CFlowConfig {
    /// Enable O2C document flow generation.
    /// Falls back to `default_true()` when omitted; `Default::default()` sets `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Credit check failure rate. Defaults to 0.02.
    #[serde(default = "default_credit_check_failure_rate")]
    pub credit_check_failure_rate: f64,
    /// Rate of partial shipments. Defaults to 0.10.
    #[serde(default = "default_partial_shipment_rate")]
    pub partial_shipment_rate: f64,
    /// Rate of returns. Defaults to 0.03.
    #[serde(default = "default_return_rate")]
    pub return_rate: f64,
    /// Bad debt write-off rate. Defaults to 0.01.
    #[serde(default = "default_bad_debt_rate")]
    pub bad_debt_rate: f64,
    /// Average days from SO to delivery. Defaults to 7.
    #[serde(default = "default_so_to_delivery_days")]
    pub average_so_to_delivery_days: u32,
    /// Average days from delivery to invoice. Defaults to 1.
    #[serde(default = "default_delivery_to_invoice_days")]
    pub average_delivery_to_invoice_days: u32,
    /// Average days from invoice to receipt. Defaults to 45.
    #[serde(default = "default_invoice_to_receipt_days")]
    pub average_invoice_to_receipt_days: u32,
    /// SO line count distribution (min/max/mode number of lines).
    #[serde(default)]
    pub line_count_distribution: DocumentLineCountDistribution,
    /// Cash discount configuration.
    #[serde(default)]
    pub cash_discount: CashDiscountConfig,
    /// Payment behavior configuration (dunning, partial/short/on-account
    /// payments, payment corrections).
    #[serde(default)]
    pub payment_behavior: O2CPaymentBehaviorConfig,
}
2023
/// Fallback for `O2CFlowConfig::credit_check_failure_rate`: 0.02.
fn default_credit_check_failure_rate() -> f64 {
    const FAILURE_RATE: f64 = 0.02;
    FAILURE_RATE
}
2027
/// Fallback for `O2CFlowConfig::partial_shipment_rate`: 0.10.
fn default_partial_shipment_rate() -> f64 {
    const PARTIAL_SHIPMENT_RATE: f64 = 0.10;
    PARTIAL_SHIPMENT_RATE
}
2031
/// Fallback for `O2CFlowConfig::return_rate`: 0.03.
fn default_return_rate() -> f64 {
    const RETURN_RATE: f64 = 0.03;
    RETURN_RATE
}
2035
/// Fallback for `O2CFlowConfig::bad_debt_rate`: 0.01.
fn default_bad_debt_rate() -> f64 {
    const BAD_DEBT_RATE: f64 = 0.01;
    BAD_DEBT_RATE
}
2039
/// Fallback for `O2CFlowConfig::average_so_to_delivery_days`: 7.
fn default_so_to_delivery_days() -> u32 {
    const DAYS: u32 = 7;
    DAYS
}
2043
/// Fallback for `O2CFlowConfig::average_delivery_to_invoice_days`: 1.
fn default_delivery_to_invoice_days() -> u32 {
    const DAYS: u32 = 1;
    DAYS
}
2047
/// Fallback for `O2CFlowConfig::average_invoice_to_receipt_days`: 45.
fn default_invoice_to_receipt_days() -> u32 {
    const DAYS: u32 = 45;
    DAYS
}
2051
2052impl Default for O2CFlowConfig {
2053    fn default() -> Self {
2054        Self {
2055            enabled: true,
2056            credit_check_failure_rate: default_credit_check_failure_rate(),
2057            partial_shipment_rate: default_partial_shipment_rate(),
2058            return_rate: default_return_rate(),
2059            bad_debt_rate: default_bad_debt_rate(),
2060            average_so_to_delivery_days: default_so_to_delivery_days(),
2061            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
2062            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
2063            line_count_distribution: DocumentLineCountDistribution::default(),
2064            cash_discount: CashDiscountConfig::default(),
2065            payment_behavior: O2CPaymentBehaviorConfig::default(),
2066        }
2067    }
2068}
2069
2070// ============================================================================
2071// O2C Payment Behavior Configuration
2072// ============================================================================
2073
/// O2C payment behavior configuration.
///
/// Groups the customer-payment sub-configurations. `Default` is derived, so a
/// default instance is built from each section's own `Default`, and any
/// section omitted from the input is filled the same way.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct O2CPaymentBehaviorConfig {
    /// Dunning (Mahnung) configuration
    #[serde(default)]
    pub dunning: DunningConfig,
    /// Partial payment configuration
    #[serde(default)]
    pub partial_payments: PartialPaymentConfig,
    /// Short payment configuration (unauthorized deductions)
    #[serde(default)]
    pub short_payments: ShortPaymentConfig,
    /// On-account payment configuration (unapplied payments)
    #[serde(default)]
    pub on_account_payments: OnAccountPaymentConfig,
    /// Payment correction configuration (NSF, chargebacks)
    #[serde(default)]
    pub payment_corrections: PaymentCorrectionConfig,
}
2093
/// Dunning (Mahnungen) configuration for AR collections.
///
/// Dunning is disabled by default. The default escalation thresholds are
/// 14, 28 and 42 days overdue for levels 1-3 and 60 days for collection
/// handover.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DunningConfig {
    /// Enable dunning process. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Days overdue for level 1 dunning (1st reminder). Defaults to 14.
    #[serde(default = "default_dunning_level_1_days")]
    pub level_1_days_overdue: u32,
    /// Days overdue for level 2 dunning (2nd reminder). Defaults to 28.
    #[serde(default = "default_dunning_level_2_days")]
    pub level_2_days_overdue: u32,
    /// Days overdue for level 3 dunning (final notice). Defaults to 42.
    #[serde(default = "default_dunning_level_3_days")]
    pub level_3_days_overdue: u32,
    /// Days overdue for collection handover. Defaults to 60.
    #[serde(default = "default_collection_days")]
    pub collection_days_overdue: u32,
    /// Payment rates after each dunning level
    #[serde(default)]
    pub payment_after_dunning_rates: DunningPaymentRates,
    /// Rate of invoices blocked from dunning (disputes). Defaults to 0.05.
    #[serde(default = "default_dunning_block_rate")]
    pub dunning_block_rate: f64,
    /// Interest rate per year for overdue amounts. Defaults to 0.09.
    #[serde(default = "default_dunning_interest_rate")]
    pub interest_rate_per_year: f64,
    /// Fixed dunning charge per letter. Defaults to 25.0.
    #[serde(default = "default_dunning_charge")]
    pub dunning_charge: f64,
}
2125
/// Fallback for `DunningConfig::level_1_days_overdue`: 14.
fn default_dunning_level_1_days() -> u32 {
    const DAYS_OVERDUE: u32 = 14;
    DAYS_OVERDUE
}
2129
/// Fallback for `DunningConfig::level_2_days_overdue`: 28.
fn default_dunning_level_2_days() -> u32 {
    const DAYS_OVERDUE: u32 = 28;
    DAYS_OVERDUE
}
2133
/// Fallback for `DunningConfig::level_3_days_overdue`: 42.
fn default_dunning_level_3_days() -> u32 {
    const DAYS_OVERDUE: u32 = 42;
    DAYS_OVERDUE
}
2137
/// Fallback for `DunningConfig::collection_days_overdue`: 60.
fn default_collection_days() -> u32 {
    const DAYS_OVERDUE: u32 = 60;
    DAYS_OVERDUE
}
2141
/// Fallback for `DunningConfig::dunning_block_rate`: 0.05.
fn default_dunning_block_rate() -> f64 {
    const BLOCK_RATE: f64 = 0.05;
    BLOCK_RATE
}
2145
/// Fallback for `DunningConfig::interest_rate_per_year`: 0.09.
fn default_dunning_interest_rate() -> f64 {
    const INTEREST_RATE_PER_YEAR: f64 = 0.09;
    INTEREST_RATE_PER_YEAR
}
2149
/// Fallback for `DunningConfig::dunning_charge`: 25.0.
fn default_dunning_charge() -> f64 {
    const CHARGE_PER_LETTER: f64 = 25.0;
    CHARGE_PER_LETTER
}
2153
2154impl Default for DunningConfig {
2155    fn default() -> Self {
2156        Self {
2157            enabled: false,
2158            level_1_days_overdue: default_dunning_level_1_days(),
2159            level_2_days_overdue: default_dunning_level_2_days(),
2160            level_3_days_overdue: default_dunning_level_3_days(),
2161            collection_days_overdue: default_collection_days(),
2162            payment_after_dunning_rates: DunningPaymentRates::default(),
2163            dunning_block_rate: default_dunning_block_rate(),
2164            interest_rate_per_year: default_dunning_interest_rate(),
2165            dunning_charge: default_dunning_charge(),
2166        }
2167    }
2168}
2169
/// Payment rates after each dunning level.
///
/// Omitted fields fall back to their `default_*` helper; the default rates
/// (0.40, 0.30, 0.15, 0.05, 0.10) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DunningPaymentRates {
    /// Rate that pays after level 1 reminder. Defaults to 0.40.
    #[serde(default = "default_after_level_1")]
    pub after_level_1: f64,
    /// Rate that pays after level 2 reminder. Defaults to 0.30.
    #[serde(default = "default_after_level_2")]
    pub after_level_2: f64,
    /// Rate that pays after level 3 final notice. Defaults to 0.15.
    #[serde(default = "default_after_level_3")]
    pub after_level_3: f64,
    /// Rate that pays during collection. Defaults to 0.05.
    #[serde(default = "default_during_collection")]
    pub during_collection: f64,
    /// Rate that never pays (becomes bad debt). Defaults to 0.10.
    #[serde(default = "default_never_pay")]
    pub never_pay: f64,
}
2189
/// Fallback for `DunningPaymentRates::after_level_1`: 0.40.
fn default_after_level_1() -> f64 {
    const PAY_RATE: f64 = 0.40;
    PAY_RATE
}
2193
/// Fallback for `DunningPaymentRates::after_level_2`: 0.30.
fn default_after_level_2() -> f64 {
    const PAY_RATE: f64 = 0.30;
    PAY_RATE
}
2197
/// Fallback for `DunningPaymentRates::after_level_3`: 0.15.
fn default_after_level_3() -> f64 {
    const PAY_RATE: f64 = 0.15;
    PAY_RATE
}
2201
/// Fallback for `DunningPaymentRates::during_collection`: 0.05.
fn default_during_collection() -> f64 {
    const PAY_RATE: f64 = 0.05;
    PAY_RATE
}
2205
/// Fallback for `DunningPaymentRates::never_pay`: 0.10.
fn default_never_pay() -> f64 {
    const NEVER_PAY_RATE: f64 = 0.10;
    NEVER_PAY_RATE
}
2209
2210impl Default for DunningPaymentRates {
2211    fn default() -> Self {
2212        Self {
2213            after_level_1: default_after_level_1(),
2214            after_level_2: default_after_level_2(),
2215            after_level_3: default_after_level_3(),
2216            during_collection: default_during_collection(),
2217            never_pay: default_never_pay(),
2218        }
2219    }
2220}
2221
/// Partial payment configuration.
///
/// Omitted fields fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentConfig {
    /// Rate of invoices paid partially. Defaults to 0.08.
    #[serde(default = "default_partial_payment_rate")]
    pub rate: f64,
    /// Distribution of partial payment percentages
    /// (defaults to `PartialPaymentPercentageDistribution::default()`).
    #[serde(default)]
    pub percentage_distribution: PartialPaymentPercentageDistribution,
    /// Average days until remainder is paid. Defaults to 30.
    #[serde(default = "default_avg_days_until_remainder")]
    pub avg_days_until_remainder: u32,
}
2235
/// Fallback for `PartialPaymentConfig::rate`: 0.08.
fn default_partial_payment_rate() -> f64 {
    const PARTIAL_PAYMENT_RATE: f64 = 0.08;
    PARTIAL_PAYMENT_RATE
}
2239
/// Fallback for `PartialPaymentConfig::avg_days_until_remainder`: 30.
fn default_avg_days_until_remainder() -> u32 {
    const DAYS: u32 = 30;
    DAYS
}
2243
2244impl Default for PartialPaymentConfig {
2245    fn default() -> Self {
2246        Self {
2247            rate: default_partial_payment_rate(),
2248            percentage_distribution: PartialPaymentPercentageDistribution::default(),
2249            avg_days_until_remainder: default_avg_days_until_remainder(),
2250        }
2251    }
2252}
2253
/// Distribution of partial payment percentages.
///
/// Omitted fields fall back to their `default_*` helper; the default shares
/// (0.15, 0.50, 0.25, 0.10) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentPercentageDistribution {
    /// Pay 25% of invoice. Defaults to 0.15.
    #[serde(default = "default_partial_25")]
    pub pay_25_percent: f64,
    /// Pay 50% of invoice. Defaults to 0.50.
    #[serde(default = "default_partial_50")]
    pub pay_50_percent: f64,
    /// Pay 75% of invoice. Defaults to 0.25.
    #[serde(default = "default_partial_75")]
    pub pay_75_percent: f64,
    /// Pay random percentage. Defaults to 0.10.
    #[serde(default = "default_partial_random")]
    pub pay_random_percent: f64,
}
2270
/// Fallback for `PartialPaymentPercentageDistribution::pay_25_percent`: 0.15.
fn default_partial_25() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
2274
/// Fallback for `PartialPaymentPercentageDistribution::pay_50_percent`: 0.50.
fn default_partial_50() -> f64 {
    const SHARE: f64 = 0.50;
    SHARE
}
2278
/// Fallback for `PartialPaymentPercentageDistribution::pay_75_percent`: 0.25.
fn default_partial_75() -> f64 {
    const SHARE: f64 = 0.25;
    SHARE
}
2282
/// Fallback for `PartialPaymentPercentageDistribution::pay_random_percent`: 0.10.
fn default_partial_random() -> f64 {
    const SHARE: f64 = 0.10;
    SHARE
}
2286
2287impl Default for PartialPaymentPercentageDistribution {
2288    fn default() -> Self {
2289        Self {
2290            pay_25_percent: default_partial_25(),
2291            pay_50_percent: default_partial_50(),
2292            pay_75_percent: default_partial_75(),
2293            pay_random_percent: default_partial_random(),
2294        }
2295    }
2296}
2297
/// Short payment configuration (unauthorized deductions).
///
/// Omitted fields fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentConfig {
    /// Rate of payments that are short. Defaults to 0.03.
    #[serde(default = "default_short_payment_rate")]
    pub rate: f64,
    /// Distribution of short payment reasons
    /// (defaults to `ShortPaymentReasonDistribution::default()`).
    #[serde(default)]
    pub reason_distribution: ShortPaymentReasonDistribution,
    /// Maximum percentage that can be short. Defaults to 0.10.
    #[serde(default = "default_max_short_percent")]
    pub max_short_percent: f64,
}
2311
/// Fallback for `ShortPaymentConfig::rate`: 0.03.
fn default_short_payment_rate() -> f64 {
    const SHORT_PAYMENT_RATE: f64 = 0.03;
    SHORT_PAYMENT_RATE
}
2315
/// Fallback for `ShortPaymentConfig::max_short_percent`: 0.10.
fn default_max_short_percent() -> f64 {
    const MAX_SHORT: f64 = 0.10;
    MAX_SHORT
}
2319
2320impl Default for ShortPaymentConfig {
2321    fn default() -> Self {
2322        Self {
2323            rate: default_short_payment_rate(),
2324            reason_distribution: ShortPaymentReasonDistribution::default(),
2325            max_short_percent: default_max_short_percent(),
2326        }
2327    }
2328}
2329
/// Distribution of short payment reasons.
///
/// Omitted fields fall back to their `default_*` helper; the default shares
/// (0.30, 0.20, 0.20, 0.15, 0.15) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentReasonDistribution {
    /// Pricing dispute. Defaults to 0.30.
    #[serde(default = "default_pricing_dispute")]
    pub pricing_dispute: f64,
    /// Quality issue. Defaults to 0.20.
    #[serde(default = "default_quality_issue")]
    pub quality_issue: f64,
    /// Quantity discrepancy. Defaults to 0.20.
    #[serde(default = "default_quantity_discrepancy")]
    pub quantity_discrepancy: f64,
    /// Unauthorized deduction. Defaults to 0.15.
    #[serde(default = "default_unauthorized_deduction")]
    pub unauthorized_deduction: f64,
    /// Early payment discount taken incorrectly. Defaults to 0.15.
    #[serde(default = "default_incorrect_discount")]
    pub incorrect_discount: f64,
}
2349
/// Fallback for `ShortPaymentReasonDistribution::pricing_dispute`: 0.30.
fn default_pricing_dispute() -> f64 {
    const SHARE: f64 = 0.30;
    SHARE
}
2353
/// Fallback for `ShortPaymentReasonDistribution::quality_issue`: 0.20.
fn default_quality_issue() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
2357
/// Fallback for `ShortPaymentReasonDistribution::quantity_discrepancy`: 0.20.
fn default_quantity_discrepancy() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
2361
/// Fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`: 0.15.
fn default_unauthorized_deduction() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
2365
/// Fallback for `ShortPaymentReasonDistribution::incorrect_discount`: 0.15.
fn default_incorrect_discount() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
2369
2370impl Default for ShortPaymentReasonDistribution {
2371    fn default() -> Self {
2372        Self {
2373            pricing_dispute: default_pricing_dispute(),
2374            quality_issue: default_quality_issue(),
2375            quantity_discrepancy: default_quantity_discrepancy(),
2376            unauthorized_deduction: default_unauthorized_deduction(),
2377            incorrect_discount: default_incorrect_discount(),
2378        }
2379    }
2380}
2381
/// On-account payment configuration (unapplied payments).
///
/// Omitted fields fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OnAccountPaymentConfig {
    /// Rate of payments that are on-account (unapplied). Defaults to 0.02.
    #[serde(default = "default_on_account_rate")]
    pub rate: f64,
    /// Average days until on-account payments are applied. Defaults to 14.
    #[serde(default = "default_avg_days_until_applied")]
    pub avg_days_until_applied: u32,
}
2392
/// Fallback for `OnAccountPaymentConfig::rate`: 0.02.
fn default_on_account_rate() -> f64 {
    const ON_ACCOUNT_RATE: f64 = 0.02;
    ON_ACCOUNT_RATE
}
2396
/// Fallback for `OnAccountPaymentConfig::avg_days_until_applied`: 14.
fn default_avg_days_until_applied() -> u32 {
    const DAYS: u32 = 14;
    DAYS
}
2400
2401impl Default for OnAccountPaymentConfig {
2402    fn default() -> Self {
2403        Self {
2404            rate: default_on_account_rate(),
2405            avg_days_until_applied: default_avg_days_until_applied(),
2406        }
2407    }
2408}
2409
/// Payment correction configuration.
///
/// Omitted fields fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionConfig {
    /// Rate of payments requiring correction. Defaults to 0.02.
    #[serde(default = "default_payment_correction_rate")]
    pub rate: f64,
    /// Distribution of correction types
    /// (defaults to `PaymentCorrectionTypeDistribution::default()`).
    #[serde(default)]
    pub type_distribution: PaymentCorrectionTypeDistribution,
}
2420
/// Fallback for `PaymentCorrectionConfig::rate`: 0.02.
fn default_payment_correction_rate() -> f64 {
    const CORRECTION_RATE: f64 = 0.02;
    CORRECTION_RATE
}
2424
2425impl Default for PaymentCorrectionConfig {
2426    fn default() -> Self {
2427        Self {
2428            rate: default_payment_correction_rate(),
2429            type_distribution: PaymentCorrectionTypeDistribution::default(),
2430        }
2431    }
2432}
2433
/// Distribution of payment correction types.
///
/// Omitted fields fall back to their `default_*` helper; the default shares
/// (0.30, 0.20, 0.20, 0.15, 0.15) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionTypeDistribution {
    /// NSF (Non-sufficient funds) / bounced check. Defaults to 0.30.
    #[serde(default = "default_nsf_rate")]
    pub nsf: f64,
    /// Chargeback. Defaults to 0.20.
    #[serde(default = "default_chargeback_rate")]
    pub chargeback: f64,
    /// Wrong amount applied. Defaults to 0.20.
    #[serde(default = "default_wrong_amount_rate")]
    pub wrong_amount: f64,
    /// Wrong customer applied. Defaults to 0.15.
    #[serde(default = "default_wrong_customer_rate")]
    pub wrong_customer: f64,
    /// Duplicate payment. Defaults to 0.15.
    #[serde(default = "default_duplicate_payment_rate")]
    pub duplicate_payment: f64,
}
2453
/// Fallback for `PaymentCorrectionTypeDistribution::nsf`: 0.30.
fn default_nsf_rate() -> f64 {
    const SHARE: f64 = 0.30;
    SHARE
}
2457
/// Fallback for `PaymentCorrectionTypeDistribution::chargeback`: 0.20.
fn default_chargeback_rate() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
2461
/// Fallback for `PaymentCorrectionTypeDistribution::wrong_amount`: 0.20.
fn default_wrong_amount_rate() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
2465
/// Fallback for `PaymentCorrectionTypeDistribution::wrong_customer`: 0.15.
fn default_wrong_customer_rate() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
2469
/// Fallback for `PaymentCorrectionTypeDistribution::duplicate_payment`: 0.15.
fn default_duplicate_payment_rate() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
2473
2474impl Default for PaymentCorrectionTypeDistribution {
2475    fn default() -> Self {
2476        Self {
2477            nsf: default_nsf_rate(),
2478            chargeback: default_chargeback_rate(),
2479            wrong_amount: default_wrong_amount_rate(),
2480            wrong_customer: default_wrong_customer_rate(),
2481            duplicate_payment: default_duplicate_payment_rate(),
2482        }
2483    }
2484}
2485
/// Document line count distribution.
///
/// Described by a minimum, a maximum and a mode. Omitted fields fall back to
/// 1, 20 and 3 respectively.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentLineCountDistribution {
    /// Minimum number of lines. Defaults to 1.
    #[serde(default = "default_min_lines")]
    pub min_lines: u32,
    /// Maximum number of lines. Defaults to 20.
    #[serde(default = "default_max_lines")]
    pub max_lines: u32,
    /// Most common line count (mode). Defaults to 3.
    #[serde(default = "default_mode_lines")]
    pub mode_lines: u32,
}
2499
/// Fallback for `DocumentLineCountDistribution::min_lines`: 1.
fn default_min_lines() -> u32 {
    const MIN_LINES: u32 = 1;
    MIN_LINES
}
2503
/// Fallback for `DocumentLineCountDistribution::max_lines`: 20.
fn default_max_lines() -> u32 {
    const MAX_LINES: u32 = 20;
    MAX_LINES
}
2507
/// Fallback for `DocumentLineCountDistribution::mode_lines`: 3.
fn default_mode_lines() -> u32 {
    const MODE_LINES: u32 = 3;
    MODE_LINES
}
2511
2512impl Default for DocumentLineCountDistribution {
2513    fn default() -> Self {
2514        Self {
2515            min_lines: default_min_lines(),
2516            max_lines: default_max_lines(),
2517            mode_lines: default_mode_lines(),
2518        }
2519    }
2520}
2521
/// Cash discount configuration.
///
/// Omitted fields fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CashDiscountConfig {
    /// Percentage of invoices eligible for cash discount. Defaults to 0.30.
    #[serde(default = "default_discount_eligible_rate")]
    pub eligible_rate: f64,
    /// Rate at which customers take the discount. Defaults to 0.60.
    #[serde(default = "default_discount_taken_rate")]
    pub taken_rate: f64,
    /// Standard discount percentage. Defaults to 0.02.
    #[serde(default = "default_discount_percent")]
    pub discount_percent: f64,
    /// Days within which discount must be taken. Defaults to 10.
    #[serde(default = "default_discount_days")]
    pub discount_days: u32,
}
2538
/// Fallback for `CashDiscountConfig::eligible_rate`: 0.30.
fn default_discount_eligible_rate() -> f64 {
    const ELIGIBLE_RATE: f64 = 0.30;
    ELIGIBLE_RATE
}
2542
/// Fallback for `CashDiscountConfig::taken_rate`: 0.60.
fn default_discount_taken_rate() -> f64 {
    const TAKEN_RATE: f64 = 0.60;
    TAKEN_RATE
}
2546
/// Fallback for `CashDiscountConfig::discount_percent`: 0.02.
fn default_discount_percent() -> f64 {
    const DISCOUNT: f64 = 0.02;
    DISCOUNT
}
2550
/// Fallback for `CashDiscountConfig::discount_days`: 10.
fn default_discount_days() -> u32 {
    const DAYS: u32 = 10;
    DAYS
}
2554
2555impl Default for CashDiscountConfig {
2556    fn default() -> Self {
2557        Self {
2558            eligible_rate: default_discount_eligible_rate(),
2559            taken_rate: default_discount_taken_rate(),
2560            discount_percent: default_discount_percent(),
2561            discount_days: default_discount_days(),
2562        }
2563    }
2564}
2565
2566// ============================================================================
2567// Intercompany Configuration
2568// ============================================================================
2569
/// Intercompany transaction configuration.
///
/// Intercompany generation is disabled by default. Omitted fields fall back
/// to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntercompanyConfig {
    /// Enable intercompany transaction generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Rate of transactions that are intercompany. Defaults to 0.15.
    #[serde(default = "default_ic_transaction_rate")]
    pub ic_transaction_rate: f64,
    /// Transfer pricing method. Defaults to `TransferPricingMethod::CostPlus`.
    #[serde(default)]
    pub transfer_pricing_method: TransferPricingMethod,
    /// Transfer pricing markup percentage (for cost-plus). Defaults to 0.05.
    #[serde(default = "default_markup_percent")]
    pub markup_percent: f64,
    /// Generate matched IC pairs (offsetting entries).
    /// Falls back to `default_true()` when omitted; `Default::default()` sets `true`.
    #[serde(default = "default_true")]
    pub generate_matched_pairs: bool,
    /// IC transaction type distribution
    #[serde(default)]
    pub transaction_type_distribution: ICTransactionTypeDistribution,
    /// Generate elimination entries for consolidation. Defaults to `false`.
    #[serde(default)]
    pub generate_eliminations: bool,
}
2595
/// Fallback for `IntercompanyConfig::ic_transaction_rate`: 0.15.
fn default_ic_transaction_rate() -> f64 {
    const IC_RATE: f64 = 0.15;
    IC_RATE
}
2599
/// Fallback for `IntercompanyConfig::markup_percent`: 0.05.
fn default_markup_percent() -> f64 {
    const MARKUP: f64 = 0.05;
    MARKUP
}
2603
2604impl Default for IntercompanyConfig {
2605    fn default() -> Self {
2606        Self {
2607            enabled: false,
2608            ic_transaction_rate: default_ic_transaction_rate(),
2609            transfer_pricing_method: TransferPricingMethod::default(),
2610            markup_percent: default_markup_percent(),
2611            generate_matched_pairs: true,
2612            transaction_type_distribution: ICTransactionTypeDistribution::default(),
2613            generate_eliminations: false,
2614        }
2615    }
2616}
2617
/// Transfer pricing method.
///
/// Variants are (de)serialized under their snake_case names (for example
/// `cost_plus`). `CostPlus` is the `Default` variant.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TransferPricingMethod {
    /// Cost plus a markup (the default variant)
    #[default]
    CostPlus,
    /// Comparable uncontrolled price
    ComparableUncontrolled,
    /// Resale price method
    ResalePrice,
    /// Transactional net margin method
    TransactionalNetMargin,
    /// Profit split method
    ProfitSplit,
}
2634
2635/// IC transaction type distribution.
2636#[derive(Debug, Clone, Serialize, Deserialize)]
2637pub struct ICTransactionTypeDistribution {
2638    /// Goods sales between entities
2639    pub goods_sale: f64,
2640    /// Services provided
2641    pub service_provided: f64,
2642    /// Intercompany loans
2643    pub loan: f64,
2644    /// Dividends
2645    pub dividend: f64,
2646    /// Management fees
2647    pub management_fee: f64,
2648    /// Royalties
2649    pub royalty: f64,
2650    /// Cost sharing
2651    pub cost_sharing: f64,
2652}
2653
2654impl Default for ICTransactionTypeDistribution {
2655    fn default() -> Self {
2656        Self {
2657            goods_sale: 0.35,
2658            service_provided: 0.20,
2659            loan: 0.10,
2660            dividend: 0.05,
2661            management_fee: 0.15,
2662            royalty: 0.10,
2663            cost_sharing: 0.05,
2664        }
2665    }
2666}
2667
2668// ============================================================================
2669// Balance Configuration
2670// ============================================================================
2671
/// Balance and trial balance configuration.
///
/// Omitted fields fall back to the defaults noted on each field. Boolean
/// fields marked `default_true` are `true` in `Default::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BalanceConfig {
    /// Generate opening balances. Defaults to `false`.
    #[serde(default)]
    pub generate_opening_balances: bool,
    /// Generate trial balances (`default_true`).
    #[serde(default = "default_true")]
    pub generate_trial_balances: bool,
    /// Target gross margin (for revenue/COGS coherence). Defaults to 0.35.
    #[serde(default = "default_gross_margin")]
    pub target_gross_margin: f64,
    /// Target DSO (Days Sales Outstanding). Defaults to 45.
    #[serde(default = "default_dso")]
    pub target_dso_days: u32,
    /// Target DPO (Days Payable Outstanding). Defaults to 30.
    #[serde(default = "default_dpo")]
    pub target_dpo_days: u32,
    /// Target current ratio. Defaults to 1.5.
    #[serde(default = "default_current_ratio")]
    pub target_current_ratio: f64,
    /// Target debt-to-equity ratio. Defaults to 0.5.
    #[serde(default = "default_debt_equity")]
    pub target_debt_to_equity: f64,
    /// Validate balance sheet equation (A = L + E) (`default_true`).
    #[serde(default = "default_true")]
    pub validate_balance_equation: bool,
    /// Reconcile subledgers to GL control accounts (`default_true`).
    #[serde(default = "default_true")]
    pub reconcile_subledgers: bool,
}
2703
/// Fallback for `BalanceConfig::target_gross_margin`: 0.35.
fn default_gross_margin() -> f64 {
    const GROSS_MARGIN: f64 = 0.35;
    GROSS_MARGIN
}
2707
/// Fallback for `BalanceConfig::target_dso_days`: 45.
fn default_dso() -> u32 {
    const DSO_DAYS: u32 = 45;
    DSO_DAYS
}
2711
/// Fallback for `BalanceConfig::target_dpo_days`: 30.
fn default_dpo() -> u32 {
    const DPO_DAYS: u32 = 30;
    DPO_DAYS
}
2715
/// Fallback for `BalanceConfig::target_current_ratio`: 1.5.
fn default_current_ratio() -> f64 {
    const CURRENT_RATIO: f64 = 1.5;
    CURRENT_RATIO
}
2719
/// Fallback for `BalanceConfig::target_debt_to_equity`: 0.5.
fn default_debt_equity() -> f64 {
    const DEBT_TO_EQUITY: f64 = 0.5;
    DEBT_TO_EQUITY
}
2723
2724impl Default for BalanceConfig {
2725    fn default() -> Self {
2726        Self {
2727            generate_opening_balances: false,
2728            generate_trial_balances: true,
2729            target_gross_margin: default_gross_margin(),
2730            target_dso_days: default_dso(),
2731            target_dpo_days: default_dpo(),
2732            target_current_ratio: default_current_ratio(),
2733            target_debt_to_equity: default_debt_equity(),
2734            validate_balance_equation: true,
2735            reconcile_subledgers: true,
2736        }
2737    }
2738}
2739
2740// ==========================================================================
2741// OCPM (Object-Centric Process Mining) Configuration
2742// ==========================================================================
2743
/// OCPM (Object-Centric Process Mining) configuration.
///
/// Controls generation of OCEL 2.0 compatible event logs with
/// many-to-many event-to-object relationships.
///
/// OCPM generation is disabled by default. Boolean fields marked
/// `default_true` are `true` in `Default::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmConfig {
    /// Enable OCPM event log generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
    /// (`default_true`).
    #[serde(default = "default_true")]
    pub generate_lifecycle_events: bool,

    /// Include object-to-object relationships in output (`default_true`).
    #[serde(default = "default_true")]
    pub include_object_relationships: bool,

    /// Compute and export process variants (`default_true`).
    #[serde(default = "default_true")]
    pub compute_variants: bool,

    /// Maximum variants to track (0 = unlimited). Defaults to 0.
    #[serde(default)]
    pub max_variants: usize,

    /// P2P process configuration
    #[serde(default)]
    pub p2p_process: OcpmProcessConfig,

    /// O2C process configuration
    #[serde(default)]
    pub o2c_process: OcpmProcessConfig,

    /// Output format configuration
    #[serde(default)]
    pub output: OcpmOutputConfig,
}
2782
2783impl Default for OcpmConfig {
2784    fn default() -> Self {
2785        Self {
2786            enabled: false,
2787            generate_lifecycle_events: true,
2788            include_object_relationships: true,
2789            compute_variants: true,
2790            max_variants: 0,
2791            p2p_process: OcpmProcessConfig::default(),
2792            o2c_process: OcpmProcessConfig::default(),
2793            output: OcpmOutputConfig::default(),
2794        }
2795    }
2796}
2797
/// Process-specific OCPM configuration.
///
/// Holds the deviation probabilities for one process. Omitted fields fall
/// back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmProcessConfig {
    /// Rework probability (0.0-1.0). Defaults to 0.05.
    #[serde(default = "default_rework_probability")]
    pub rework_probability: f64,

    /// Skip step probability (0.0-1.0). Defaults to 0.02.
    #[serde(default = "default_skip_probability")]
    pub skip_step_probability: f64,

    /// Out-of-order step probability (0.0-1.0). Defaults to 0.03.
    #[serde(default = "default_out_of_order_probability")]
    pub out_of_order_probability: f64,
}
2813
/// Fallback for `OcpmProcessConfig::rework_probability`: 0.05.
fn default_rework_probability() -> f64 {
    const PROBABILITY: f64 = 0.05;
    PROBABILITY
}
2817
/// Fallback for `OcpmProcessConfig::skip_step_probability`: 0.02.
fn default_skip_probability() -> f64 {
    const PROBABILITY: f64 = 0.02;
    PROBABILITY
}
2821
/// Fallback for `OcpmProcessConfig::out_of_order_probability`: 0.03.
fn default_out_of_order_probability() -> f64 {
    const PROBABILITY: f64 = 0.03;
    PROBABILITY
}
2825
2826impl Default for OcpmProcessConfig {
2827    fn default() -> Self {
2828        Self {
2829            rework_probability: default_rework_probability(),
2830            skip_step_probability: default_skip_probability(),
2831            out_of_order_probability: default_out_of_order_probability(),
2832        }
2833    }
2834}
2835
/// OCPM output format configuration.
///
/// One boolean switch per export. In `Default::default()` every export is on
/// except OCEL 2.0 XML.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmOutputConfig {
    /// Export OCEL 2.0 JSON format (`default_true`).
    #[serde(default = "default_true")]
    pub ocel_json: bool,

    /// Export OCEL 2.0 XML format. Defaults to `false`.
    #[serde(default)]
    pub ocel_xml: bool,

    /// Export flattened CSV for each object type (`default_true`).
    #[serde(default = "default_true")]
    pub flattened_csv: bool,

    /// Export event-object relationship table (`default_true`).
    #[serde(default = "default_true")]
    pub event_object_csv: bool,

    /// Export object-object relationship table (`default_true`).
    #[serde(default = "default_true")]
    pub object_relationship_csv: bool,

    /// Export process variants summary (`default_true`).
    #[serde(default = "default_true")]
    pub variants_csv: bool,
}
2863
2864impl Default for OcpmOutputConfig {
2865    fn default() -> Self {
2866        Self {
2867            ocel_json: true,
2868            ocel_xml: false,
2869            flattened_csv: true,
2870            event_object_csv: true,
2871            object_relationship_csv: true,
2872            variants_csv: true,
2873        }
2874    }
2875}
2876
/// Audit engagement and workpaper generation configuration.
///
/// Audit generation is disabled by default. Nested sections omitted from the
/// input fall back to their own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditGenerationConfig {
    /// Enable audit engagement generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate engagement documents and workpapers.
    /// Falls back to `default_true()` when omitted; `Default::default()` sets `true`.
    #[serde(default = "default_true")]
    pub generate_workpapers: bool,

    /// Default engagement type distribution
    #[serde(default)]
    pub engagement_types: AuditEngagementTypesConfig,

    /// Workpaper configuration
    #[serde(default)]
    pub workpapers: WorkpaperConfig,

    /// Team configuration
    #[serde(default)]
    pub team: AuditTeamConfig,

    /// Review workflow configuration
    #[serde(default)]
    pub review: ReviewWorkflowConfig,
}
2904
2905impl Default for AuditGenerationConfig {
2906    fn default() -> Self {
2907        Self {
2908            enabled: false,
2909            generate_workpapers: true,
2910            engagement_types: AuditEngagementTypesConfig::default(),
2911            workpapers: WorkpaperConfig::default(),
2912            team: AuditTeamConfig::default(),
2913            review: ReviewWorkflowConfig::default(),
2914        }
2915    }
2916}
2917
/// Engagement type distribution configuration.
///
/// Omitted fields fall back to their `default_*` helper; the default
/// probabilities (0.40, 0.20, 0.25, 0.10, 0.05) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEngagementTypesConfig {
    /// Financial statement audit probability. Defaults to 0.40.
    #[serde(default = "default_financial_audit_prob")]
    pub financial_statement: f64,
    /// SOX/ICFR audit probability. Defaults to 0.20.
    #[serde(default = "default_sox_audit_prob")]
    pub sox_icfr: f64,
    /// Integrated audit probability. Defaults to 0.25.
    #[serde(default = "default_integrated_audit_prob")]
    pub integrated: f64,
    /// Review engagement probability. Defaults to 0.10.
    #[serde(default = "default_review_prob")]
    pub review: f64,
    /// Agreed-upon procedures probability. Defaults to 0.05.
    #[serde(default = "default_aup_prob")]
    pub agreed_upon_procedures: f64,
}
2937
/// Fallback for `AuditEngagementTypesConfig::financial_statement`: 0.40.
fn default_financial_audit_prob() -> f64 {
    const PROBABILITY: f64 = 0.40;
    PROBABILITY
}
/// Fallback for `AuditEngagementTypesConfig::sox_icfr`: 0.20.
fn default_sox_audit_prob() -> f64 {
    const PROBABILITY: f64 = 0.20;
    PROBABILITY
}
/// Fallback for `AuditEngagementTypesConfig::integrated`: 0.25.
fn default_integrated_audit_prob() -> f64 {
    const PROBABILITY: f64 = 0.25;
    PROBABILITY
}
/// Fallback for `AuditEngagementTypesConfig::review`: 0.10.
fn default_review_prob() -> f64 {
    const PROBABILITY: f64 = 0.10;
    PROBABILITY
}
/// Fallback for `AuditEngagementTypesConfig::agreed_upon_procedures`: 0.05.
fn default_aup_prob() -> f64 {
    const PROBABILITY: f64 = 0.05;
    PROBABILITY
}
2953
2954impl Default for AuditEngagementTypesConfig {
2955    fn default() -> Self {
2956        Self {
2957            financial_statement: default_financial_audit_prob(),
2958            sox_icfr: default_sox_audit_prob(),
2959            integrated: default_integrated_audit_prob(),
2960            review: default_review_prob(),
2961            agreed_upon_procedures: default_aup_prob(),
2962        }
2963    }
2964}
2965
/// Workpaper generation configuration.
///
/// Boolean fields marked `default_true` are `true` in `Default::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkpaperConfig {
    /// Average workpapers per engagement phase. Defaults to 5.
    #[serde(default = "default_workpapers_per_phase")]
    pub average_per_phase: usize,

    /// Include ISA compliance references (`default_true`).
    #[serde(default = "default_true")]
    pub include_isa_references: bool,

    /// Generate sample details (`default_true`).
    #[serde(default = "default_true")]
    pub include_sample_details: bool,

    /// Include cross-references between workpapers (`default_true`).
    #[serde(default = "default_true")]
    pub include_cross_references: bool,

    /// Sampling configuration
    #[serde(default)]
    pub sampling: SamplingConfig,
}
2989
2990fn default_workpapers_per_phase() -> usize {
2991    5
2992}
2993
2994impl Default for WorkpaperConfig {
2995    fn default() -> Self {
2996        Self {
2997            average_per_phase: default_workpapers_per_phase(),
2998            include_isa_references: true,
2999            include_sample_details: true,
3000            include_cross_references: true,
3001            sampling: SamplingConfig::default(),
3002        }
3003    }
3004}
3005
3006/// Sampling method configuration.
3007#[derive(Debug, Clone, Serialize, Deserialize)]
3008pub struct SamplingConfig {
3009    /// Statistical sampling rate (0.0-1.0)
3010    #[serde(default = "default_statistical_rate")]
3011    pub statistical_rate: f64,
3012    /// Judgmental sampling rate (0.0-1.0)
3013    #[serde(default = "default_judgmental_rate")]
3014    pub judgmental_rate: f64,
3015    /// Haphazard sampling rate (0.0-1.0)
3016    #[serde(default = "default_haphazard_rate")]
3017    pub haphazard_rate: f64,
3018    /// 100% examination rate (0.0-1.0)
3019    #[serde(default = "default_complete_examination_rate")]
3020    pub complete_examination_rate: f64,
3021}
3022
3023fn default_statistical_rate() -> f64 {
3024    0.40
3025}
3026fn default_judgmental_rate() -> f64 {
3027    0.30
3028}
3029fn default_haphazard_rate() -> f64 {
3030    0.20
3031}
3032fn default_complete_examination_rate() -> f64 {
3033    0.10
3034}
3035
3036impl Default for SamplingConfig {
3037    fn default() -> Self {
3038        Self {
3039            statistical_rate: default_statistical_rate(),
3040            judgmental_rate: default_judgmental_rate(),
3041            haphazard_rate: default_haphazard_rate(),
3042            complete_examination_rate: default_complete_examination_rate(),
3043        }
3044    }
3045}
3046
3047/// Audit team configuration.
3048#[derive(Debug, Clone, Serialize, Deserialize)]
3049pub struct AuditTeamConfig {
3050    /// Minimum team size
3051    #[serde(default = "default_min_team_size")]
3052    pub min_team_size: usize,
3053    /// Maximum team size
3054    #[serde(default = "default_max_team_size")]
3055    pub max_team_size: usize,
3056    /// Probability of having a specialist on the team
3057    #[serde(default = "default_specialist_probability")]
3058    pub specialist_probability: f64,
3059}
3060
3061fn default_min_team_size() -> usize {
3062    3
3063}
3064fn default_max_team_size() -> usize {
3065    8
3066}
3067fn default_specialist_probability() -> f64 {
3068    0.30
3069}
3070
3071impl Default for AuditTeamConfig {
3072    fn default() -> Self {
3073        Self {
3074            min_team_size: default_min_team_size(),
3075            max_team_size: default_max_team_size(),
3076            specialist_probability: default_specialist_probability(),
3077        }
3078    }
3079}
3080
3081/// Review workflow configuration.
3082#[derive(Debug, Clone, Serialize, Deserialize)]
3083pub struct ReviewWorkflowConfig {
3084    /// Average days between preparer completion and first review
3085    #[serde(default = "default_review_delay_days")]
3086    pub average_review_delay_days: u32,
3087    /// Probability of review notes requiring rework
3088    #[serde(default = "default_rework_probability_review")]
3089    pub rework_probability: f64,
3090    /// Require partner sign-off for all workpapers
3091    #[serde(default = "default_true")]
3092    pub require_partner_signoff: bool,
3093}
3094
3095fn default_review_delay_days() -> u32 {
3096    2
3097}
3098fn default_rework_probability_review() -> f64 {
3099    0.15
3100}
3101
3102impl Default for ReviewWorkflowConfig {
3103    fn default() -> Self {
3104        Self {
3105            average_review_delay_days: default_review_delay_days(),
3106            rework_probability: default_rework_probability_review(),
3107            require_partner_signoff: true,
3108        }
3109    }
3110}
3111
3112// =============================================================================
3113// Data Quality Configuration
3114// =============================================================================
3115
3116/// Data quality variation settings for realistic flakiness injection.
3117#[derive(Debug, Clone, Serialize, Deserialize)]
3118pub struct DataQualitySchemaConfig {
3119    /// Enable data quality variations
3120    #[serde(default)]
3121    pub enabled: bool,
3122    /// Preset to use (overrides individual settings if set)
3123    #[serde(default)]
3124    pub preset: DataQualityPreset,
3125    /// Missing value injection settings
3126    #[serde(default)]
3127    pub missing_values: MissingValuesSchemaConfig,
3128    /// Typo injection settings
3129    #[serde(default)]
3130    pub typos: TypoSchemaConfig,
3131    /// Format variation settings
3132    #[serde(default)]
3133    pub format_variations: FormatVariationSchemaConfig,
3134    /// Duplicate injection settings
3135    #[serde(default)]
3136    pub duplicates: DuplicateSchemaConfig,
3137    /// Encoding issue settings
3138    #[serde(default)]
3139    pub encoding_issues: EncodingIssueSchemaConfig,
3140    /// Generate quality issue labels for ML training
3141    #[serde(default)]
3142    pub generate_labels: bool,
3143    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
3144    #[serde(default)]
3145    pub sink_profiles: SinkQualityProfiles,
3146}
3147
3148impl Default for DataQualitySchemaConfig {
3149    fn default() -> Self {
3150        Self {
3151            enabled: false,
3152            preset: DataQualityPreset::None,
3153            missing_values: MissingValuesSchemaConfig::default(),
3154            typos: TypoSchemaConfig::default(),
3155            format_variations: FormatVariationSchemaConfig::default(),
3156            duplicates: DuplicateSchemaConfig::default(),
3157            encoding_issues: EncodingIssueSchemaConfig::default(),
3158            generate_labels: true,
3159            sink_profiles: SinkQualityProfiles::default(),
3160        }
3161    }
3162}
3163
3164impl DataQualitySchemaConfig {
3165    /// Creates a config for a specific preset profile.
3166    pub fn with_preset(preset: DataQualityPreset) -> Self {
3167        let mut config = Self {
3168            preset,
3169            ..Default::default()
3170        };
3171        config.apply_preset();
3172        config
3173    }
3174
3175    /// Applies the preset settings to the individual configuration fields.
3176    /// Call this after deserializing if preset is not Custom or None.
3177    pub fn apply_preset(&mut self) {
3178        if !self.preset.overrides_settings() {
3179            return;
3180        }
3181
3182        self.enabled = true;
3183
3184        // Missing values
3185        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
3186        self.missing_values.rate = self.preset.missing_rate();
3187
3188        // Typos
3189        self.typos.enabled = self.preset.typo_rate() > 0.0;
3190        self.typos.char_error_rate = self.preset.typo_rate();
3191
3192        // Duplicates
3193        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
3194        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
3195        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
3196        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
3197
3198        // Format variations
3199        self.format_variations.enabled = self.preset.format_variations_enabled();
3200
3201        // Encoding issues
3202        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
3203        self.encoding_issues.rate = self.preset.encoding_issue_rate();
3204
3205        // OCR errors for typos in legacy preset
3206        if self.preset.ocr_errors_enabled() {
3207            self.typos.type_weights.ocr_errors = 0.3;
3208        }
3209    }
3210
3211    /// Returns the effective missing value rate (considering preset).
3212    pub fn effective_missing_rate(&self) -> f64 {
3213        if self.preset.overrides_settings() {
3214            self.preset.missing_rate()
3215        } else {
3216            self.missing_values.rate
3217        }
3218    }
3219
3220    /// Returns the effective typo rate (considering preset).
3221    pub fn effective_typo_rate(&self) -> f64 {
3222        if self.preset.overrides_settings() {
3223            self.preset.typo_rate()
3224        } else {
3225            self.typos.char_error_rate
3226        }
3227    }
3228
3229    /// Returns the effective duplicate rate (considering preset).
3230    pub fn effective_duplicate_rate(&self) -> f64 {
3231        if self.preset.overrides_settings() {
3232            self.preset.duplicate_rate()
3233        } else {
3234            self.duplicates.exact_duplicate_ratio
3235                + self.duplicates.near_duplicate_ratio
3236                + self.duplicates.fuzzy_duplicate_ratio
3237        }
3238    }
3239
3240    /// Creates a clean profile config.
3241    pub fn clean() -> Self {
3242        Self::with_preset(DataQualityPreset::Clean)
3243    }
3244
3245    /// Creates a noisy profile config.
3246    pub fn noisy() -> Self {
3247        Self::with_preset(DataQualityPreset::Noisy)
3248    }
3249
3250    /// Creates a legacy profile config.
3251    pub fn legacy() -> Self {
3252        Self::with_preset(DataQualityPreset::Legacy)
3253    }
3254}
3255
3256/// Preset configurations for common data quality scenarios.
3257#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
3258#[serde(rename_all = "snake_case")]
3259pub enum DataQualityPreset {
3260    /// No data quality variations (clean data)
3261    #[default]
3262    None,
3263    /// Minimal variations (very clean data with rare issues)
3264    Minimal,
3265    /// Normal variations (realistic enterprise data quality)
3266    Normal,
3267    /// High variations (messy data for stress testing)
3268    High,
3269    /// Custom (use individual settings)
3270    Custom,
3271
3272    // ========================================
3273    // ML-Oriented Profiles (Phase 2.1)
3274    // ========================================
3275    /// Clean profile for ML training - minimal data quality issues
3276    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
3277    Clean,
3278    /// Noisy profile simulating typical production data issues
3279    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
3280    Noisy,
3281    /// Legacy profile simulating migrated/OCR'd historical data
3282    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
3283    Legacy,
3284}
3285
3286impl DataQualityPreset {
3287    /// Returns the missing value rate for this preset.
3288    pub fn missing_rate(&self) -> f64 {
3289        match self {
3290            DataQualityPreset::None => 0.0,
3291            DataQualityPreset::Minimal => 0.005,
3292            DataQualityPreset::Normal => 0.02,
3293            DataQualityPreset::High => 0.08,
3294            DataQualityPreset::Custom => 0.01, // Use config value
3295            DataQualityPreset::Clean => 0.001,
3296            DataQualityPreset::Noisy => 0.05,
3297            DataQualityPreset::Legacy => 0.10,
3298        }
3299    }
3300
3301    /// Returns the typo rate for this preset.
3302    pub fn typo_rate(&self) -> f64 {
3303        match self {
3304            DataQualityPreset::None => 0.0,
3305            DataQualityPreset::Minimal => 0.0005,
3306            DataQualityPreset::Normal => 0.002,
3307            DataQualityPreset::High => 0.01,
3308            DataQualityPreset::Custom => 0.001, // Use config value
3309            DataQualityPreset::Clean => 0.0005,
3310            DataQualityPreset::Noisy => 0.02,
3311            DataQualityPreset::Legacy => 0.05,
3312        }
3313    }
3314
3315    /// Returns the duplicate rate for this preset.
3316    pub fn duplicate_rate(&self) -> f64 {
3317        match self {
3318            DataQualityPreset::None => 0.0,
3319            DataQualityPreset::Minimal => 0.001,
3320            DataQualityPreset::Normal => 0.005,
3321            DataQualityPreset::High => 0.02,
3322            DataQualityPreset::Custom => 0.0, // Use config value
3323            DataQualityPreset::Clean => 0.0,
3324            DataQualityPreset::Noisy => 0.01,
3325            DataQualityPreset::Legacy => 0.03,
3326        }
3327    }
3328
3329    /// Returns whether format variations are enabled for this preset.
3330    pub fn format_variations_enabled(&self) -> bool {
3331        match self {
3332            DataQualityPreset::None | DataQualityPreset::Clean => false,
3333            DataQualityPreset::Minimal => true,
3334            DataQualityPreset::Normal => true,
3335            DataQualityPreset::High => true,
3336            DataQualityPreset::Custom => true,
3337            DataQualityPreset::Noisy => true,
3338            DataQualityPreset::Legacy => true,
3339        }
3340    }
3341
3342    /// Returns whether OCR-style errors are enabled for this preset.
3343    pub fn ocr_errors_enabled(&self) -> bool {
3344        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
3345    }
3346
3347    /// Returns whether encoding issues are enabled for this preset.
3348    pub fn encoding_issues_enabled(&self) -> bool {
3349        matches!(
3350            self,
3351            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
3352        )
3353    }
3354
3355    /// Returns the encoding issue rate for this preset.
3356    pub fn encoding_issue_rate(&self) -> f64 {
3357        match self {
3358            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
3359            DataQualityPreset::Normal => 0.002,
3360            DataQualityPreset::High => 0.01,
3361            DataQualityPreset::Custom => 0.0,
3362            DataQualityPreset::Noisy => 0.005,
3363            DataQualityPreset::Legacy => 0.02,
3364        }
3365    }
3366
3367    /// Returns true if this preset overrides individual settings.
3368    pub fn overrides_settings(&self) -> bool {
3369        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
3370    }
3371
3372    /// Returns a human-readable description of this preset.
3373    pub fn description(&self) -> &'static str {
3374        match self {
3375            DataQualityPreset::None => "No data quality issues (pristine data)",
3376            DataQualityPreset::Minimal => "Very rare data quality issues",
3377            DataQualityPreset::Normal => "Realistic enterprise data quality",
3378            DataQualityPreset::High => "Messy data for stress testing",
3379            DataQualityPreset::Custom => "Custom settings from configuration",
3380            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
3381            DataQualityPreset::Noisy => "Typical production data with moderate issues",
3382            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
3383        }
3384    }
3385}
3386
3387/// Missing value injection configuration.
3388#[derive(Debug, Clone, Serialize, Deserialize)]
3389pub struct MissingValuesSchemaConfig {
3390    /// Enable missing value injection
3391    #[serde(default)]
3392    pub enabled: bool,
3393    /// Global missing rate (0.0 to 1.0)
3394    #[serde(default = "default_missing_rate")]
3395    pub rate: f64,
3396    /// Missing value strategy
3397    #[serde(default)]
3398    pub strategy: MissingValueStrategy,
3399    /// Field-specific rates (field name -> rate)
3400    #[serde(default)]
3401    pub field_rates: std::collections::HashMap<String, f64>,
3402    /// Fields that should never have missing values
3403    #[serde(default)]
3404    pub protected_fields: Vec<String>,
3405}
3406
3407fn default_missing_rate() -> f64 {
3408    0.01
3409}
3410
3411impl Default for MissingValuesSchemaConfig {
3412    fn default() -> Self {
3413        Self {
3414            enabled: false,
3415            rate: default_missing_rate(),
3416            strategy: MissingValueStrategy::Mcar,
3417            field_rates: std::collections::HashMap::new(),
3418            protected_fields: vec![
3419                "document_id".to_string(),
3420                "company_code".to_string(),
3421                "posting_date".to_string(),
3422            ],
3423        }
3424    }
3425}
3426
3427/// Missing value strategy types.
3428#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
3429#[serde(rename_all = "snake_case")]
3430pub enum MissingValueStrategy {
3431    /// Missing Completely At Random - equal probability for all values
3432    #[default]
3433    Mcar,
3434    /// Missing At Random - depends on other observed values
3435    Mar,
3436    /// Missing Not At Random - depends on the value itself
3437    Mnar,
3438    /// Systematic - entire field groups missing together
3439    Systematic,
3440}
3441
3442/// Typo injection configuration.
3443#[derive(Debug, Clone, Serialize, Deserialize)]
3444pub struct TypoSchemaConfig {
3445    /// Enable typo injection
3446    #[serde(default)]
3447    pub enabled: bool,
3448    /// Character error rate (per character, not per field)
3449    #[serde(default = "default_typo_rate")]
3450    pub char_error_rate: f64,
3451    /// Typo type weights
3452    #[serde(default)]
3453    pub type_weights: TypoTypeWeights,
3454    /// Fields that should never have typos
3455    #[serde(default)]
3456    pub protected_fields: Vec<String>,
3457}
3458
3459fn default_typo_rate() -> f64 {
3460    0.001
3461}
3462
3463impl Default for TypoSchemaConfig {
3464    fn default() -> Self {
3465        Self {
3466            enabled: false,
3467            char_error_rate: default_typo_rate(),
3468            type_weights: TypoTypeWeights::default(),
3469            protected_fields: vec![
3470                "document_id".to_string(),
3471                "gl_account".to_string(),
3472                "company_code".to_string(),
3473            ],
3474        }
3475    }
3476}
3477
3478/// Weights for different typo types.
3479#[derive(Debug, Clone, Serialize, Deserialize)]
3480pub struct TypoTypeWeights {
3481    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
3482    #[serde(default = "default_substitution_weight")]
3483    pub substitution: f64,
3484    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
3485    #[serde(default = "default_transposition_weight")]
3486    pub transposition: f64,
3487    /// Character insertion
3488    #[serde(default = "default_insertion_weight")]
3489    pub insertion: f64,
3490    /// Character deletion
3491    #[serde(default = "default_deletion_weight")]
3492    pub deletion: f64,
3493    /// OCR-style errors (e.g., '0' -> 'O')
3494    #[serde(default = "default_ocr_weight")]
3495    pub ocr_errors: f64,
3496    /// Homophone substitution (e.g., 'their' -> 'there')
3497    #[serde(default = "default_homophone_weight")]
3498    pub homophones: f64,
3499}
3500
3501fn default_substitution_weight() -> f64 {
3502    0.35
3503}
3504fn default_transposition_weight() -> f64 {
3505    0.25
3506}
3507fn default_insertion_weight() -> f64 {
3508    0.10
3509}
3510fn default_deletion_weight() -> f64 {
3511    0.15
3512}
3513fn default_ocr_weight() -> f64 {
3514    0.10
3515}
3516fn default_homophone_weight() -> f64 {
3517    0.05
3518}
3519
3520impl Default for TypoTypeWeights {
3521    fn default() -> Self {
3522        Self {
3523            substitution: default_substitution_weight(),
3524            transposition: default_transposition_weight(),
3525            insertion: default_insertion_weight(),
3526            deletion: default_deletion_weight(),
3527            ocr_errors: default_ocr_weight(),
3528            homophones: default_homophone_weight(),
3529        }
3530    }
3531}
3532
3533/// Format variation configuration.
3534#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3535pub struct FormatVariationSchemaConfig {
3536    /// Enable format variations
3537    #[serde(default)]
3538    pub enabled: bool,
3539    /// Date format variation settings
3540    #[serde(default)]
3541    pub dates: DateFormatVariationConfig,
3542    /// Amount format variation settings
3543    #[serde(default)]
3544    pub amounts: AmountFormatVariationConfig,
3545    /// Identifier format variation settings
3546    #[serde(default)]
3547    pub identifiers: IdentifierFormatVariationConfig,
3548}
3549
3550/// Date format variation configuration.
3551#[derive(Debug, Clone, Serialize, Deserialize)]
3552pub struct DateFormatVariationConfig {
3553    /// Enable date format variations
3554    #[serde(default)]
3555    pub enabled: bool,
3556    /// Overall variation rate
3557    #[serde(default = "default_date_variation_rate")]
3558    pub rate: f64,
3559    /// Include ISO format (2024-01-15)
3560    #[serde(default = "default_true")]
3561    pub iso_format: bool,
3562    /// Include US format (01/15/2024)
3563    #[serde(default)]
3564    pub us_format: bool,
3565    /// Include EU format (15.01.2024)
3566    #[serde(default)]
3567    pub eu_format: bool,
3568    /// Include long format (January 15, 2024)
3569    #[serde(default)]
3570    pub long_format: bool,
3571}
3572
3573fn default_date_variation_rate() -> f64 {
3574    0.05
3575}
3576
3577impl Default for DateFormatVariationConfig {
3578    fn default() -> Self {
3579        Self {
3580            enabled: false,
3581            rate: default_date_variation_rate(),
3582            iso_format: true,
3583            us_format: false,
3584            eu_format: false,
3585            long_format: false,
3586        }
3587    }
3588}
3589
3590/// Amount format variation configuration.
3591#[derive(Debug, Clone, Serialize, Deserialize)]
3592pub struct AmountFormatVariationConfig {
3593    /// Enable amount format variations
3594    #[serde(default)]
3595    pub enabled: bool,
3596    /// Overall variation rate
3597    #[serde(default = "default_amount_variation_rate")]
3598    pub rate: f64,
3599    /// Include US comma format (1,234.56)
3600    #[serde(default)]
3601    pub us_comma_format: bool,
3602    /// Include EU format (1.234,56)
3603    #[serde(default)]
3604    pub eu_format: bool,
3605    /// Include currency prefix ($1,234.56)
3606    #[serde(default)]
3607    pub currency_prefix: bool,
3608    /// Include accounting format with parentheses for negatives
3609    #[serde(default)]
3610    pub accounting_format: bool,
3611}
3612
3613fn default_amount_variation_rate() -> f64 {
3614    0.02
3615}
3616
3617impl Default for AmountFormatVariationConfig {
3618    fn default() -> Self {
3619        Self {
3620            enabled: false,
3621            rate: default_amount_variation_rate(),
3622            us_comma_format: false,
3623            eu_format: false,
3624            currency_prefix: false,
3625            accounting_format: false,
3626        }
3627    }
3628}
3629
3630/// Identifier format variation configuration.
3631#[derive(Debug, Clone, Serialize, Deserialize)]
3632pub struct IdentifierFormatVariationConfig {
3633    /// Enable identifier format variations
3634    #[serde(default)]
3635    pub enabled: bool,
3636    /// Overall variation rate
3637    #[serde(default = "default_identifier_variation_rate")]
3638    pub rate: f64,
3639    /// Case variations (uppercase, lowercase, mixed)
3640    #[serde(default)]
3641    pub case_variations: bool,
3642    /// Padding variations (leading zeros)
3643    #[serde(default)]
3644    pub padding_variations: bool,
3645    /// Separator variations (dash vs underscore)
3646    #[serde(default)]
3647    pub separator_variations: bool,
3648}
3649
3650fn default_identifier_variation_rate() -> f64 {
3651    0.02
3652}
3653
3654impl Default for IdentifierFormatVariationConfig {
3655    fn default() -> Self {
3656        Self {
3657            enabled: false,
3658            rate: default_identifier_variation_rate(),
3659            case_variations: false,
3660            padding_variations: false,
3661            separator_variations: false,
3662        }
3663    }
3664}
3665
3666/// Duplicate injection configuration.
3667#[derive(Debug, Clone, Serialize, Deserialize)]
3668pub struct DuplicateSchemaConfig {
3669    /// Enable duplicate injection
3670    #[serde(default)]
3671    pub enabled: bool,
3672    /// Overall duplicate rate
3673    #[serde(default = "default_duplicate_rate")]
3674    pub rate: f64,
3675    /// Exact duplicate proportion (out of duplicates)
3676    #[serde(default = "default_exact_duplicate_ratio")]
3677    pub exact_duplicate_ratio: f64,
3678    /// Near duplicate proportion (slight variations)
3679    #[serde(default = "default_near_duplicate_ratio")]
3680    pub near_duplicate_ratio: f64,
3681    /// Fuzzy duplicate proportion (typos in key fields)
3682    #[serde(default = "default_fuzzy_duplicate_ratio")]
3683    pub fuzzy_duplicate_ratio: f64,
3684    /// Maximum date offset for near/fuzzy duplicates (days)
3685    #[serde(default = "default_max_date_offset")]
3686    pub max_date_offset_days: u32,
3687    /// Maximum amount variance for near duplicates (fraction)
3688    #[serde(default = "default_max_amount_variance")]
3689    pub max_amount_variance: f64,
3690}
3691
3692fn default_duplicate_rate() -> f64 {
3693    0.005
3694}
3695fn default_exact_duplicate_ratio() -> f64 {
3696    0.4
3697}
3698fn default_near_duplicate_ratio() -> f64 {
3699    0.35
3700}
3701fn default_fuzzy_duplicate_ratio() -> f64 {
3702    0.25
3703}
3704fn default_max_date_offset() -> u32 {
3705    3
3706}
3707fn default_max_amount_variance() -> f64 {
3708    0.01
3709}
3710
3711impl Default for DuplicateSchemaConfig {
3712    fn default() -> Self {
3713        Self {
3714            enabled: false,
3715            rate: default_duplicate_rate(),
3716            exact_duplicate_ratio: default_exact_duplicate_ratio(),
3717            near_duplicate_ratio: default_near_duplicate_ratio(),
3718            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
3719            max_date_offset_days: default_max_date_offset(),
3720            max_amount_variance: default_max_amount_variance(),
3721        }
3722    }
3723}
3724
3725/// Encoding issue configuration.
3726#[derive(Debug, Clone, Serialize, Deserialize)]
3727pub struct EncodingIssueSchemaConfig {
3728    /// Enable encoding issue injection
3729    #[serde(default)]
3730    pub enabled: bool,
3731    /// Overall encoding issue rate
3732    #[serde(default = "default_encoding_rate")]
3733    pub rate: f64,
3734    /// Include mojibake (UTF-8/Latin-1 confusion)
3735    #[serde(default)]
3736    pub mojibake: bool,
3737    /// Include HTML entity corruption
3738    #[serde(default)]
3739    pub html_entities: bool,
3740    /// Include BOM issues
3741    #[serde(default)]
3742    pub bom_issues: bool,
3743}
3744
3745fn default_encoding_rate() -> f64 {
3746    0.001
3747}
3748
3749impl Default for EncodingIssueSchemaConfig {
3750    fn default() -> Self {
3751        Self {
3752            enabled: false,
3753            rate: default_encoding_rate(),
3754            mojibake: false,
3755            html_entities: false,
3756            bom_issues: false,
3757        }
3758    }
3759}
3760
3761/// Per-sink quality profiles for different output formats.
3762#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3763pub struct SinkQualityProfiles {
3764    /// CSV-specific quality settings
3765    #[serde(default)]
3766    pub csv: Option<SinkQualityOverride>,
3767    /// JSON-specific quality settings
3768    #[serde(default)]
3769    pub json: Option<SinkQualityOverride>,
3770    /// Parquet-specific quality settings
3771    #[serde(default)]
3772    pub parquet: Option<SinkQualityOverride>,
3773}
3774
3775/// Quality setting overrides for a specific sink type.
3776#[derive(Debug, Clone, Serialize, Deserialize)]
3777pub struct SinkQualityOverride {
3778    /// Override enabled state
3779    pub enabled: Option<bool>,
3780    /// Override missing value rate
3781    pub missing_rate: Option<f64>,
3782    /// Override typo rate
3783    pub typo_rate: Option<f64>,
3784    /// Override format variation rate
3785    pub format_variation_rate: Option<f64>,
3786    /// Override duplicate rate
3787    pub duplicate_rate: Option<f64>,
3788}
3789
3790#[cfg(test)]
3791mod tests {
3792    use super::*;
3793    use crate::presets::demo_preset;
3794
3795    // ==========================================================================
3796    // Serialization/Deserialization Tests
3797    // ==========================================================================
3798
3799    #[test]
3800    fn test_config_yaml_roundtrip() {
3801        let config = demo_preset();
3802        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
3803        let deserialized: GeneratorConfig =
3804            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
3805
3806        assert_eq!(
3807            config.global.period_months,
3808            deserialized.global.period_months
3809        );
3810        assert_eq!(config.global.industry, deserialized.global.industry);
3811        assert_eq!(config.companies.len(), deserialized.companies.len());
3812        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
3813    }
3814
3815    #[test]
3816    fn test_config_json_roundtrip() {
3817        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
3818        let mut config = demo_preset();
3819        // Replace infinity with a large but finite value for JSON compatibility
3820        config.master_data.employees.approval_limits.executive = 1e12;
3821
3822        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
3823        let deserialized: GeneratorConfig =
3824            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
3825
3826        assert_eq!(
3827            config.global.period_months,
3828            deserialized.global.period_months
3829        );
3830        assert_eq!(config.global.industry, deserialized.global.industry);
3831        assert_eq!(config.companies.len(), deserialized.companies.len());
3832    }
3833
3834    #[test]
3835    fn test_transaction_volume_serialization() {
3836        // Test various transaction volumes serialize correctly
3837        let volumes = vec![
3838            (TransactionVolume::TenK, "ten_k"),
3839            (TransactionVolume::HundredK, "hundred_k"),
3840            (TransactionVolume::OneM, "one_m"),
3841            (TransactionVolume::TenM, "ten_m"),
3842            (TransactionVolume::HundredM, "hundred_m"),
3843        ];
3844
3845        for (volume, expected_key) in volumes {
3846            let json = serde_json::to_string(&volume).expect("Failed to serialize");
3847            assert!(
3848                json.contains(expected_key),
3849                "Expected {} in JSON: {}",
3850                expected_key,
3851                json
3852            );
3853        }
3854    }
3855
3856    #[test]
3857    fn test_transaction_volume_custom_serialization() {
3858        let volume = TransactionVolume::Custom(12345);
3859        let json = serde_json::to_string(&volume).expect("Failed to serialize");
3860        let deserialized: TransactionVolume =
3861            serde_json::from_str(&json).expect("Failed to deserialize");
3862        assert_eq!(deserialized.count(), 12345);
3863    }
3864
3865    #[test]
3866    fn test_output_mode_serialization() {
3867        let modes = vec![
3868            OutputMode::Streaming,
3869            OutputMode::FlatFile,
3870            OutputMode::Both,
3871        ];
3872
3873        for mode in modes {
3874            let json = serde_json::to_string(&mode).expect("Failed to serialize");
3875            let deserialized: OutputMode =
3876                serde_json::from_str(&json).expect("Failed to deserialize");
3877            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
3878        }
3879    }
3880
3881    #[test]
3882    fn test_file_format_serialization() {
3883        let formats = vec![
3884            FileFormat::Csv,
3885            FileFormat::Parquet,
3886            FileFormat::Json,
3887            FileFormat::JsonLines,
3888        ];
3889
3890        for format in formats {
3891            let json = serde_json::to_string(&format).expect("Failed to serialize");
3892            let deserialized: FileFormat =
3893                serde_json::from_str(&json).expect("Failed to deserialize");
3894            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
3895        }
3896    }
3897
3898    #[test]
3899    fn test_compression_algorithm_serialization() {
3900        let algos = vec![
3901            CompressionAlgorithm::Gzip,
3902            CompressionAlgorithm::Zstd,
3903            CompressionAlgorithm::Lz4,
3904            CompressionAlgorithm::Snappy,
3905        ];
3906
3907        for algo in algos {
3908            let json = serde_json::to_string(&algo).expect("Failed to serialize");
3909            let deserialized: CompressionAlgorithm =
3910                serde_json::from_str(&json).expect("Failed to deserialize");
3911            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
3912        }
3913    }
3914
3915    #[test]
3916    fn test_transfer_pricing_method_serialization() {
3917        let methods = vec![
3918            TransferPricingMethod::CostPlus,
3919            TransferPricingMethod::ComparableUncontrolled,
3920            TransferPricingMethod::ResalePrice,
3921            TransferPricingMethod::TransactionalNetMargin,
3922            TransferPricingMethod::ProfitSplit,
3923        ];
3924
3925        for method in methods {
3926            let json = serde_json::to_string(&method).expect("Failed to serialize");
3927            let deserialized: TransferPricingMethod =
3928                serde_json::from_str(&json).expect("Failed to deserialize");
3929            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
3930        }
3931    }
3932
3933    #[test]
3934    fn test_benford_exemption_serialization() {
3935        let exemptions = vec![
3936            BenfordExemption::Recurring,
3937            BenfordExemption::Payroll,
3938            BenfordExemption::FixedFees,
3939            BenfordExemption::RoundAmounts,
3940        ];
3941
3942        for exemption in exemptions {
3943            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
3944            let deserialized: BenfordExemption =
3945                serde_json::from_str(&json).expect("Failed to deserialize");
3946            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
3947        }
3948    }
3949
3950    // ==========================================================================
3951    // Default Value Tests
3952    // ==========================================================================
3953
3954    #[test]
3955    fn test_global_config_defaults() {
3956        let yaml = r#"
3957            industry: manufacturing
3958            start_date: "2024-01-01"
3959            period_months: 6
3960        "#;
3961        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
3962        assert_eq!(config.group_currency, "USD");
3963        assert!(config.parallel);
3964        assert_eq!(config.worker_threads, 0);
3965        assert_eq!(config.memory_limit_mb, 0);
3966    }
3967
3968    #[test]
3969    fn test_fraud_config_defaults() {
3970        let config = FraudConfig::default();
3971        assert!(!config.enabled);
3972        assert_eq!(config.fraud_rate, 0.005);
3973        assert!(!config.clustering_enabled);
3974    }
3975
3976    #[test]
3977    fn test_internal_controls_config_defaults() {
3978        let config = InternalControlsConfig::default();
3979        assert!(!config.enabled);
3980        assert_eq!(config.exception_rate, 0.02);
3981        assert_eq!(config.sod_violation_rate, 0.01);
3982        assert!(config.export_control_master_data);
3983        assert_eq!(config.sox_materiality_threshold, 10000.0);
3984    }
3985
3986    #[test]
3987    fn test_output_config_defaults() {
3988        let config = OutputConfig::default();
3989        assert!(matches!(config.mode, OutputMode::FlatFile));
3990        assert_eq!(config.formats, vec![FileFormat::Parquet]);
3991        assert!(config.compression.enabled);
3992        assert!(matches!(
3993            config.compression.algorithm,
3994            CompressionAlgorithm::Zstd
3995        ));
3996        assert!(config.include_acdoca);
3997        assert!(!config.include_bseg);
3998        assert!(config.partition_by_period);
3999        assert!(!config.partition_by_company);
4000    }
4001
4002    #[test]
4003    fn test_approval_config_defaults() {
4004        let config = ApprovalConfig::default();
4005        assert!(!config.enabled);
4006        assert_eq!(config.auto_approve_threshold, 1000.0);
4007        assert_eq!(config.rejection_rate, 0.02);
4008        assert_eq!(config.revision_rate, 0.05);
4009        assert_eq!(config.average_approval_delay_hours, 4.0);
4010        assert_eq!(config.thresholds.len(), 4);
4011    }
4012
4013    #[test]
4014    fn test_p2p_flow_config_defaults() {
4015        let config = P2PFlowConfig::default();
4016        assert!(config.enabled);
4017        assert_eq!(config.three_way_match_rate, 0.95);
4018        assert_eq!(config.partial_delivery_rate, 0.15);
4019        assert_eq!(config.average_po_to_gr_days, 14);
4020    }
4021
4022    #[test]
4023    fn test_o2c_flow_config_defaults() {
4024        let config = O2CFlowConfig::default();
4025        assert!(config.enabled);
4026        assert_eq!(config.credit_check_failure_rate, 0.02);
4027        assert_eq!(config.return_rate, 0.03);
4028        assert_eq!(config.bad_debt_rate, 0.01);
4029    }
4030
4031    #[test]
4032    fn test_balance_config_defaults() {
4033        let config = BalanceConfig::default();
4034        assert!(!config.generate_opening_balances);
4035        assert!(config.generate_trial_balances);
4036        assert_eq!(config.target_gross_margin, 0.35);
4037        assert!(config.validate_balance_equation);
4038        assert!(config.reconcile_subledgers);
4039    }
4040
4041    // ==========================================================================
4042    // Partial Config Deserialization Tests
4043    // ==========================================================================
4044
4045    #[test]
4046    fn test_partial_config_with_defaults() {
4047        // Minimal config that should use all defaults
4048        let yaml = r#"
4049            global:
4050              industry: manufacturing
4051              start_date: "2024-01-01"
4052              period_months: 3
4053            companies:
4054              - code: "TEST"
4055                name: "Test Company"
4056                currency: "USD"
4057                country: "US"
4058                annual_transaction_volume: ten_k
4059            chart_of_accounts:
4060              complexity: small
4061            output:
4062              output_directory: "./output"
4063        "#;
4064
4065        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4066        assert_eq!(config.global.period_months, 3);
4067        assert_eq!(config.companies.len(), 1);
4068        assert!(!config.fraud.enabled); // Default
4069        assert!(!config.internal_controls.enabled); // Default
4070    }
4071
4072    #[test]
4073    fn test_config_with_fraud_enabled() {
4074        let yaml = r#"
4075            global:
4076              industry: retail
4077              start_date: "2024-01-01"
4078              period_months: 12
4079            companies:
4080              - code: "RETAIL"
4081                name: "Retail Co"
4082                currency: "USD"
4083                country: "US"
4084                annual_transaction_volume: hundred_k
4085            chart_of_accounts:
4086              complexity: medium
4087            output:
4088              output_directory: "./output"
4089            fraud:
4090              enabled: true
4091              fraud_rate: 0.05
4092              clustering_enabled: true
4093        "#;
4094
4095        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4096        assert!(config.fraud.enabled);
4097        assert_eq!(config.fraud.fraud_rate, 0.05);
4098        assert!(config.fraud.clustering_enabled);
4099    }
4100
4101    #[test]
4102    fn test_config_with_multiple_companies() {
4103        let yaml = r#"
4104            global:
4105              industry: manufacturing
4106              start_date: "2024-01-01"
4107              period_months: 6
4108            companies:
4109              - code: "HQ"
4110                name: "Headquarters"
4111                currency: "USD"
4112                country: "US"
4113                annual_transaction_volume: hundred_k
4114                volume_weight: 1.0
4115              - code: "EU"
4116                name: "European Subsidiary"
4117                currency: "EUR"
4118                country: "DE"
4119                annual_transaction_volume: hundred_k
4120                volume_weight: 0.5
4121              - code: "APAC"
4122                name: "Asia Pacific"
4123                currency: "JPY"
4124                country: "JP"
4125                annual_transaction_volume: ten_k
4126                volume_weight: 0.3
4127            chart_of_accounts:
4128              complexity: large
4129            output:
4130              output_directory: "./output"
4131        "#;
4132
4133        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4134        assert_eq!(config.companies.len(), 3);
4135        assert_eq!(config.companies[0].code, "HQ");
4136        assert_eq!(config.companies[1].currency, "EUR");
4137        assert_eq!(config.companies[2].volume_weight, 0.3);
4138    }
4139
4140    #[test]
4141    fn test_intercompany_config() {
4142        let yaml = r#"
4143            enabled: true
4144            ic_transaction_rate: 0.20
4145            transfer_pricing_method: cost_plus
4146            markup_percent: 0.08
4147            generate_matched_pairs: true
4148            generate_eliminations: true
4149        "#;
4150
4151        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4152        assert!(config.enabled);
4153        assert_eq!(config.ic_transaction_rate, 0.20);
4154        assert!(matches!(
4155            config.transfer_pricing_method,
4156            TransferPricingMethod::CostPlus
4157        ));
4158        assert_eq!(config.markup_percent, 0.08);
4159        assert!(config.generate_eliminations);
4160    }
4161
4162    // ==========================================================================
4163    // Company Config Tests
4164    // ==========================================================================
4165
4166    #[test]
4167    fn test_company_config_defaults() {
4168        let yaml = r#"
4169            code: "TEST"
4170            name: "Test Company"
4171            currency: "USD"
4172            country: "US"
4173            annual_transaction_volume: ten_k
4174        "#;
4175
4176        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4177        assert_eq!(config.fiscal_year_variant, "K4"); // Default
4178        assert_eq!(config.volume_weight, 1.0); // Default
4179    }
4180
4181    // ==========================================================================
4182    // Chart of Accounts Config Tests
4183    // ==========================================================================
4184
4185    #[test]
4186    fn test_coa_config_defaults() {
4187        let yaml = r#"
4188            complexity: medium
4189        "#;
4190
4191        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
4192        assert!(config.industry_specific); // Default true
4193        assert!(config.custom_accounts.is_none());
4194        assert_eq!(config.min_hierarchy_depth, 2); // Default
4195        assert_eq!(config.max_hierarchy_depth, 5); // Default
4196    }
4197}