datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
11/// Root configuration for the synthetic data generator.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct GeneratorConfig {
14    /// Global settings
15    pub global: GlobalConfig,
16    /// Company configuration
17    pub companies: Vec<CompanyConfig>,
18    /// Chart of Accounts configuration
19    pub chart_of_accounts: ChartOfAccountsConfig,
20    /// Transaction generation settings
21    #[serde(default)]
22    pub transactions: TransactionConfig,
23    /// Output configuration
24    pub output: OutputConfig,
25    /// Fraud simulation settings
26    #[serde(default)]
27    pub fraud: FraudConfig,
28    /// Data quality variation settings
29    #[serde(default)]
30    pub data_quality: DataQualitySchemaConfig,
31    /// Internal Controls System settings
32    #[serde(default)]
33    pub internal_controls: InternalControlsConfig,
34    /// Business process mix
35    #[serde(default)]
36    pub business_processes: BusinessProcessConfig,
37    /// User persona distribution
38    #[serde(default)]
39    pub user_personas: UserPersonaConfig,
40    /// Template configuration for realistic data
41    #[serde(default)]
42    pub templates: TemplateConfig,
43    /// Approval workflow configuration
44    #[serde(default)]
45    pub approval: ApprovalConfig,
46    /// Department structure configuration
47    #[serde(default)]
48    pub departments: DepartmentConfig,
49    /// Master data generation settings
50    #[serde(default)]
51    pub master_data: MasterDataConfig,
52    /// Document flow generation settings
53    #[serde(default)]
54    pub document_flows: DocumentFlowConfig,
55    /// Intercompany transaction settings
56    #[serde(default)]
57    pub intercompany: IntercompanyConfig,
58    /// Balance and trial balance settings
59    #[serde(default)]
60    pub balance: BalanceConfig,
61    /// OCPM (Object-Centric Process Mining) settings
62    #[serde(default)]
63    pub ocpm: OcpmConfig,
64    /// Audit engagement and workpaper generation settings
65    #[serde(default)]
66    pub audit: AuditGenerationConfig,
67    /// Banking KYC/AML transaction generation settings
68    #[serde(default)]
69    pub banking: datasynth_banking::BankingConfig,
70}
71
72/// Global configuration settings.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct GlobalConfig {
75    /// Random seed for reproducibility
76    pub seed: Option<u64>,
77    /// Industry sector
78    pub industry: IndustrySector,
79    /// Simulation start date (YYYY-MM-DD)
80    pub start_date: String,
81    /// Simulation period in months
82    pub period_months: u32,
83    /// Base currency for group reporting
84    #[serde(default = "default_currency")]
85    pub group_currency: String,
86    /// Enable parallel generation
87    #[serde(default = "default_true")]
88    pub parallel: bool,
89    /// Number of worker threads (0 = auto-detect)
90    #[serde(default)]
91    pub worker_threads: usize,
92    /// Memory limit in MB (0 = unlimited)
93    #[serde(default)]
94    pub memory_limit_mb: usize,
95}
96
/// Serde fallback for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}
/// Serde fallback for boolean options that are switched on unless the
/// configuration says otherwise.
fn default_true() -> bool {
    true
}
103
104/// Company code configuration.
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct CompanyConfig {
107    /// Company code identifier
108    pub code: String,
109    /// Company name
110    pub name: String,
111    /// Local currency (ISO 4217)
112    pub currency: String,
113    /// Country code (ISO 3166-1 alpha-2)
114    pub country: String,
115    /// Fiscal year variant
116    #[serde(default = "default_fiscal_variant")]
117    pub fiscal_year_variant: String,
118    /// Transaction volume per year
119    pub annual_transaction_volume: TransactionVolume,
120    /// Company-specific transaction weight
121    #[serde(default = "default_weight")]
122    pub volume_weight: f64,
123}
124
/// Serde fallback for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}
/// Serde fallback for `CompanyConfig::volume_weight`.
fn default_weight() -> f64 {
    1.0_f64
}
131
132/// Transaction volume presets.
133#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
134#[serde(rename_all = "snake_case")]
135pub enum TransactionVolume {
136    /// 10,000 transactions per year
137    TenK,
138    /// 100,000 transactions per year
139    HundredK,
140    /// 1,000,000 transactions per year
141    OneM,
142    /// 10,000,000 transactions per year
143    TenM,
144    /// 100,000,000 transactions per year
145    HundredM,
146    /// Custom count
147    Custom(u64),
148}
149
150impl TransactionVolume {
151    /// Get the transaction count.
152    pub fn count(&self) -> u64 {
153        match self {
154            Self::TenK => 10_000,
155            Self::HundredK => 100_000,
156            Self::OneM => 1_000_000,
157            Self::TenM => 10_000_000,
158            Self::HundredM => 100_000_000,
159            Self::Custom(n) => *n,
160        }
161    }
162}
163
164/// Chart of Accounts configuration.
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct ChartOfAccountsConfig {
167    /// CoA complexity level
168    pub complexity: CoAComplexity,
169    /// Use industry-specific accounts
170    #[serde(default = "default_true")]
171    pub industry_specific: bool,
172    /// Custom account definitions file
173    pub custom_accounts: Option<PathBuf>,
174    /// Minimum hierarchy depth
175    #[serde(default = "default_min_depth")]
176    pub min_hierarchy_depth: u8,
177    /// Maximum hierarchy depth
178    #[serde(default = "default_max_depth")]
179    pub max_hierarchy_depth: u8,
180}
181
/// Serde fallback for `ChartOfAccountsConfig::min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    2_u8
}
/// Serde fallback for `ChartOfAccountsConfig::max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    5_u8
}
188
189/// Transaction generation configuration.
190#[derive(Debug, Clone, Serialize, Deserialize, Default)]
191pub struct TransactionConfig {
192    /// Line item distribution
193    #[serde(default)]
194    pub line_item_distribution: LineItemDistributionConfig,
195    /// Debit/credit balance distribution
196    #[serde(default)]
197    pub debit_credit_distribution: DebitCreditDistributionConfig,
198    /// Even/odd line count distribution
199    #[serde(default)]
200    pub even_odd_distribution: EvenOddDistributionConfig,
201    /// Transaction source distribution
202    #[serde(default)]
203    pub source_distribution: SourceDistribution,
204    /// Seasonality configuration
205    #[serde(default)]
206    pub seasonality: SeasonalityConfig,
207    /// Amount distribution
208    #[serde(default)]
209    pub amounts: AmountDistributionConfig,
210    /// Benford's Law compliance configuration
211    #[serde(default)]
212    pub benford: BenfordConfig,
213}
214
215/// Benford's Law compliance configuration.
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct BenfordConfig {
218    /// Enable Benford's Law compliance for amount generation
219    #[serde(default = "default_true")]
220    pub enabled: bool,
221    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
222    #[serde(default = "default_benford_tolerance")]
223    pub tolerance: f64,
224    /// Transaction sources exempt from Benford's Law (fixed amounts)
225    #[serde(default)]
226    pub exempt_sources: Vec<BenfordExemption>,
227}
228
/// Serde fallback for `BenfordConfig::tolerance`.
fn default_benford_tolerance() -> f64 {
    0.05_f64
}
232
233impl Default for BenfordConfig {
234    fn default() -> Self {
235        Self {
236            enabled: true,
237            tolerance: default_benford_tolerance(),
238            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
239        }
240    }
241}
242
243/// Types of transactions exempt from Benford's Law.
244#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
245#[serde(rename_all = "snake_case")]
246pub enum BenfordExemption {
247    /// Recurring fixed amounts (rent, subscriptions)
248    Recurring,
249    /// Payroll (standardized salaries)
250    Payroll,
251    /// Fixed fees and charges
252    FixedFees,
253    /// Round number purchases (often legitimate)
254    RoundAmounts,
255}
256
257/// Distribution of transaction sources.
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct SourceDistribution {
260    /// Manual entries percentage
261    pub manual: f64,
262    /// Automated system entries
263    pub automated: f64,
264    /// Recurring entries
265    pub recurring: f64,
266    /// Adjustment entries
267    pub adjustment: f64,
268}
269
270impl Default for SourceDistribution {
271    fn default() -> Self {
272        Self {
273            manual: 0.20,
274            automated: 0.70,
275            recurring: 0.07,
276            adjustment: 0.03,
277        }
278    }
279}
280
281/// Output configuration.
282#[derive(Debug, Clone, Serialize, Deserialize)]
283pub struct OutputConfig {
284    /// Output mode
285    #[serde(default)]
286    pub mode: OutputMode,
287    /// Output directory
288    pub output_directory: PathBuf,
289    /// File formats to generate
290    #[serde(default = "default_formats")]
291    pub formats: Vec<FileFormat>,
292    /// Compression settings
293    #[serde(default)]
294    pub compression: CompressionConfig,
295    /// Batch size for writes
296    #[serde(default = "default_batch_size")]
297    pub batch_size: usize,
298    /// Include ACDOCA format
299    #[serde(default = "default_true")]
300    pub include_acdoca: bool,
301    /// Include BSEG format
302    #[serde(default)]
303    pub include_bseg: bool,
304    /// Partition by fiscal period
305    #[serde(default = "default_true")]
306    pub partition_by_period: bool,
307    /// Partition by company code
308    #[serde(default)]
309    pub partition_by_company: bool,
310}
311
312fn default_formats() -> Vec<FileFormat> {
313    vec![FileFormat::Parquet]
314}
/// Serde fallback for `OutputConfig::batch_size`.
fn default_batch_size() -> usize {
    100_000_usize
}
318
319impl Default for OutputConfig {
320    fn default() -> Self {
321        Self {
322            mode: OutputMode::FlatFile,
323            output_directory: PathBuf::from("./output"),
324            formats: default_formats(),
325            compression: CompressionConfig::default(),
326            batch_size: default_batch_size(),
327            include_acdoca: true,
328            include_bseg: false,
329            partition_by_period: true,
330            partition_by_company: false,
331        }
332    }
333}
334
335/// Output mode.
336#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
337#[serde(rename_all = "snake_case")]
338pub enum OutputMode {
339    /// Stream records as generated
340    Streaming,
341    /// Write to flat files
342    #[default]
343    FlatFile,
344    /// Both streaming and flat file
345    Both,
346}
347
348/// Supported file formats.
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
350#[serde(rename_all = "snake_case")]
351pub enum FileFormat {
352    Csv,
353    Parquet,
354    Json,
355    JsonLines,
356}
357
358/// Compression configuration.
359#[derive(Debug, Clone, Serialize, Deserialize)]
360pub struct CompressionConfig {
361    /// Enable compression
362    #[serde(default = "default_true")]
363    pub enabled: bool,
364    /// Compression algorithm
365    #[serde(default)]
366    pub algorithm: CompressionAlgorithm,
367    /// Compression level (1-9)
368    #[serde(default = "default_compression_level")]
369    pub level: u8,
370}
371
/// Serde fallback for `CompressionConfig::level`.
fn default_compression_level() -> u8 {
    3_u8
}
375
376impl Default for CompressionConfig {
377    fn default() -> Self {
378        Self {
379            enabled: true,
380            algorithm: CompressionAlgorithm::default(),
381            level: default_compression_level(),
382        }
383    }
384}
385
386/// Compression algorithms.
387#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
388#[serde(rename_all = "snake_case")]
389pub enum CompressionAlgorithm {
390    Gzip,
391    #[default]
392    Zstd,
393    Lz4,
394    Snappy,
395}
396
397/// Fraud simulation configuration.
398#[derive(Debug, Clone, Serialize, Deserialize)]
399pub struct FraudConfig {
400    /// Enable fraud scenario generation
401    #[serde(default)]
402    pub enabled: bool,
403    /// Overall fraud rate (0.0 to 1.0)
404    #[serde(default = "default_fraud_rate")]
405    pub fraud_rate: f64,
406    /// Fraud type distribution
407    #[serde(default)]
408    pub fraud_type_distribution: FraudTypeDistribution,
409    /// Enable fraud clustering
410    #[serde(default)]
411    pub clustering_enabled: bool,
412    /// Clustering factor
413    #[serde(default = "default_clustering_factor")]
414    pub clustering_factor: f64,
415    /// Approval thresholds for threshold-adjacent fraud pattern
416    #[serde(default = "default_approval_thresholds")]
417    pub approval_thresholds: Vec<f64>,
418}
419
/// Serde fallback for `FraudConfig::approval_thresholds`.
fn default_approval_thresholds() -> Vec<f64> {
    [1_000.0, 5_000.0, 10_000.0, 25_000.0, 50_000.0, 100_000.0].to_vec()
}
423
/// Serde fallback for `FraudConfig::fraud_rate`.
fn default_fraud_rate() -> f64 {
    0.005_f64
}
/// Serde fallback for `FraudConfig::clustering_factor`.
fn default_clustering_factor() -> f64 {
    3.0_f64
}
430
431impl Default for FraudConfig {
432    fn default() -> Self {
433        Self {
434            enabled: false,
435            fraud_rate: default_fraud_rate(),
436            fraud_type_distribution: FraudTypeDistribution::default(),
437            clustering_enabled: false,
438            clustering_factor: default_clustering_factor(),
439            approval_thresholds: default_approval_thresholds(),
440        }
441    }
442}
443
444/// Distribution of fraud types.
445#[derive(Debug, Clone, Serialize, Deserialize)]
446pub struct FraudTypeDistribution {
447    pub suspense_account_abuse: f64,
448    pub fictitious_transaction: f64,
449    pub revenue_manipulation: f64,
450    pub expense_capitalization: f64,
451    pub split_transaction: f64,
452    pub timing_anomaly: f64,
453    pub unauthorized_access: f64,
454    pub duplicate_payment: f64,
455}
456
457impl Default for FraudTypeDistribution {
458    fn default() -> Self {
459        Self {
460            suspense_account_abuse: 0.25,
461            fictitious_transaction: 0.15,
462            revenue_manipulation: 0.10,
463            expense_capitalization: 0.10,
464            split_transaction: 0.15,
465            timing_anomaly: 0.10,
466            unauthorized_access: 0.10,
467            duplicate_payment: 0.05,
468        }
469    }
470}
471
472/// Internal Controls System (ICS) configuration.
473#[derive(Debug, Clone, Serialize, Deserialize)]
474pub struct InternalControlsConfig {
475    /// Enable internal controls system
476    #[serde(default)]
477    pub enabled: bool,
478    /// Rate at which controls result in exceptions (0.0 - 1.0)
479    #[serde(default = "default_exception_rate")]
480    pub exception_rate: f64,
481    /// Rate at which SoD violations occur (0.0 - 1.0)
482    #[serde(default = "default_sod_violation_rate")]
483    pub sod_violation_rate: f64,
484    /// Export control master data to separate files
485    #[serde(default = "default_true")]
486    pub export_control_master_data: bool,
487    /// SOX materiality threshold for marking transactions as SOX-relevant
488    #[serde(default = "default_sox_materiality_threshold")]
489    pub sox_materiality_threshold: f64,
490}
491
/// Serde fallback for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    0.02_f64
}
495
/// Serde fallback for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    0.01_f64
}
499
/// Serde fallback for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    10_000.0_f64
}
503
504impl Default for InternalControlsConfig {
505    fn default() -> Self {
506        Self {
507            enabled: false,
508            exception_rate: default_exception_rate(),
509            sod_violation_rate: default_sod_violation_rate(),
510            export_control_master_data: true,
511            sox_materiality_threshold: default_sox_materiality_threshold(),
512        }
513    }
514}
515
516/// Business process configuration.
517#[derive(Debug, Clone, Serialize, Deserialize)]
518pub struct BusinessProcessConfig {
519    /// Order-to-Cash weight
520    #[serde(default = "default_o2c")]
521    pub o2c_weight: f64,
522    /// Procure-to-Pay weight
523    #[serde(default = "default_p2p")]
524    pub p2p_weight: f64,
525    /// Record-to-Report weight
526    #[serde(default = "default_r2r")]
527    pub r2r_weight: f64,
528    /// Hire-to-Retire weight
529    #[serde(default = "default_h2r")]
530    pub h2r_weight: f64,
531    /// Acquire-to-Retire weight
532    #[serde(default = "default_a2r")]
533    pub a2r_weight: f64,
534}
535
/// Serde fallback for `BusinessProcessConfig::o2c_weight`.
fn default_o2c() -> f64 {
    0.35_f64
}
/// Serde fallback for `BusinessProcessConfig::p2p_weight`.
fn default_p2p() -> f64 {
    0.30_f64
}
/// Serde fallback for `BusinessProcessConfig::r2r_weight`.
fn default_r2r() -> f64 {
    0.20_f64
}
/// Serde fallback for `BusinessProcessConfig::h2r_weight`.
fn default_h2r() -> f64 {
    0.10_f64
}
/// Serde fallback for `BusinessProcessConfig::a2r_weight`.
fn default_a2r() -> f64 {
    0.05_f64
}
551
552impl Default for BusinessProcessConfig {
553    fn default() -> Self {
554        Self {
555            o2c_weight: default_o2c(),
556            p2p_weight: default_p2p(),
557            r2r_weight: default_r2r(),
558            h2r_weight: default_h2r(),
559            a2r_weight: default_a2r(),
560        }
561    }
562}
563
564/// User persona configuration.
565#[derive(Debug, Clone, Serialize, Deserialize, Default)]
566pub struct UserPersonaConfig {
567    /// Distribution of user personas
568    #[serde(default)]
569    pub persona_distribution: PersonaDistribution,
570    /// Users per persona type
571    #[serde(default)]
572    pub users_per_persona: UsersPerPersona,
573}
574
575/// Distribution of user personas for transaction generation.
576#[derive(Debug, Clone, Serialize, Deserialize)]
577pub struct PersonaDistribution {
578    pub junior_accountant: f64,
579    pub senior_accountant: f64,
580    pub controller: f64,
581    pub manager: f64,
582    pub automated_system: f64,
583}
584
585impl Default for PersonaDistribution {
586    fn default() -> Self {
587        Self {
588            junior_accountant: 0.15,
589            senior_accountant: 0.15,
590            controller: 0.05,
591            manager: 0.05,
592            automated_system: 0.60,
593        }
594    }
595}
596
597/// Number of users per persona type.
598#[derive(Debug, Clone, Serialize, Deserialize)]
599pub struct UsersPerPersona {
600    pub junior_accountant: usize,
601    pub senior_accountant: usize,
602    pub controller: usize,
603    pub manager: usize,
604    pub automated_system: usize,
605}
606
607impl Default for UsersPerPersona {
608    fn default() -> Self {
609        Self {
610            junior_accountant: 10,
611            senior_accountant: 5,
612            controller: 2,
613            manager: 3,
614            automated_system: 20,
615        }
616    }
617}
618
619/// Template configuration for realistic data generation.
620#[derive(Debug, Clone, Serialize, Deserialize, Default)]
621pub struct TemplateConfig {
622    /// Name generation settings
623    #[serde(default)]
624    pub names: NameTemplateConfig,
625    /// Description generation settings
626    #[serde(default)]
627    pub descriptions: DescriptionTemplateConfig,
628    /// Reference number settings
629    #[serde(default)]
630    pub references: ReferenceTemplateConfig,
631}
632
633/// Name template configuration.
634#[derive(Debug, Clone, Serialize, Deserialize)]
635pub struct NameTemplateConfig {
636    /// Distribution of name cultures
637    #[serde(default)]
638    pub culture_distribution: CultureDistribution,
639    /// Email domain for generated users
640    #[serde(default = "default_email_domain")]
641    pub email_domain: String,
642    /// Generate realistic display names
643    #[serde(default = "default_true")]
644    pub generate_realistic_names: bool,
645}
646
/// Serde fallback for `NameTemplateConfig::email_domain`.
fn default_email_domain() -> String {
    String::from("company.com")
}
650
651impl Default for NameTemplateConfig {
652    fn default() -> Self {
653        Self {
654            culture_distribution: CultureDistribution::default(),
655            email_domain: default_email_domain(),
656            generate_realistic_names: true,
657        }
658    }
659}
660
661/// Distribution of name cultures for generation.
662#[derive(Debug, Clone, Serialize, Deserialize)]
663pub struct CultureDistribution {
664    pub western_us: f64,
665    pub hispanic: f64,
666    pub german: f64,
667    pub french: f64,
668    pub chinese: f64,
669    pub japanese: f64,
670    pub indian: f64,
671}
672
673impl Default for CultureDistribution {
674    fn default() -> Self {
675        Self {
676            western_us: 0.40,
677            hispanic: 0.20,
678            german: 0.10,
679            french: 0.05,
680            chinese: 0.10,
681            japanese: 0.05,
682            indian: 0.10,
683        }
684    }
685}
686
687/// Description template configuration.
688#[derive(Debug, Clone, Serialize, Deserialize)]
689pub struct DescriptionTemplateConfig {
690    /// Generate header text for journal entries
691    #[serde(default = "default_true")]
692    pub generate_header_text: bool,
693    /// Generate line text for journal entry lines
694    #[serde(default = "default_true")]
695    pub generate_line_text: bool,
696}
697
698impl Default for DescriptionTemplateConfig {
699    fn default() -> Self {
700        Self {
701            generate_header_text: true,
702            generate_line_text: true,
703        }
704    }
705}
706
707/// Reference number template configuration.
708#[derive(Debug, Clone, Serialize, Deserialize)]
709pub struct ReferenceTemplateConfig {
710    /// Generate reference numbers
711    #[serde(default = "default_true")]
712    pub generate_references: bool,
713    /// Invoice prefix
714    #[serde(default = "default_invoice_prefix")]
715    pub invoice_prefix: String,
716    /// Purchase order prefix
717    #[serde(default = "default_po_prefix")]
718    pub po_prefix: String,
719    /// Sales order prefix
720    #[serde(default = "default_so_prefix")]
721    pub so_prefix: String,
722}
723
/// Serde fallback for `ReferenceTemplateConfig::invoice_prefix`.
fn default_invoice_prefix() -> String {
    String::from("INV")
}
/// Serde fallback for `ReferenceTemplateConfig::po_prefix`.
fn default_po_prefix() -> String {
    String::from("PO")
}
/// Serde fallback for `ReferenceTemplateConfig::so_prefix`.
fn default_so_prefix() -> String {
    String::from("SO")
}
733
734impl Default for ReferenceTemplateConfig {
735    fn default() -> Self {
736        Self {
737            generate_references: true,
738            invoice_prefix: default_invoice_prefix(),
739            po_prefix: default_po_prefix(),
740            so_prefix: default_so_prefix(),
741        }
742    }
743}
744
745/// Approval workflow configuration.
746#[derive(Debug, Clone, Serialize, Deserialize)]
747pub struct ApprovalConfig {
748    /// Enable approval workflow generation
749    #[serde(default)]
750    pub enabled: bool,
751    /// Threshold below which transactions are auto-approved
752    #[serde(default = "default_auto_approve_threshold")]
753    pub auto_approve_threshold: f64,
754    /// Rate at which approvals are rejected (0.0 to 1.0)
755    #[serde(default = "default_rejection_rate")]
756    pub rejection_rate: f64,
757    /// Rate at which approvals require revision (0.0 to 1.0)
758    #[serde(default = "default_revision_rate")]
759    pub revision_rate: f64,
760    /// Average delay in hours for approval processing
761    #[serde(default = "default_approval_delay_hours")]
762    pub average_approval_delay_hours: f64,
763    /// Approval chain thresholds
764    #[serde(default)]
765    pub thresholds: Vec<ApprovalThresholdConfig>,
766}
767
/// Serde fallback for `ApprovalConfig::auto_approve_threshold`.
fn default_auto_approve_threshold() -> f64 {
    1_000.0_f64
}
/// Serde fallback for `ApprovalConfig::rejection_rate`.
fn default_rejection_rate() -> f64 {
    0.02_f64
}
/// Serde fallback for `ApprovalConfig::revision_rate`.
fn default_revision_rate() -> f64 {
    0.05_f64
}
/// Serde fallback for `ApprovalConfig::average_approval_delay_hours`.
fn default_approval_delay_hours() -> f64 {
    4.0_f64
}
780
781impl Default for ApprovalConfig {
782    fn default() -> Self {
783        Self {
784            enabled: false,
785            auto_approve_threshold: default_auto_approve_threshold(),
786            rejection_rate: default_rejection_rate(),
787            revision_rate: default_revision_rate(),
788            average_approval_delay_hours: default_approval_delay_hours(),
789            thresholds: vec![
790                ApprovalThresholdConfig {
791                    amount: 1000.0,
792                    level: 1,
793                    roles: vec!["senior_accountant".to_string()],
794                },
795                ApprovalThresholdConfig {
796                    amount: 10000.0,
797                    level: 2,
798                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
799                },
800                ApprovalThresholdConfig {
801                    amount: 100000.0,
802                    level: 3,
803                    roles: vec![
804                        "senior_accountant".to_string(),
805                        "controller".to_string(),
806                        "manager".to_string(),
807                    ],
808                },
809                ApprovalThresholdConfig {
810                    amount: 500000.0,
811                    level: 4,
812                    roles: vec![
813                        "senior_accountant".to_string(),
814                        "controller".to_string(),
815                        "manager".to_string(),
816                        "executive".to_string(),
817                    ],
818                },
819            ],
820        }
821    }
822}
823
824/// Configuration for a single approval threshold.
825#[derive(Debug, Clone, Serialize, Deserialize)]
826pub struct ApprovalThresholdConfig {
827    /// Amount threshold
828    pub amount: f64,
829    /// Approval level required
830    pub level: u8,
831    /// Roles that can approve at this level
832    pub roles: Vec<String>,
833}
834
835/// Department configuration.
836#[derive(Debug, Clone, Serialize, Deserialize)]
837pub struct DepartmentConfig {
838    /// Enable department assignment
839    #[serde(default)]
840    pub enabled: bool,
841    /// Multiplier for department headcounts
842    #[serde(default = "default_headcount_multiplier")]
843    pub headcount_multiplier: f64,
844    /// Custom department definitions (optional)
845    #[serde(default)]
846    pub custom_departments: Vec<CustomDepartmentConfig>,
847}
848
/// Serde fallback for `DepartmentConfig::headcount_multiplier`.
fn default_headcount_multiplier() -> f64 {
    1.0_f64
}
852
853impl Default for DepartmentConfig {
854    fn default() -> Self {
855        Self {
856            enabled: false,
857            headcount_multiplier: default_headcount_multiplier(),
858            custom_departments: Vec::new(),
859        }
860    }
861}
862
863/// Custom department definition.
864#[derive(Debug, Clone, Serialize, Deserialize)]
865pub struct CustomDepartmentConfig {
866    /// Department code
867    pub code: String,
868    /// Department name
869    pub name: String,
870    /// Associated cost center
871    #[serde(default)]
872    pub cost_center: Option<String>,
873    /// Primary business processes
874    #[serde(default)]
875    pub primary_processes: Vec<String>,
876    /// Parent department code
877    #[serde(default)]
878    pub parent_code: Option<String>,
879}
880
881// ============================================================================
882// Master Data Configuration
883// ============================================================================
884
885/// Master data generation configuration.
886#[derive(Debug, Clone, Default, Serialize, Deserialize)]
887pub struct MasterDataConfig {
888    /// Vendor master data settings
889    #[serde(default)]
890    pub vendors: VendorMasterConfig,
891    /// Customer master data settings
892    #[serde(default)]
893    pub customers: CustomerMasterConfig,
894    /// Material master data settings
895    #[serde(default)]
896    pub materials: MaterialMasterConfig,
897    /// Fixed asset master data settings
898    #[serde(default)]
899    pub fixed_assets: FixedAssetMasterConfig,
900    /// Employee master data settings
901    #[serde(default)]
902    pub employees: EmployeeMasterConfig,
903    /// Cost center master data settings
904    #[serde(default)]
905    pub cost_centers: CostCenterMasterConfig,
906}
907
908/// Vendor master data configuration.
909#[derive(Debug, Clone, Serialize, Deserialize)]
910pub struct VendorMasterConfig {
911    /// Number of vendors to generate
912    #[serde(default = "default_vendor_count")]
913    pub count: usize,
914    /// Percentage of vendors that are intercompany (0.0 to 1.0)
915    #[serde(default = "default_intercompany_percent")]
916    pub intercompany_percent: f64,
917    /// Payment terms distribution
918    #[serde(default)]
919    pub payment_terms_distribution: PaymentTermsDistribution,
920    /// Vendor behavior distribution
921    #[serde(default)]
922    pub behavior_distribution: VendorBehaviorDistribution,
923    /// Generate bank account details
924    #[serde(default = "default_true")]
925    pub generate_bank_accounts: bool,
926    /// Generate tax IDs
927    #[serde(default = "default_true")]
928    pub generate_tax_ids: bool,
929}
930
/// Serde fallback for `VendorMasterConfig::count`.
fn default_vendor_count() -> usize {
    500_usize
}
934
/// Serde fallback for the `intercompany_percent` field of both the vendor
/// and the customer master configuration.
fn default_intercompany_percent() -> f64 {
    0.05_f64
}
938
939impl Default for VendorMasterConfig {
940    fn default() -> Self {
941        Self {
942            count: default_vendor_count(),
943            intercompany_percent: default_intercompany_percent(),
944            payment_terms_distribution: PaymentTermsDistribution::default(),
945            behavior_distribution: VendorBehaviorDistribution::default(),
946            generate_bank_accounts: true,
947            generate_tax_ids: true,
948        }
949    }
950}
951
952/// Payment terms distribution for vendors.
953#[derive(Debug, Clone, Serialize, Deserialize)]
954pub struct PaymentTermsDistribution {
955    /// Net 30 days
956    pub net_30: f64,
957    /// Net 60 days
958    pub net_60: f64,
959    /// Net 90 days
960    pub net_90: f64,
961    /// 2% 10 Net 30 (early payment discount)
962    pub two_ten_net_30: f64,
963    /// Due on receipt
964    pub due_on_receipt: f64,
965    /// End of month
966    pub end_of_month: f64,
967}
968
969impl Default for PaymentTermsDistribution {
970    fn default() -> Self {
971        Self {
972            net_30: 0.40,
973            net_60: 0.20,
974            net_90: 0.10,
975            two_ten_net_30: 0.15,
976            due_on_receipt: 0.05,
977            end_of_month: 0.10,
978        }
979    }
980}
981
982/// Vendor behavior distribution.
983#[derive(Debug, Clone, Serialize, Deserialize)]
984pub struct VendorBehaviorDistribution {
985    /// Reliable vendors (consistent delivery, quality)
986    pub reliable: f64,
987    /// Sometimes late vendors
988    pub sometimes_late: f64,
989    /// Inconsistent quality vendors
990    pub inconsistent_quality: f64,
991    /// Premium vendors (high quality, premium pricing)
992    pub premium: f64,
993    /// Budget vendors (lower quality, lower pricing)
994    pub budget: f64,
995}
996
997impl Default for VendorBehaviorDistribution {
998    fn default() -> Self {
999        Self {
1000            reliable: 0.50,
1001            sometimes_late: 0.20,
1002            inconsistent_quality: 0.10,
1003            premium: 0.10,
1004            budget: 0.10,
1005        }
1006    }
1007}
1008
1009/// Customer master data configuration.
1010#[derive(Debug, Clone, Serialize, Deserialize)]
1011pub struct CustomerMasterConfig {
1012    /// Number of customers to generate
1013    #[serde(default = "default_customer_count")]
1014    pub count: usize,
1015    /// Percentage of customers that are intercompany (0.0 to 1.0)
1016    #[serde(default = "default_intercompany_percent")]
1017    pub intercompany_percent: f64,
1018    /// Credit rating distribution
1019    #[serde(default)]
1020    pub credit_rating_distribution: CreditRatingDistribution,
1021    /// Payment behavior distribution
1022    #[serde(default)]
1023    pub payment_behavior_distribution: PaymentBehaviorDistribution,
1024    /// Generate credit limits based on rating
1025    #[serde(default = "default_true")]
1026    pub generate_credit_limits: bool,
1027}
1028
/// Serde fallback for `CustomerMasterConfig::count`.
fn default_customer_count() -> usize {
    2_000
}
1032
1033impl Default for CustomerMasterConfig {
1034    fn default() -> Self {
1035        Self {
1036            count: default_customer_count(),
1037            intercompany_percent: default_intercompany_percent(),
1038            credit_rating_distribution: CreditRatingDistribution::default(),
1039            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
1040            generate_credit_limits: true,
1041        }
1042    }
1043}
1044
1045/// Credit rating distribution for customers.
1046#[derive(Debug, Clone, Serialize, Deserialize)]
1047pub struct CreditRatingDistribution {
1048    /// AAA rating
1049    pub aaa: f64,
1050    /// AA rating
1051    pub aa: f64,
1052    /// A rating
1053    pub a: f64,
1054    /// BBB rating
1055    pub bbb: f64,
1056    /// BB rating
1057    pub bb: f64,
1058    /// B rating
1059    pub b: f64,
1060    /// Below B rating
1061    pub below_b: f64,
1062}
1063
1064impl Default for CreditRatingDistribution {
1065    fn default() -> Self {
1066        Self {
1067            aaa: 0.05,
1068            aa: 0.10,
1069            a: 0.20,
1070            bbb: 0.30,
1071            bb: 0.20,
1072            b: 0.10,
1073            below_b: 0.05,
1074        }
1075    }
1076}
1077
1078/// Payment behavior distribution for customers.
1079#[derive(Debug, Clone, Serialize, Deserialize)]
1080pub struct PaymentBehaviorDistribution {
1081    /// Always pays early
1082    pub early_payer: f64,
1083    /// Pays on time
1084    pub on_time: f64,
1085    /// Occasionally late
1086    pub occasional_late: f64,
1087    /// Frequently late
1088    pub frequent_late: f64,
1089    /// Takes early payment discounts
1090    pub discount_taker: f64,
1091}
1092
1093impl Default for PaymentBehaviorDistribution {
1094    fn default() -> Self {
1095        Self {
1096            early_payer: 0.10,
1097            on_time: 0.50,
1098            occasional_late: 0.25,
1099            frequent_late: 0.10,
1100            discount_taker: 0.05,
1101        }
1102    }
1103}
1104
1105/// Material master data configuration.
1106#[derive(Debug, Clone, Serialize, Deserialize)]
1107pub struct MaterialMasterConfig {
1108    /// Number of materials to generate
1109    #[serde(default = "default_material_count")]
1110    pub count: usize,
1111    /// Material type distribution
1112    #[serde(default)]
1113    pub type_distribution: MaterialTypeDistribution,
1114    /// Valuation method distribution
1115    #[serde(default)]
1116    pub valuation_distribution: ValuationMethodDistribution,
1117    /// Percentage of materials with BOM (bill of materials)
1118    #[serde(default = "default_bom_percent")]
1119    pub bom_percent: f64,
1120    /// Maximum BOM depth
1121    #[serde(default = "default_max_bom_depth")]
1122    pub max_bom_depth: u8,
1123}
1124
/// Serde fallback for `MaterialMasterConfig::count`.
fn default_material_count() -> usize {
    5_000
}
1128
/// Serde fallback for `MaterialMasterConfig::bom_percent`.
fn default_bom_percent() -> f64 {
    0.20_f64
}
1132
/// Serde fallback for `MaterialMasterConfig::max_bom_depth`.
fn default_max_bom_depth() -> u8 {
    3_u8
}
1136
1137impl Default for MaterialMasterConfig {
1138    fn default() -> Self {
1139        Self {
1140            count: default_material_count(),
1141            type_distribution: MaterialTypeDistribution::default(),
1142            valuation_distribution: ValuationMethodDistribution::default(),
1143            bom_percent: default_bom_percent(),
1144            max_bom_depth: default_max_bom_depth(),
1145        }
1146    }
1147}
1148
1149/// Material type distribution.
1150#[derive(Debug, Clone, Serialize, Deserialize)]
1151pub struct MaterialTypeDistribution {
1152    /// Raw materials
1153    pub raw_material: f64,
1154    /// Semi-finished goods
1155    pub semi_finished: f64,
1156    /// Finished goods
1157    pub finished_good: f64,
1158    /// Trading goods (purchased for resale)
1159    pub trading_good: f64,
1160    /// Operating supplies
1161    pub operating_supply: f64,
1162    /// Services
1163    pub service: f64,
1164}
1165
1166impl Default for MaterialTypeDistribution {
1167    fn default() -> Self {
1168        Self {
1169            raw_material: 0.30,
1170            semi_finished: 0.15,
1171            finished_good: 0.25,
1172            trading_good: 0.15,
1173            operating_supply: 0.10,
1174            service: 0.05,
1175        }
1176    }
1177}
1178
1179/// Valuation method distribution for materials.
1180#[derive(Debug, Clone, Serialize, Deserialize)]
1181pub struct ValuationMethodDistribution {
1182    /// Standard cost
1183    pub standard_cost: f64,
1184    /// Moving average
1185    pub moving_average: f64,
1186    /// FIFO (First In, First Out)
1187    pub fifo: f64,
1188    /// LIFO (Last In, First Out)
1189    pub lifo: f64,
1190}
1191
1192impl Default for ValuationMethodDistribution {
1193    fn default() -> Self {
1194        Self {
1195            standard_cost: 0.50,
1196            moving_average: 0.30,
1197            fifo: 0.15,
1198            lifo: 0.05,
1199        }
1200    }
1201}
1202
1203/// Fixed asset master data configuration.
1204#[derive(Debug, Clone, Serialize, Deserialize)]
1205pub struct FixedAssetMasterConfig {
1206    /// Number of fixed assets to generate
1207    #[serde(default = "default_asset_count")]
1208    pub count: usize,
1209    /// Asset class distribution
1210    #[serde(default)]
1211    pub class_distribution: AssetClassDistribution,
1212    /// Depreciation method distribution
1213    #[serde(default)]
1214    pub depreciation_distribution: DepreciationMethodDistribution,
1215    /// Percentage of assets that are fully depreciated
1216    #[serde(default = "default_fully_depreciated_percent")]
1217    pub fully_depreciated_percent: f64,
1218    /// Generate acquisition history
1219    #[serde(default = "default_true")]
1220    pub generate_acquisition_history: bool,
1221}
1222
/// Serde fallback for `FixedAssetMasterConfig::count`.
fn default_asset_count() -> usize {
    800_usize
}
1226
/// Serde fallback for `FixedAssetMasterConfig::fully_depreciated_percent`.
fn default_fully_depreciated_percent() -> f64 {
    0.15_f64
}
1230
1231impl Default for FixedAssetMasterConfig {
1232    fn default() -> Self {
1233        Self {
1234            count: default_asset_count(),
1235            class_distribution: AssetClassDistribution::default(),
1236            depreciation_distribution: DepreciationMethodDistribution::default(),
1237            fully_depreciated_percent: default_fully_depreciated_percent(),
1238            generate_acquisition_history: true,
1239        }
1240    }
1241}
1242
1243/// Asset class distribution.
1244#[derive(Debug, Clone, Serialize, Deserialize)]
1245pub struct AssetClassDistribution {
1246    /// Buildings and structures
1247    pub buildings: f64,
1248    /// Machinery and equipment
1249    pub machinery: f64,
1250    /// Vehicles
1251    pub vehicles: f64,
1252    /// IT equipment
1253    pub it_equipment: f64,
1254    /// Furniture and fixtures
1255    pub furniture: f64,
1256    /// Land (non-depreciable)
1257    pub land: f64,
1258    /// Leasehold improvements
1259    pub leasehold: f64,
1260}
1261
1262impl Default for AssetClassDistribution {
1263    fn default() -> Self {
1264        Self {
1265            buildings: 0.15,
1266            machinery: 0.30,
1267            vehicles: 0.15,
1268            it_equipment: 0.20,
1269            furniture: 0.10,
1270            land: 0.05,
1271            leasehold: 0.05,
1272        }
1273    }
1274}
1275
1276/// Depreciation method distribution.
1277#[derive(Debug, Clone, Serialize, Deserialize)]
1278pub struct DepreciationMethodDistribution {
1279    /// Straight line
1280    pub straight_line: f64,
1281    /// Declining balance
1282    pub declining_balance: f64,
1283    /// Double declining balance
1284    pub double_declining: f64,
1285    /// Sum of years' digits
1286    pub sum_of_years: f64,
1287    /// Units of production
1288    pub units_of_production: f64,
1289}
1290
1291impl Default for DepreciationMethodDistribution {
1292    fn default() -> Self {
1293        Self {
1294            straight_line: 0.60,
1295            declining_balance: 0.20,
1296            double_declining: 0.10,
1297            sum_of_years: 0.05,
1298            units_of_production: 0.05,
1299        }
1300    }
1301}
1302
1303/// Employee master data configuration.
1304#[derive(Debug, Clone, Serialize, Deserialize)]
1305pub struct EmployeeMasterConfig {
1306    /// Number of employees to generate
1307    #[serde(default = "default_employee_count")]
1308    pub count: usize,
1309    /// Generate organizational hierarchy
1310    #[serde(default = "default_true")]
1311    pub generate_hierarchy: bool,
1312    /// Maximum hierarchy depth
1313    #[serde(default = "default_hierarchy_depth")]
1314    pub max_hierarchy_depth: u8,
1315    /// Average span of control (direct reports per manager)
1316    #[serde(default = "default_span_of_control")]
1317    pub average_span_of_control: f64,
1318    /// Approval limit distribution by job level
1319    #[serde(default)]
1320    pub approval_limits: ApprovalLimitDistribution,
1321    /// Department distribution
1322    #[serde(default)]
1323    pub department_distribution: EmployeeDepartmentDistribution,
1324}
1325
/// Serde fallback for `EmployeeMasterConfig::count`.
fn default_employee_count() -> usize {
    1_500
}
1329
/// Serde fallback for `EmployeeMasterConfig::max_hierarchy_depth`.
fn default_hierarchy_depth() -> u8 {
    6_u8
}
1333
/// Serde fallback for `EmployeeMasterConfig::average_span_of_control`.
fn default_span_of_control() -> f64 {
    5.0_f64
}
1337
1338impl Default for EmployeeMasterConfig {
1339    fn default() -> Self {
1340        Self {
1341            count: default_employee_count(),
1342            generate_hierarchy: true,
1343            max_hierarchy_depth: default_hierarchy_depth(),
1344            average_span_of_control: default_span_of_control(),
1345            approval_limits: ApprovalLimitDistribution::default(),
1346            department_distribution: EmployeeDepartmentDistribution::default(),
1347        }
1348    }
1349}
1350
1351/// Approval limit distribution by job level.
1352#[derive(Debug, Clone, Serialize, Deserialize)]
1353pub struct ApprovalLimitDistribution {
1354    /// Staff level approval limit
1355    #[serde(default = "default_staff_limit")]
1356    pub staff: f64,
1357    /// Senior staff approval limit
1358    #[serde(default = "default_senior_limit")]
1359    pub senior: f64,
1360    /// Manager approval limit
1361    #[serde(default = "default_manager_limit")]
1362    pub manager: f64,
1363    /// Director approval limit
1364    #[serde(default = "default_director_limit")]
1365    pub director: f64,
1366    /// VP approval limit
1367    #[serde(default = "default_vp_limit")]
1368    pub vp: f64,
1369    /// Executive approval limit
1370    #[serde(default = "default_executive_limit")]
1371    pub executive: f64,
1372}
1373
/// Serde fallback for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    1_000.0
}
/// Serde fallback for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    5_000.0
}
/// Serde fallback for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    25_000.0
}
/// Serde fallback for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    100_000.0
}
/// Serde fallback for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    500_000.0
}
/// Serde fallback for `ApprovalLimitDistribution::executive`: positive infinity.
///
/// NOTE(review): the value is non-finite. JSON has no literal for infinity, so
/// confirm that every config format in use can round-trip this default.
fn default_executive_limit() -> f64 {
    let unlimited = f64::INFINITY;
    unlimited
}
1392
1393impl Default for ApprovalLimitDistribution {
1394    fn default() -> Self {
1395        Self {
1396            staff: default_staff_limit(),
1397            senior: default_senior_limit(),
1398            manager: default_manager_limit(),
1399            director: default_director_limit(),
1400            vp: default_vp_limit(),
1401            executive: default_executive_limit(),
1402        }
1403    }
1404}
1405
1406/// Employee distribution across departments.
1407#[derive(Debug, Clone, Serialize, Deserialize)]
1408pub struct EmployeeDepartmentDistribution {
1409    /// Finance and Accounting
1410    pub finance: f64,
1411    /// Procurement
1412    pub procurement: f64,
1413    /// Sales
1414    pub sales: f64,
1415    /// Warehouse and Logistics
1416    pub warehouse: f64,
1417    /// IT
1418    pub it: f64,
1419    /// Human Resources
1420    pub hr: f64,
1421    /// Operations
1422    pub operations: f64,
1423    /// Executive
1424    pub executive: f64,
1425}
1426
1427impl Default for EmployeeDepartmentDistribution {
1428    fn default() -> Self {
1429        Self {
1430            finance: 0.12,
1431            procurement: 0.10,
1432            sales: 0.25,
1433            warehouse: 0.15,
1434            it: 0.10,
1435            hr: 0.05,
1436            operations: 0.20,
1437            executive: 0.03,
1438        }
1439    }
1440}
1441
1442/// Cost center master data configuration.
1443#[derive(Debug, Clone, Serialize, Deserialize)]
1444pub struct CostCenterMasterConfig {
1445    /// Number of cost centers to generate
1446    #[serde(default = "default_cost_center_count")]
1447    pub count: usize,
1448    /// Generate cost center hierarchy
1449    #[serde(default = "default_true")]
1450    pub generate_hierarchy: bool,
1451    /// Maximum hierarchy depth
1452    #[serde(default = "default_cc_hierarchy_depth")]
1453    pub max_hierarchy_depth: u8,
1454}
1455
/// Serde fallback for `CostCenterMasterConfig::count`.
fn default_cost_center_count() -> usize {
    50_usize
}
1459
/// Serde fallback for `CostCenterMasterConfig::max_hierarchy_depth`.
fn default_cc_hierarchy_depth() -> u8 {
    3_u8
}
1463
1464impl Default for CostCenterMasterConfig {
1465    fn default() -> Self {
1466        Self {
1467            count: default_cost_center_count(),
1468            generate_hierarchy: true,
1469            max_hierarchy_depth: default_cc_hierarchy_depth(),
1470        }
1471    }
1472}
1473
1474// ============================================================================
1475// Document Flow Configuration
1476// ============================================================================
1477
1478/// Document flow generation configuration.
1479#[derive(Debug, Clone, Serialize, Deserialize)]
1480pub struct DocumentFlowConfig {
1481    /// P2P (Procure-to-Pay) flow configuration
1482    #[serde(default)]
1483    pub p2p: P2PFlowConfig,
1484    /// O2C (Order-to-Cash) flow configuration
1485    #[serde(default)]
1486    pub o2c: O2CFlowConfig,
1487    /// Generate document reference chains
1488    #[serde(default = "default_true")]
1489    pub generate_document_references: bool,
1490    /// Export document flow graph
1491    #[serde(default)]
1492    pub export_flow_graph: bool,
1493}
1494
1495impl Default for DocumentFlowConfig {
1496    fn default() -> Self {
1497        Self {
1498            p2p: P2PFlowConfig::default(),
1499            o2c: O2CFlowConfig::default(),
1500            generate_document_references: true,
1501            export_flow_graph: false,
1502        }
1503    }
1504}
1505
1506/// P2P (Procure-to-Pay) flow configuration.
1507#[derive(Debug, Clone, Serialize, Deserialize)]
1508pub struct P2PFlowConfig {
1509    /// Enable P2P document flow generation
1510    #[serde(default = "default_true")]
1511    pub enabled: bool,
1512    /// Three-way match success rate (PO-GR-Invoice)
1513    #[serde(default = "default_three_way_match_rate")]
1514    pub three_way_match_rate: f64,
1515    /// Rate of partial deliveries
1516    #[serde(default = "default_partial_delivery_rate")]
1517    pub partial_delivery_rate: f64,
1518    /// Rate of price variances between PO and Invoice
1519    #[serde(default = "default_price_variance_rate")]
1520    pub price_variance_rate: f64,
1521    /// Maximum price variance percentage
1522    #[serde(default = "default_max_price_variance")]
1523    pub max_price_variance_percent: f64,
1524    /// Rate of quantity variances between PO/GR and Invoice
1525    #[serde(default = "default_quantity_variance_rate")]
1526    pub quantity_variance_rate: f64,
1527    /// Average days from PO to goods receipt
1528    #[serde(default = "default_po_to_gr_days")]
1529    pub average_po_to_gr_days: u32,
1530    /// Average days from GR to invoice
1531    #[serde(default = "default_gr_to_invoice_days")]
1532    pub average_gr_to_invoice_days: u32,
1533    /// Average days from invoice to payment
1534    #[serde(default = "default_invoice_to_payment_days")]
1535    pub average_invoice_to_payment_days: u32,
1536    /// PO line count distribution
1537    #[serde(default)]
1538    pub line_count_distribution: DocumentLineCountDistribution,
1539    /// Payment behavior configuration
1540    #[serde(default)]
1541    pub payment_behavior: P2PPaymentBehaviorConfig,
1542}
1543
/// Serde fallback for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    0.95_f64
}
1547
/// Serde fallback for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    0.15_f64
}
1551
/// Serde fallback for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    0.08_f64
}
1555
/// Serde fallback for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    0.05_f64
}
1559
/// Serde fallback for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    0.05_f64
}
1563
/// Serde fallback for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    14_u32
}
1567
/// Serde fallback for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    5_u32
}
1571
/// Serde fallback for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    30_u32
}
1575
1576impl Default for P2PFlowConfig {
1577    fn default() -> Self {
1578        Self {
1579            enabled: true,
1580            three_way_match_rate: default_three_way_match_rate(),
1581            partial_delivery_rate: default_partial_delivery_rate(),
1582            price_variance_rate: default_price_variance_rate(),
1583            max_price_variance_percent: default_max_price_variance(),
1584            quantity_variance_rate: default_quantity_variance_rate(),
1585            average_po_to_gr_days: default_po_to_gr_days(),
1586            average_gr_to_invoice_days: default_gr_to_invoice_days(),
1587            average_invoice_to_payment_days: default_invoice_to_payment_days(),
1588            line_count_distribution: DocumentLineCountDistribution::default(),
1589            payment_behavior: P2PPaymentBehaviorConfig::default(),
1590        }
1591    }
1592}
1593
1594// ============================================================================
1595// P2P Payment Behavior Configuration
1596// ============================================================================
1597
1598/// P2P payment behavior configuration.
1599#[derive(Debug, Clone, Serialize, Deserialize)]
1600pub struct P2PPaymentBehaviorConfig {
1601    /// Rate of late payments (beyond due date)
1602    #[serde(default = "default_p2p_late_payment_rate")]
1603    pub late_payment_rate: f64,
1604    /// Distribution of late payment days
1605    #[serde(default)]
1606    pub late_payment_days_distribution: LatePaymentDaysDistribution,
1607    /// Rate of partial payments
1608    #[serde(default = "default_p2p_partial_payment_rate")]
1609    pub partial_payment_rate: f64,
1610    /// Rate of payment corrections (NSF, chargebacks, reversals)
1611    #[serde(default = "default_p2p_payment_correction_rate")]
1612    pub payment_correction_rate: f64,
1613}
1614
/// Serde fallback for `P2PPaymentBehaviorConfig::late_payment_rate`.
fn default_p2p_late_payment_rate() -> f64 {
    0.15_f64
}
1618
/// Serde fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05_f64
}
1622
/// Serde fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02_f64
}
1626
1627impl Default for P2PPaymentBehaviorConfig {
1628    fn default() -> Self {
1629        Self {
1630            late_payment_rate: default_p2p_late_payment_rate(),
1631            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
1632            partial_payment_rate: default_p2p_partial_payment_rate(),
1633            payment_correction_rate: default_p2p_payment_correction_rate(),
1634        }
1635    }
1636}
1637
1638/// Distribution of late payment days for P2P.
1639#[derive(Debug, Clone, Serialize, Deserialize)]
1640pub struct LatePaymentDaysDistribution {
1641    /// 1-7 days late (slightly late)
1642    #[serde(default = "default_slightly_late")]
1643    pub slightly_late_1_to_7: f64,
1644    /// 8-14 days late
1645    #[serde(default = "default_late_8_14")]
1646    pub late_8_to_14: f64,
1647    /// 15-30 days late (very late)
1648    #[serde(default = "default_very_late")]
1649    pub very_late_15_to_30: f64,
1650    /// 31-60 days late (severely late)
1651    #[serde(default = "default_severely_late")]
1652    pub severely_late_31_to_60: f64,
1653    /// Over 60 days late (extremely late)
1654    #[serde(default = "default_extremely_late")]
1655    pub extremely_late_over_60: f64,
1656}
1657
/// Serde fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`.
fn default_slightly_late() -> f64 {
    0.50_f64
}
1661
/// Serde fallback for `LatePaymentDaysDistribution::late_8_to_14`.
fn default_late_8_14() -> f64 {
    0.25_f64
}
1665
/// Serde fallback for `LatePaymentDaysDistribution::very_late_15_to_30`.
fn default_very_late() -> f64 {
    0.15_f64
}
1669
/// Serde fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`.
fn default_severely_late() -> f64 {
    0.07_f64
}
1673
/// Serde fallback for `LatePaymentDaysDistribution::extremely_late_over_60`.
fn default_extremely_late() -> f64 {
    0.03_f64
}
1677
1678impl Default for LatePaymentDaysDistribution {
1679    fn default() -> Self {
1680        Self {
1681            slightly_late_1_to_7: default_slightly_late(),
1682            late_8_to_14: default_late_8_14(),
1683            very_late_15_to_30: default_very_late(),
1684            severely_late_31_to_60: default_severely_late(),
1685            extremely_late_over_60: default_extremely_late(),
1686        }
1687    }
1688}
1689
1690/// O2C (Order-to-Cash) flow configuration.
1691#[derive(Debug, Clone, Serialize, Deserialize)]
1692pub struct O2CFlowConfig {
1693    /// Enable O2C document flow generation
1694    #[serde(default = "default_true")]
1695    pub enabled: bool,
1696    /// Credit check failure rate
1697    #[serde(default = "default_credit_check_failure_rate")]
1698    pub credit_check_failure_rate: f64,
1699    /// Rate of partial shipments
1700    #[serde(default = "default_partial_shipment_rate")]
1701    pub partial_shipment_rate: f64,
1702    /// Rate of returns
1703    #[serde(default = "default_return_rate")]
1704    pub return_rate: f64,
1705    /// Bad debt write-off rate
1706    #[serde(default = "default_bad_debt_rate")]
1707    pub bad_debt_rate: f64,
1708    /// Average days from SO to delivery
1709    #[serde(default = "default_so_to_delivery_days")]
1710    pub average_so_to_delivery_days: u32,
1711    /// Average days from delivery to invoice
1712    #[serde(default = "default_delivery_to_invoice_days")]
1713    pub average_delivery_to_invoice_days: u32,
1714    /// Average days from invoice to receipt
1715    #[serde(default = "default_invoice_to_receipt_days")]
1716    pub average_invoice_to_receipt_days: u32,
1717    /// SO line count distribution
1718    #[serde(default)]
1719    pub line_count_distribution: DocumentLineCountDistribution,
1720    /// Cash discount configuration
1721    #[serde(default)]
1722    pub cash_discount: CashDiscountConfig,
1723    /// Payment behavior configuration
1724    #[serde(default)]
1725    pub payment_behavior: O2CPaymentBehaviorConfig,
1726}
1727
/// Serde fallback for `O2CFlowConfig::credit_check_failure_rate`.
fn default_credit_check_failure_rate() -> f64 {
    0.02_f64
}
1731
/// Serde fallback for `O2CFlowConfig::partial_shipment_rate`.
fn default_partial_shipment_rate() -> f64 {
    0.10_f64
}
1735
/// Serde fallback for `O2CFlowConfig::return_rate`.
fn default_return_rate() -> f64 {
    0.03_f64
}
1739
/// Serde fallback for `O2CFlowConfig::bad_debt_rate`.
fn default_bad_debt_rate() -> f64 {
    0.01_f64
}
1743
/// Serde fallback for `O2CFlowConfig::average_so_to_delivery_days`.
fn default_so_to_delivery_days() -> u32 {
    7_u32
}
1747
/// Serde fallback for `O2CFlowConfig::average_delivery_to_invoice_days`.
fn default_delivery_to_invoice_days() -> u32 {
    1_u32
}
1751
/// Serde fallback for `O2CFlowConfig::average_invoice_to_receipt_days`.
fn default_invoice_to_receipt_days() -> u32 {
    45_u32
}
1755
1756impl Default for O2CFlowConfig {
1757    fn default() -> Self {
1758        Self {
1759            enabled: true,
1760            credit_check_failure_rate: default_credit_check_failure_rate(),
1761            partial_shipment_rate: default_partial_shipment_rate(),
1762            return_rate: default_return_rate(),
1763            bad_debt_rate: default_bad_debt_rate(),
1764            average_so_to_delivery_days: default_so_to_delivery_days(),
1765            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
1766            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
1767            line_count_distribution: DocumentLineCountDistribution::default(),
1768            cash_discount: CashDiscountConfig::default(),
1769            payment_behavior: O2CPaymentBehaviorConfig::default(),
1770        }
1771    }
1772}
1773
1774// ============================================================================
1775// O2C Payment Behavior Configuration
1776// ============================================================================
1777
1778/// O2C payment behavior configuration.
1779#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1780pub struct O2CPaymentBehaviorConfig {
1781    /// Dunning (Mahnung) configuration
1782    #[serde(default)]
1783    pub dunning: DunningConfig,
1784    /// Partial payment configuration
1785    #[serde(default)]
1786    pub partial_payments: PartialPaymentConfig,
1787    /// Short payment configuration (unauthorized deductions)
1788    #[serde(default)]
1789    pub short_payments: ShortPaymentConfig,
1790    /// On-account payment configuration (unapplied payments)
1791    #[serde(default)]
1792    pub on_account_payments: OnAccountPaymentConfig,
1793    /// Payment correction configuration (NSF, chargebacks)
1794    #[serde(default)]
1795    pub payment_corrections: PaymentCorrectionConfig,
1796}
1797
1798/// Dunning (Mahnungen) configuration for AR collections.
1799#[derive(Debug, Clone, Serialize, Deserialize)]
1800pub struct DunningConfig {
1801    /// Enable dunning process
1802    #[serde(default)]
1803    pub enabled: bool,
1804    /// Days overdue for level 1 dunning (1st reminder)
1805    #[serde(default = "default_dunning_level_1_days")]
1806    pub level_1_days_overdue: u32,
1807    /// Days overdue for level 2 dunning (2nd reminder)
1808    #[serde(default = "default_dunning_level_2_days")]
1809    pub level_2_days_overdue: u32,
1810    /// Days overdue for level 3 dunning (final notice)
1811    #[serde(default = "default_dunning_level_3_days")]
1812    pub level_3_days_overdue: u32,
1813    /// Days overdue for collection handover
1814    #[serde(default = "default_collection_days")]
1815    pub collection_days_overdue: u32,
1816    /// Payment rates after each dunning level
1817    #[serde(default)]
1818    pub payment_after_dunning_rates: DunningPaymentRates,
1819    /// Rate of invoices blocked from dunning (disputes)
1820    #[serde(default = "default_dunning_block_rate")]
1821    pub dunning_block_rate: f64,
1822    /// Interest rate per year for overdue amounts
1823    #[serde(default = "default_dunning_interest_rate")]
1824    pub interest_rate_per_year: f64,
1825    /// Fixed dunning charge per letter
1826    #[serde(default = "default_dunning_charge")]
1827    pub dunning_charge: f64,
1828}
1829
/// Serde fallback for `DunningConfig::level_1_days_overdue`.
fn default_dunning_level_1_days() -> u32 {
    14_u32
}
1833
/// Serde fallback for `DunningConfig::level_2_days_overdue`.
fn default_dunning_level_2_days() -> u32 {
    28_u32
}
1837
/// Serde fallback for `DunningConfig::level_3_days_overdue`.
fn default_dunning_level_3_days() -> u32 {
    42_u32
}
1841
/// Serde fallback for `DunningConfig::collection_days_overdue`.
fn default_collection_days() -> u32 {
    60_u32
}
1845
/// Serde fallback for `DunningConfig::dunning_block_rate`.
fn default_dunning_block_rate() -> f64 {
    0.05_f64
}
1849
/// Serde fallback for `DunningConfig::interest_rate_per_year`.
fn default_dunning_interest_rate() -> f64 {
    0.09_f64
}
1853
/// Serde fallback for `DunningConfig::dunning_charge`.
fn default_dunning_charge() -> f64 {
    25.0_f64
}
1857
1858impl Default for DunningConfig {
1859    fn default() -> Self {
1860        Self {
1861            enabled: false,
1862            level_1_days_overdue: default_dunning_level_1_days(),
1863            level_2_days_overdue: default_dunning_level_2_days(),
1864            level_3_days_overdue: default_dunning_level_3_days(),
1865            collection_days_overdue: default_collection_days(),
1866            payment_after_dunning_rates: DunningPaymentRates::default(),
1867            dunning_block_rate: default_dunning_block_rate(),
1868            interest_rate_per_year: default_dunning_interest_rate(),
1869            dunning_charge: default_dunning_charge(),
1870        }
1871    }
1872}
1873
1874/// Payment rates after each dunning level.
1875#[derive(Debug, Clone, Serialize, Deserialize)]
1876pub struct DunningPaymentRates {
1877    /// Rate that pays after level 1 reminder
1878    #[serde(default = "default_after_level_1")]
1879    pub after_level_1: f64,
1880    /// Rate that pays after level 2 reminder
1881    #[serde(default = "default_after_level_2")]
1882    pub after_level_2: f64,
1883    /// Rate that pays after level 3 final notice
1884    #[serde(default = "default_after_level_3")]
1885    pub after_level_3: f64,
1886    /// Rate that pays during collection
1887    #[serde(default = "default_during_collection")]
1888    pub during_collection: f64,
1889    /// Rate that never pays (becomes bad debt)
1890    #[serde(default = "default_never_pay")]
1891    pub never_pay: f64,
1892}
1893
/// Serde fallback for `DunningPaymentRates::after_level_1`.
fn default_after_level_1() -> f64 {
    0.40_f64
}
1897
/// Serde fallback for `DunningPaymentRates::after_level_2`.
fn default_after_level_2() -> f64 {
    0.30_f64
}
1901
/// Serde fallback for `DunningPaymentRates::after_level_3`.
fn default_after_level_3() -> f64 {
    0.15_f64
}
1905
/// Serde fallback for `DunningPaymentRates::during_collection`.
fn default_during_collection() -> f64 {
    0.05_f64
}
1909
/// Serde fallback for `DunningPaymentRates::never_pay`.
fn default_never_pay() -> f64 {
    0.10_f64
}
1913
1914impl Default for DunningPaymentRates {
1915    fn default() -> Self {
1916        Self {
1917            after_level_1: default_after_level_1(),
1918            after_level_2: default_after_level_2(),
1919            after_level_3: default_after_level_3(),
1920            during_collection: default_during_collection(),
1921            never_pay: default_never_pay(),
1922        }
1923    }
1924}
1925
1926/// Partial payment configuration.
1927#[derive(Debug, Clone, Serialize, Deserialize)]
1928pub struct PartialPaymentConfig {
1929    /// Rate of invoices paid partially
1930    #[serde(default = "default_partial_payment_rate")]
1931    pub rate: f64,
1932    /// Distribution of partial payment percentages
1933    #[serde(default)]
1934    pub percentage_distribution: PartialPaymentPercentageDistribution,
1935    /// Average days until remainder is paid
1936    #[serde(default = "default_avg_days_until_remainder")]
1937    pub avg_days_until_remainder: u32,
1938}
1939
/// Serde fallback for `PartialPaymentConfig::rate`.
fn default_partial_payment_rate() -> f64 {
    0.08_f64
}
1943
/// Serde fallback for `PartialPaymentConfig::avg_days_until_remainder`.
fn default_avg_days_until_remainder() -> u32 {
    30_u32
}
1947
1948impl Default for PartialPaymentConfig {
1949    fn default() -> Self {
1950        Self {
1951            rate: default_partial_payment_rate(),
1952            percentage_distribution: PartialPaymentPercentageDistribution::default(),
1953            avg_days_until_remainder: default_avg_days_until_remainder(),
1954        }
1955    }
1956}
1957
1958/// Distribution of partial payment percentages.
1959#[derive(Debug, Clone, Serialize, Deserialize)]
1960pub struct PartialPaymentPercentageDistribution {
1961    /// Pay 25% of invoice
1962    #[serde(default = "default_partial_25")]
1963    pub pay_25_percent: f64,
1964    /// Pay 50% of invoice
1965    #[serde(default = "default_partial_50")]
1966    pub pay_50_percent: f64,
1967    /// Pay 75% of invoice
1968    #[serde(default = "default_partial_75")]
1969    pub pay_75_percent: f64,
1970    /// Pay random percentage
1971    #[serde(default = "default_partial_random")]
1972    pub pay_random_percent: f64,
1973}
1974
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_25_percent`.
fn default_partial_25() -> f64 {
    0.15_f64
}
1978
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_50_percent`.
fn default_partial_50() -> f64 {
    0.50_f64
}
1982
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_75_percent`.
fn default_partial_75() -> f64 {
    0.25_f64
}
1986
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_random_percent`.
fn default_partial_random() -> f64 {
    0.10_f64
}
1990
1991impl Default for PartialPaymentPercentageDistribution {
1992    fn default() -> Self {
1993        Self {
1994            pay_25_percent: default_partial_25(),
1995            pay_50_percent: default_partial_50(),
1996            pay_75_percent: default_partial_75(),
1997            pay_random_percent: default_partial_random(),
1998        }
1999    }
2000}
2001
2002/// Short payment configuration (unauthorized deductions).
2003#[derive(Debug, Clone, Serialize, Deserialize)]
2004pub struct ShortPaymentConfig {
2005    /// Rate of payments that are short
2006    #[serde(default = "default_short_payment_rate")]
2007    pub rate: f64,
2008    /// Distribution of short payment reasons
2009    #[serde(default)]
2010    pub reason_distribution: ShortPaymentReasonDistribution,
2011    /// Maximum percentage that can be short
2012    #[serde(default = "default_max_short_percent")]
2013    pub max_short_percent: f64,
2014}
2015
/// Default for `ShortPaymentConfig::rate`: 0.03.
fn default_short_payment_rate() -> f64 {
    0.03_f64
}

/// Default for `ShortPaymentConfig::max_short_percent`: 0.10.
fn default_max_short_percent() -> f64 {
    0.10_f64
}
2023
2024impl Default for ShortPaymentConfig {
2025    fn default() -> Self {
2026        Self {
2027            rate: default_short_payment_rate(),
2028            reason_distribution: ShortPaymentReasonDistribution::default(),
2029            max_short_percent: default_max_short_percent(),
2030        }
2031    }
2032}
2033
2034/// Distribution of short payment reasons.
2035#[derive(Debug, Clone, Serialize, Deserialize)]
2036pub struct ShortPaymentReasonDistribution {
2037    /// Pricing dispute
2038    #[serde(default = "default_pricing_dispute")]
2039    pub pricing_dispute: f64,
2040    /// Quality issue
2041    #[serde(default = "default_quality_issue")]
2042    pub quality_issue: f64,
2043    /// Quantity discrepancy
2044    #[serde(default = "default_quantity_discrepancy")]
2045    pub quantity_discrepancy: f64,
2046    /// Unauthorized deduction
2047    #[serde(default = "default_unauthorized_deduction")]
2048    pub unauthorized_deduction: f64,
2049    /// Early payment discount taken incorrectly
2050    #[serde(default = "default_incorrect_discount")]
2051    pub incorrect_discount: f64,
2052}
2053
/// Default for `ShortPaymentReasonDistribution::pricing_dispute`: 0.30.
fn default_pricing_dispute() -> f64 {
    0.30_f64
}

/// Default for `ShortPaymentReasonDistribution::quality_issue`: 0.20.
fn default_quality_issue() -> f64 {
    0.20_f64
}

/// Default for `ShortPaymentReasonDistribution::quantity_discrepancy`: 0.20.
fn default_quantity_discrepancy() -> f64 {
    0.20_f64
}

/// Default for `ShortPaymentReasonDistribution::unauthorized_deduction`: 0.15.
fn default_unauthorized_deduction() -> f64 {
    0.15_f64
}

/// Default for `ShortPaymentReasonDistribution::incorrect_discount`: 0.15.
fn default_incorrect_discount() -> f64 {
    0.15_f64
}
2073
2074impl Default for ShortPaymentReasonDistribution {
2075    fn default() -> Self {
2076        Self {
2077            pricing_dispute: default_pricing_dispute(),
2078            quality_issue: default_quality_issue(),
2079            quantity_discrepancy: default_quantity_discrepancy(),
2080            unauthorized_deduction: default_unauthorized_deduction(),
2081            incorrect_discount: default_incorrect_discount(),
2082        }
2083    }
2084}
2085
2086/// On-account payment configuration (unapplied payments).
2087#[derive(Debug, Clone, Serialize, Deserialize)]
2088pub struct OnAccountPaymentConfig {
2089    /// Rate of payments that are on-account (unapplied)
2090    #[serde(default = "default_on_account_rate")]
2091    pub rate: f64,
2092    /// Average days until on-account payments are applied
2093    #[serde(default = "default_avg_days_until_applied")]
2094    pub avg_days_until_applied: u32,
2095}
2096
/// Default for `OnAccountPaymentConfig::rate`: 0.02.
fn default_on_account_rate() -> f64 {
    0.02_f64
}

/// Default for `OnAccountPaymentConfig::avg_days_until_applied`: 14.
fn default_avg_days_until_applied() -> u32 {
    14_u32
}
2104
2105impl Default for OnAccountPaymentConfig {
2106    fn default() -> Self {
2107        Self {
2108            rate: default_on_account_rate(),
2109            avg_days_until_applied: default_avg_days_until_applied(),
2110        }
2111    }
2112}
2113
2114/// Payment correction configuration.
2115#[derive(Debug, Clone, Serialize, Deserialize)]
2116pub struct PaymentCorrectionConfig {
2117    /// Rate of payments requiring correction
2118    #[serde(default = "default_payment_correction_rate")]
2119    pub rate: f64,
2120    /// Distribution of correction types
2121    #[serde(default)]
2122    pub type_distribution: PaymentCorrectionTypeDistribution,
2123}
2124
/// Default for `PaymentCorrectionConfig::rate`: 0.02.
fn default_payment_correction_rate() -> f64 {
    0.02_f64
}
2128
2129impl Default for PaymentCorrectionConfig {
2130    fn default() -> Self {
2131        Self {
2132            rate: default_payment_correction_rate(),
2133            type_distribution: PaymentCorrectionTypeDistribution::default(),
2134        }
2135    }
2136}
2137
2138/// Distribution of payment correction types.
2139#[derive(Debug, Clone, Serialize, Deserialize)]
2140pub struct PaymentCorrectionTypeDistribution {
2141    /// NSF (Non-sufficient funds) / bounced check
2142    #[serde(default = "default_nsf_rate")]
2143    pub nsf: f64,
2144    /// Chargeback
2145    #[serde(default = "default_chargeback_rate")]
2146    pub chargeback: f64,
2147    /// Wrong amount applied
2148    #[serde(default = "default_wrong_amount_rate")]
2149    pub wrong_amount: f64,
2150    /// Wrong customer applied
2151    #[serde(default = "default_wrong_customer_rate")]
2152    pub wrong_customer: f64,
2153    /// Duplicate payment
2154    #[serde(default = "default_duplicate_payment_rate")]
2155    pub duplicate_payment: f64,
2156}
2157
/// Default for `PaymentCorrectionTypeDistribution::nsf`: 0.30.
fn default_nsf_rate() -> f64 {
    0.30_f64
}

/// Default for `PaymentCorrectionTypeDistribution::chargeback`: 0.20.
fn default_chargeback_rate() -> f64 {
    0.20_f64
}

/// Default for `PaymentCorrectionTypeDistribution::wrong_amount`: 0.20.
fn default_wrong_amount_rate() -> f64 {
    0.20_f64
}

/// Default for `PaymentCorrectionTypeDistribution::wrong_customer`: 0.15.
fn default_wrong_customer_rate() -> f64 {
    0.15_f64
}

/// Default for `PaymentCorrectionTypeDistribution::duplicate_payment`: 0.15.
fn default_duplicate_payment_rate() -> f64 {
    0.15_f64
}
2177
2178impl Default for PaymentCorrectionTypeDistribution {
2179    fn default() -> Self {
2180        Self {
2181            nsf: default_nsf_rate(),
2182            chargeback: default_chargeback_rate(),
2183            wrong_amount: default_wrong_amount_rate(),
2184            wrong_customer: default_wrong_customer_rate(),
2185            duplicate_payment: default_duplicate_payment_rate(),
2186        }
2187    }
2188}
2189
2190/// Document line count distribution.
2191#[derive(Debug, Clone, Serialize, Deserialize)]
2192pub struct DocumentLineCountDistribution {
2193    /// Minimum number of lines
2194    #[serde(default = "default_min_lines")]
2195    pub min_lines: u32,
2196    /// Maximum number of lines
2197    #[serde(default = "default_max_lines")]
2198    pub max_lines: u32,
2199    /// Most common line count (mode)
2200    #[serde(default = "default_mode_lines")]
2201    pub mode_lines: u32,
2202}
2203
/// Default for `DocumentLineCountDistribution::min_lines`: 1.
fn default_min_lines() -> u32 {
    1_u32
}

/// Default for `DocumentLineCountDistribution::max_lines`: 20.
fn default_max_lines() -> u32 {
    20_u32
}

/// Default for `DocumentLineCountDistribution::mode_lines`: 3.
fn default_mode_lines() -> u32 {
    3_u32
}
2215
2216impl Default for DocumentLineCountDistribution {
2217    fn default() -> Self {
2218        Self {
2219            min_lines: default_min_lines(),
2220            max_lines: default_max_lines(),
2221            mode_lines: default_mode_lines(),
2222        }
2223    }
2224}
2225
2226/// Cash discount configuration.
2227#[derive(Debug, Clone, Serialize, Deserialize)]
2228pub struct CashDiscountConfig {
2229    /// Percentage of invoices eligible for cash discount
2230    #[serde(default = "default_discount_eligible_rate")]
2231    pub eligible_rate: f64,
2232    /// Rate at which customers take the discount
2233    #[serde(default = "default_discount_taken_rate")]
2234    pub taken_rate: f64,
2235    /// Standard discount percentage
2236    #[serde(default = "default_discount_percent")]
2237    pub discount_percent: f64,
2238    /// Days within which discount must be taken
2239    #[serde(default = "default_discount_days")]
2240    pub discount_days: u32,
2241}
2242
/// Default for `CashDiscountConfig::eligible_rate`: 0.30.
fn default_discount_eligible_rate() -> f64 {
    0.30_f64
}

/// Default for `CashDiscountConfig::taken_rate`: 0.60.
fn default_discount_taken_rate() -> f64 {
    0.60_f64
}

/// Default for `CashDiscountConfig::discount_percent`: 0.02.
fn default_discount_percent() -> f64 {
    0.02_f64
}

/// Default for `CashDiscountConfig::discount_days`: 10.
fn default_discount_days() -> u32 {
    10_u32
}
2258
2259impl Default for CashDiscountConfig {
2260    fn default() -> Self {
2261        Self {
2262            eligible_rate: default_discount_eligible_rate(),
2263            taken_rate: default_discount_taken_rate(),
2264            discount_percent: default_discount_percent(),
2265            discount_days: default_discount_days(),
2266        }
2267    }
2268}
2269
2270// ============================================================================
2271// Intercompany Configuration
2272// ============================================================================
2273
2274/// Intercompany transaction configuration.
2275#[derive(Debug, Clone, Serialize, Deserialize)]
2276pub struct IntercompanyConfig {
2277    /// Enable intercompany transaction generation
2278    #[serde(default)]
2279    pub enabled: bool,
2280    /// Rate of transactions that are intercompany
2281    #[serde(default = "default_ic_transaction_rate")]
2282    pub ic_transaction_rate: f64,
2283    /// Transfer pricing method
2284    #[serde(default)]
2285    pub transfer_pricing_method: TransferPricingMethod,
2286    /// Transfer pricing markup percentage (for cost-plus)
2287    #[serde(default = "default_markup_percent")]
2288    pub markup_percent: f64,
2289    /// Generate matched IC pairs (offsetting entries)
2290    #[serde(default = "default_true")]
2291    pub generate_matched_pairs: bool,
2292    /// IC transaction type distribution
2293    #[serde(default)]
2294    pub transaction_type_distribution: ICTransactionTypeDistribution,
2295    /// Generate elimination entries for consolidation
2296    #[serde(default)]
2297    pub generate_eliminations: bool,
2298}
2299
/// Default for `IntercompanyConfig::ic_transaction_rate`: 0.15.
fn default_ic_transaction_rate() -> f64 {
    0.15_f64
}

/// Default for `IntercompanyConfig::markup_percent`: 0.05.
fn default_markup_percent() -> f64 {
    0.05_f64
}
2307
2308impl Default for IntercompanyConfig {
2309    fn default() -> Self {
2310        Self {
2311            enabled: false,
2312            ic_transaction_rate: default_ic_transaction_rate(),
2313            transfer_pricing_method: TransferPricingMethod::default(),
2314            markup_percent: default_markup_percent(),
2315            generate_matched_pairs: true,
2316            transaction_type_distribution: ICTransactionTypeDistribution::default(),
2317            generate_eliminations: false,
2318        }
2319    }
2320}
2321
2322/// Transfer pricing method.
2323#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2324#[serde(rename_all = "snake_case")]
2325pub enum TransferPricingMethod {
2326    /// Cost plus a markup
2327    #[default]
2328    CostPlus,
2329    /// Comparable uncontrolled price
2330    ComparableUncontrolled,
2331    /// Resale price method
2332    ResalePrice,
2333    /// Transactional net margin method
2334    TransactionalNetMargin,
2335    /// Profit split method
2336    ProfitSplit,
2337}
2338
2339/// IC transaction type distribution.
2340#[derive(Debug, Clone, Serialize, Deserialize)]
2341pub struct ICTransactionTypeDistribution {
2342    /// Goods sales between entities
2343    pub goods_sale: f64,
2344    /// Services provided
2345    pub service_provided: f64,
2346    /// Intercompany loans
2347    pub loan: f64,
2348    /// Dividends
2349    pub dividend: f64,
2350    /// Management fees
2351    pub management_fee: f64,
2352    /// Royalties
2353    pub royalty: f64,
2354    /// Cost sharing
2355    pub cost_sharing: f64,
2356}
2357
2358impl Default for ICTransactionTypeDistribution {
2359    fn default() -> Self {
2360        Self {
2361            goods_sale: 0.35,
2362            service_provided: 0.20,
2363            loan: 0.10,
2364            dividend: 0.05,
2365            management_fee: 0.15,
2366            royalty: 0.10,
2367            cost_sharing: 0.05,
2368        }
2369    }
2370}
2371
2372// ============================================================================
2373// Balance Configuration
2374// ============================================================================
2375
2376/// Balance and trial balance configuration.
2377#[derive(Debug, Clone, Serialize, Deserialize)]
2378pub struct BalanceConfig {
2379    /// Generate opening balances
2380    #[serde(default)]
2381    pub generate_opening_balances: bool,
2382    /// Generate trial balances
2383    #[serde(default = "default_true")]
2384    pub generate_trial_balances: bool,
2385    /// Target gross margin (for revenue/COGS coherence)
2386    #[serde(default = "default_gross_margin")]
2387    pub target_gross_margin: f64,
2388    /// Target DSO (Days Sales Outstanding)
2389    #[serde(default = "default_dso")]
2390    pub target_dso_days: u32,
2391    /// Target DPO (Days Payable Outstanding)
2392    #[serde(default = "default_dpo")]
2393    pub target_dpo_days: u32,
2394    /// Target current ratio
2395    #[serde(default = "default_current_ratio")]
2396    pub target_current_ratio: f64,
2397    /// Target debt-to-equity ratio
2398    #[serde(default = "default_debt_equity")]
2399    pub target_debt_to_equity: f64,
2400    /// Validate balance sheet equation (A = L + E)
2401    #[serde(default = "default_true")]
2402    pub validate_balance_equation: bool,
2403    /// Reconcile subledgers to GL control accounts
2404    #[serde(default = "default_true")]
2405    pub reconcile_subledgers: bool,
2406}
2407
/// Default for `BalanceConfig::target_gross_margin`: 0.35.
fn default_gross_margin() -> f64 {
    0.35_f64
}

/// Default for `BalanceConfig::target_dso_days`: 45.
fn default_dso() -> u32 {
    45_u32
}

/// Default for `BalanceConfig::target_dpo_days`: 30.
fn default_dpo() -> u32 {
    30_u32
}

/// Default for `BalanceConfig::target_current_ratio`: 1.5.
fn default_current_ratio() -> f64 {
    1.5_f64
}

/// Default for `BalanceConfig::target_debt_to_equity`: 0.5.
fn default_debt_equity() -> f64 {
    0.5_f64
}
2427
2428impl Default for BalanceConfig {
2429    fn default() -> Self {
2430        Self {
2431            generate_opening_balances: false,
2432            generate_trial_balances: true,
2433            target_gross_margin: default_gross_margin(),
2434            target_dso_days: default_dso(),
2435            target_dpo_days: default_dpo(),
2436            target_current_ratio: default_current_ratio(),
2437            target_debt_to_equity: default_debt_equity(),
2438            validate_balance_equation: true,
2439            reconcile_subledgers: true,
2440        }
2441    }
2442}
2443
2444// ==========================================================================
2445// OCPM (Object-Centric Process Mining) Configuration
2446// ==========================================================================
2447
2448/// OCPM (Object-Centric Process Mining) configuration.
2449///
2450/// Controls generation of OCEL 2.0 compatible event logs with
2451/// many-to-many event-to-object relationships.
2452#[derive(Debug, Clone, Serialize, Deserialize)]
2453pub struct OcpmConfig {
2454    /// Enable OCPM event log generation
2455    #[serde(default)]
2456    pub enabled: bool,
2457
2458    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
2459    #[serde(default = "default_true")]
2460    pub generate_lifecycle_events: bool,
2461
2462    /// Include object-to-object relationships in output
2463    #[serde(default = "default_true")]
2464    pub include_object_relationships: bool,
2465
2466    /// Compute and export process variants
2467    #[serde(default = "default_true")]
2468    pub compute_variants: bool,
2469
2470    /// Maximum variants to track (0 = unlimited)
2471    #[serde(default)]
2472    pub max_variants: usize,
2473
2474    /// P2P process configuration
2475    #[serde(default)]
2476    pub p2p_process: OcpmProcessConfig,
2477
2478    /// O2C process configuration
2479    #[serde(default)]
2480    pub o2c_process: OcpmProcessConfig,
2481
2482    /// Output format configuration
2483    #[serde(default)]
2484    pub output: OcpmOutputConfig,
2485}
2486
2487impl Default for OcpmConfig {
2488    fn default() -> Self {
2489        Self {
2490            enabled: false,
2491            generate_lifecycle_events: true,
2492            include_object_relationships: true,
2493            compute_variants: true,
2494            max_variants: 0,
2495            p2p_process: OcpmProcessConfig::default(),
2496            o2c_process: OcpmProcessConfig::default(),
2497            output: OcpmOutputConfig::default(),
2498        }
2499    }
2500}
2501
2502/// Process-specific OCPM configuration.
2503#[derive(Debug, Clone, Serialize, Deserialize)]
2504pub struct OcpmProcessConfig {
2505    /// Rework probability (0.0-1.0)
2506    #[serde(default = "default_rework_probability")]
2507    pub rework_probability: f64,
2508
2509    /// Skip step probability (0.0-1.0)
2510    #[serde(default = "default_skip_probability")]
2511    pub skip_step_probability: f64,
2512
2513    /// Out-of-order step probability (0.0-1.0)
2514    #[serde(default = "default_out_of_order_probability")]
2515    pub out_of_order_probability: f64,
2516}
2517
/// Default for `OcpmProcessConfig::rework_probability`: 0.05.
fn default_rework_probability() -> f64 {
    0.05_f64
}

/// Default for `OcpmProcessConfig::skip_step_probability`: 0.02.
fn default_skip_probability() -> f64 {
    0.02_f64
}

/// Default for `OcpmProcessConfig::out_of_order_probability`: 0.03.
fn default_out_of_order_probability() -> f64 {
    0.03_f64
}
2529
2530impl Default for OcpmProcessConfig {
2531    fn default() -> Self {
2532        Self {
2533            rework_probability: default_rework_probability(),
2534            skip_step_probability: default_skip_probability(),
2535            out_of_order_probability: default_out_of_order_probability(),
2536        }
2537    }
2538}
2539
2540/// OCPM output format configuration.
2541#[derive(Debug, Clone, Serialize, Deserialize)]
2542pub struct OcpmOutputConfig {
2543    /// Export OCEL 2.0 JSON format
2544    #[serde(default = "default_true")]
2545    pub ocel_json: bool,
2546
2547    /// Export OCEL 2.0 XML format
2548    #[serde(default)]
2549    pub ocel_xml: bool,
2550
2551    /// Export flattened CSV for each object type
2552    #[serde(default = "default_true")]
2553    pub flattened_csv: bool,
2554
2555    /// Export event-object relationship table
2556    #[serde(default = "default_true")]
2557    pub event_object_csv: bool,
2558
2559    /// Export object-object relationship table
2560    #[serde(default = "default_true")]
2561    pub object_relationship_csv: bool,
2562
2563    /// Export process variants summary
2564    #[serde(default = "default_true")]
2565    pub variants_csv: bool,
2566}
2567
2568impl Default for OcpmOutputConfig {
2569    fn default() -> Self {
2570        Self {
2571            ocel_json: true,
2572            ocel_xml: false,
2573            flattened_csv: true,
2574            event_object_csv: true,
2575            object_relationship_csv: true,
2576            variants_csv: true,
2577        }
2578    }
2579}
2580
2581/// Audit engagement and workpaper generation configuration.
2582#[derive(Debug, Clone, Serialize, Deserialize)]
2583pub struct AuditGenerationConfig {
2584    /// Enable audit engagement generation
2585    #[serde(default)]
2586    pub enabled: bool,
2587
2588    /// Generate engagement documents and workpapers
2589    #[serde(default = "default_true")]
2590    pub generate_workpapers: bool,
2591
2592    /// Default engagement type distribution
2593    #[serde(default)]
2594    pub engagement_types: AuditEngagementTypesConfig,
2595
2596    /// Workpaper configuration
2597    #[serde(default)]
2598    pub workpapers: WorkpaperConfig,
2599
2600    /// Team configuration
2601    #[serde(default)]
2602    pub team: AuditTeamConfig,
2603
2604    /// Review workflow configuration
2605    #[serde(default)]
2606    pub review: ReviewWorkflowConfig,
2607}
2608
2609impl Default for AuditGenerationConfig {
2610    fn default() -> Self {
2611        Self {
2612            enabled: false,
2613            generate_workpapers: true,
2614            engagement_types: AuditEngagementTypesConfig::default(),
2615            workpapers: WorkpaperConfig::default(),
2616            team: AuditTeamConfig::default(),
2617            review: ReviewWorkflowConfig::default(),
2618        }
2619    }
2620}
2621
2622/// Engagement type distribution configuration.
2623#[derive(Debug, Clone, Serialize, Deserialize)]
2624pub struct AuditEngagementTypesConfig {
2625    /// Financial statement audit probability
2626    #[serde(default = "default_financial_audit_prob")]
2627    pub financial_statement: f64,
2628    /// SOX/ICFR audit probability
2629    #[serde(default = "default_sox_audit_prob")]
2630    pub sox_icfr: f64,
2631    /// Integrated audit probability
2632    #[serde(default = "default_integrated_audit_prob")]
2633    pub integrated: f64,
2634    /// Review engagement probability
2635    #[serde(default = "default_review_prob")]
2636    pub review: f64,
2637    /// Agreed-upon procedures probability
2638    #[serde(default = "default_aup_prob")]
2639    pub agreed_upon_procedures: f64,
2640}
2641
/// Default for `AuditEngagementTypesConfig::financial_statement`: 0.40.
fn default_financial_audit_prob() -> f64 {
    0.40_f64
}

/// Default for `AuditEngagementTypesConfig::sox_icfr`: 0.20.
fn default_sox_audit_prob() -> f64 {
    0.20_f64
}

/// Default for `AuditEngagementTypesConfig::integrated`: 0.25.
fn default_integrated_audit_prob() -> f64 {
    0.25_f64
}

/// Default for `AuditEngagementTypesConfig::review`: 0.10.
fn default_review_prob() -> f64 {
    0.10_f64
}

/// Default for `AuditEngagementTypesConfig::agreed_upon_procedures`: 0.05.
fn default_aup_prob() -> f64 {
    0.05_f64
}
2657
2658impl Default for AuditEngagementTypesConfig {
2659    fn default() -> Self {
2660        Self {
2661            financial_statement: default_financial_audit_prob(),
2662            sox_icfr: default_sox_audit_prob(),
2663            integrated: default_integrated_audit_prob(),
2664            review: default_review_prob(),
2665            agreed_upon_procedures: default_aup_prob(),
2666        }
2667    }
2668}
2669
2670/// Workpaper generation configuration.
2671#[derive(Debug, Clone, Serialize, Deserialize)]
2672pub struct WorkpaperConfig {
2673    /// Average workpapers per engagement phase
2674    #[serde(default = "default_workpapers_per_phase")]
2675    pub average_per_phase: usize,
2676
2677    /// Include ISA compliance references
2678    #[serde(default = "default_true")]
2679    pub include_isa_references: bool,
2680
2681    /// Generate sample details
2682    #[serde(default = "default_true")]
2683    pub include_sample_details: bool,
2684
2685    /// Include cross-references between workpapers
2686    #[serde(default = "default_true")]
2687    pub include_cross_references: bool,
2688
2689    /// Sampling configuration
2690    #[serde(default)]
2691    pub sampling: SamplingConfig,
2692}
2693
/// Default for `WorkpaperConfig::average_per_phase`: 5.
fn default_workpapers_per_phase() -> usize {
    5_usize
}
2697
2698impl Default for WorkpaperConfig {
2699    fn default() -> Self {
2700        Self {
2701            average_per_phase: default_workpapers_per_phase(),
2702            include_isa_references: true,
2703            include_sample_details: true,
2704            include_cross_references: true,
2705            sampling: SamplingConfig::default(),
2706        }
2707    }
2708}
2709
2710/// Sampling method configuration.
2711#[derive(Debug, Clone, Serialize, Deserialize)]
2712pub struct SamplingConfig {
2713    /// Statistical sampling rate (0.0-1.0)
2714    #[serde(default = "default_statistical_rate")]
2715    pub statistical_rate: f64,
2716    /// Judgmental sampling rate (0.0-1.0)
2717    #[serde(default = "default_judgmental_rate")]
2718    pub judgmental_rate: f64,
2719    /// Haphazard sampling rate (0.0-1.0)
2720    #[serde(default = "default_haphazard_rate")]
2721    pub haphazard_rate: f64,
2722    /// 100% examination rate (0.0-1.0)
2723    #[serde(default = "default_complete_examination_rate")]
2724    pub complete_examination_rate: f64,
2725}
2726
/// Default for `SamplingConfig::statistical_rate`: 0.40.
fn default_statistical_rate() -> f64 {
    0.40_f64
}

/// Default for `SamplingConfig::judgmental_rate`: 0.30.
fn default_judgmental_rate() -> f64 {
    0.30_f64
}

/// Default for `SamplingConfig::haphazard_rate`: 0.20.
fn default_haphazard_rate() -> f64 {
    0.20_f64
}

/// Default for `SamplingConfig::complete_examination_rate`: 0.10.
fn default_complete_examination_rate() -> f64 {
    0.10_f64
}
2739
2740impl Default for SamplingConfig {
2741    fn default() -> Self {
2742        Self {
2743            statistical_rate: default_statistical_rate(),
2744            judgmental_rate: default_judgmental_rate(),
2745            haphazard_rate: default_haphazard_rate(),
2746            complete_examination_rate: default_complete_examination_rate(),
2747        }
2748    }
2749}
2750
2751/// Audit team configuration.
2752#[derive(Debug, Clone, Serialize, Deserialize)]
2753pub struct AuditTeamConfig {
2754    /// Minimum team size
2755    #[serde(default = "default_min_team_size")]
2756    pub min_team_size: usize,
2757    /// Maximum team size
2758    #[serde(default = "default_max_team_size")]
2759    pub max_team_size: usize,
2760    /// Probability of having a specialist on the team
2761    #[serde(default = "default_specialist_probability")]
2762    pub specialist_probability: f64,
2763}
2764
/// Default for `AuditTeamConfig::min_team_size`: 3.
fn default_min_team_size() -> usize {
    3_usize
}

/// Default for `AuditTeamConfig::max_team_size`: 8.
fn default_max_team_size() -> usize {
    8_usize
}

/// Default for `AuditTeamConfig::specialist_probability`: 0.30.
fn default_specialist_probability() -> f64 {
    0.30_f64
}
2774
2775impl Default for AuditTeamConfig {
2776    fn default() -> Self {
2777        Self {
2778            min_team_size: default_min_team_size(),
2779            max_team_size: default_max_team_size(),
2780            specialist_probability: default_specialist_probability(),
2781        }
2782    }
2783}
2784
2785/// Review workflow configuration.
2786#[derive(Debug, Clone, Serialize, Deserialize)]
2787pub struct ReviewWorkflowConfig {
2788    /// Average days between preparer completion and first review
2789    #[serde(default = "default_review_delay_days")]
2790    pub average_review_delay_days: u32,
2791    /// Probability of review notes requiring rework
2792    #[serde(default = "default_rework_probability_review")]
2793    pub rework_probability: f64,
2794    /// Require partner sign-off for all workpapers
2795    #[serde(default = "default_true")]
2796    pub require_partner_signoff: bool,
2797}
2798
/// Default for `ReviewWorkflowConfig::average_review_delay_days`: 2.
fn default_review_delay_days() -> u32 {
    2_u32
}

/// Default for `ReviewWorkflowConfig::rework_probability`: 0.15.
fn default_rework_probability_review() -> f64 {
    0.15_f64
}
2805
2806impl Default for ReviewWorkflowConfig {
2807    fn default() -> Self {
2808        Self {
2809            average_review_delay_days: default_review_delay_days(),
2810            rework_probability: default_rework_probability_review(),
2811            require_partner_signoff: true,
2812        }
2813    }
2814}
2815
2816// =============================================================================
2817// Data Quality Configuration
2818// =============================================================================
2819
2820/// Data quality variation settings for realistic flakiness injection.
2821#[derive(Debug, Clone, Serialize, Deserialize)]
2822pub struct DataQualitySchemaConfig {
2823    /// Enable data quality variations
2824    #[serde(default)]
2825    pub enabled: bool,
2826    /// Preset to use (overrides individual settings if set)
2827    #[serde(default)]
2828    pub preset: DataQualityPreset,
2829    /// Missing value injection settings
2830    #[serde(default)]
2831    pub missing_values: MissingValuesSchemaConfig,
2832    /// Typo injection settings
2833    #[serde(default)]
2834    pub typos: TypoSchemaConfig,
2835    /// Format variation settings
2836    #[serde(default)]
2837    pub format_variations: FormatVariationSchemaConfig,
2838    /// Duplicate injection settings
2839    #[serde(default)]
2840    pub duplicates: DuplicateSchemaConfig,
2841    /// Encoding issue settings
2842    #[serde(default)]
2843    pub encoding_issues: EncodingIssueSchemaConfig,
2844    /// Generate quality issue labels for ML training
2845    #[serde(default)]
2846    pub generate_labels: bool,
2847    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
2848    #[serde(default)]
2849    pub sink_profiles: SinkQualityProfiles,
2850}
2851
2852impl Default for DataQualitySchemaConfig {
2853    fn default() -> Self {
2854        Self {
2855            enabled: false,
2856            preset: DataQualityPreset::None,
2857            missing_values: MissingValuesSchemaConfig::default(),
2858            typos: TypoSchemaConfig::default(),
2859            format_variations: FormatVariationSchemaConfig::default(),
2860            duplicates: DuplicateSchemaConfig::default(),
2861            encoding_issues: EncodingIssueSchemaConfig::default(),
2862            generate_labels: true,
2863            sink_profiles: SinkQualityProfiles::default(),
2864        }
2865    }
2866}
2867
2868/// Preset configurations for common data quality scenarios.
2869#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2870#[serde(rename_all = "snake_case")]
2871pub enum DataQualityPreset {
2872    /// No data quality variations (clean data)
2873    #[default]
2874    None,
2875    /// Minimal variations (very clean data with rare issues)
2876    Minimal,
2877    /// Normal variations (realistic enterprise data quality)
2878    Normal,
2879    /// High variations (messy data for stress testing)
2880    High,
2881    /// Custom (use individual settings)
2882    Custom,
2883}
2884
2885/// Missing value injection configuration.
2886#[derive(Debug, Clone, Serialize, Deserialize)]
2887pub struct MissingValuesSchemaConfig {
2888    /// Enable missing value injection
2889    #[serde(default)]
2890    pub enabled: bool,
2891    /// Global missing rate (0.0 to 1.0)
2892    #[serde(default = "default_missing_rate")]
2893    pub rate: f64,
2894    /// Missing value strategy
2895    #[serde(default)]
2896    pub strategy: MissingValueStrategy,
2897    /// Field-specific rates (field name -> rate)
2898    #[serde(default)]
2899    pub field_rates: std::collections::HashMap<String, f64>,
2900    /// Fields that should never have missing values
2901    #[serde(default)]
2902    pub protected_fields: Vec<String>,
2903}
2904
/// Default for `MissingValuesSchemaConfig::rate`: 0.01.
fn default_missing_rate() -> f64 {
    0.01_f64
}
2908
2909impl Default for MissingValuesSchemaConfig {
2910    fn default() -> Self {
2911        Self {
2912            enabled: false,
2913            rate: default_missing_rate(),
2914            strategy: MissingValueStrategy::Mcar,
2915            field_rates: std::collections::HashMap::new(),
2916            protected_fields: vec![
2917                "document_id".to_string(),
2918                "company_code".to_string(),
2919                "posting_date".to_string(),
2920            ],
2921        }
2922    }
2923}
2924
2925/// Missing value strategy types.
2926#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2927#[serde(rename_all = "snake_case")]
2928pub enum MissingValueStrategy {
2929    /// Missing Completely At Random - equal probability for all values
2930    #[default]
2931    Mcar,
2932    /// Missing At Random - depends on other observed values
2933    Mar,
2934    /// Missing Not At Random - depends on the value itself
2935    Mnar,
2936    /// Systematic - entire field groups missing together
2937    Systematic,
2938}
2939
2940/// Typo injection configuration.
2941#[derive(Debug, Clone, Serialize, Deserialize)]
2942pub struct TypoSchemaConfig {
2943    /// Enable typo injection
2944    #[serde(default)]
2945    pub enabled: bool,
2946    /// Character error rate (per character, not per field)
2947    #[serde(default = "default_typo_rate")]
2948    pub char_error_rate: f64,
2949    /// Typo type weights
2950    #[serde(default)]
2951    pub type_weights: TypoTypeWeights,
2952    /// Fields that should never have typos
2953    #[serde(default)]
2954    pub protected_fields: Vec<String>,
2955}
2956
/// Default for `TypoSchemaConfig::char_error_rate`: 0.001.
fn default_typo_rate() -> f64 {
    0.001_f64
}
2960
2961impl Default for TypoSchemaConfig {
2962    fn default() -> Self {
2963        Self {
2964            enabled: false,
2965            char_error_rate: default_typo_rate(),
2966            type_weights: TypoTypeWeights::default(),
2967            protected_fields: vec![
2968                "document_id".to_string(),
2969                "gl_account".to_string(),
2970                "company_code".to_string(),
2971            ],
2972        }
2973    }
2974}
2975
/// Weights for different typo types.
///
/// Each weight may be omitted when deserializing; it then falls back to its
/// own `default_*_weight` helper. The built-in defaults
/// (0.35 / 0.25 / 0.10 / 0.15 / 0.10 / 0.05) add up to 1.0.
///
/// NOTE(review): whether consumers normalize custom weights is not visible in
/// this file section — confirm before relying on weights that do not sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypoTypeWeights {
    /// Keyboard-adjacent substitution (e.g., 'a' -> 's'); defaults to 0.35
    #[serde(default = "default_substitution_weight")]
    pub substitution: f64,
    /// Adjacent character transposition (e.g., 'ab' -> 'ba'); defaults to 0.25
    #[serde(default = "default_transposition_weight")]
    pub transposition: f64,
    /// Character insertion; defaults to 0.10
    #[serde(default = "default_insertion_weight")]
    pub insertion: f64,
    /// Character deletion; defaults to 0.15
    #[serde(default = "default_deletion_weight")]
    pub deletion: f64,
    /// OCR-style errors (e.g., '0' -> 'O'); defaults to 0.10
    #[serde(default = "default_ocr_weight")]
    pub ocr_errors: f64,
    /// Homophone substitution (e.g., 'their' -> 'there'); defaults to 0.05
    #[serde(default = "default_homophone_weight")]
    pub homophones: f64,
}
2998
/// Serde fallback for `TypoTypeWeights::substitution`.
fn default_substitution_weight() -> f64 {
    0.35_f64
}

/// Serde fallback for `TypoTypeWeights::transposition`.
fn default_transposition_weight() -> f64 {
    0.25_f64
}

/// Serde fallback for `TypoTypeWeights::insertion`.
fn default_insertion_weight() -> f64 {
    0.10_f64
}

/// Serde fallback for `TypoTypeWeights::deletion`.
fn default_deletion_weight() -> f64 {
    0.15_f64
}

/// Serde fallback for `TypoTypeWeights::ocr_errors`.
fn default_ocr_weight() -> f64 {
    0.10_f64
}

/// Serde fallback for `TypoTypeWeights::homophones`.
fn default_homophone_weight() -> f64 {
    0.05_f64
}
3017
3018impl Default for TypoTypeWeights {
3019    fn default() -> Self {
3020        Self {
3021            substitution: default_substitution_weight(),
3022            transposition: default_transposition_weight(),
3023            insertion: default_insertion_weight(),
3024            deletion: default_deletion_weight(),
3025            ocr_errors: default_ocr_weight(),
3026            homophones: default_homophone_weight(),
3027        }
3028    }
3029}
3030
/// Format variation configuration.
///
/// Groups the date, amount and identifier variation settings under one
/// top-level `enabled` switch. Each sub-section carries its own `enabled`
/// flag as well. Every field may be omitted when deserializing, in which case
/// its type's `Default` value is used.
///
/// NOTE(review): how the top-level flag interacts with the per-section flags
/// is decided by the consumer and is not visible here — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FormatVariationSchemaConfig {
    /// Enable format variations
    #[serde(default)]
    pub enabled: bool,
    /// Date format variation settings
    #[serde(default)]
    pub dates: DateFormatVariationConfig,
    /// Amount format variation settings
    #[serde(default)]
    pub amounts: AmountFormatVariationConfig,
    /// Identifier format variation settings
    #[serde(default)]
    pub identifiers: IdentifierFormatVariationConfig,
}
3047
/// Date format variation configuration.
///
/// All fields may be omitted when deserializing. `rate` then falls back to
/// `default_date_variation_rate()` (0.05) and `iso_format` to the shared
/// `default_true` helper; the remaining flags fall back to `false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateFormatVariationConfig {
    /// Enable date format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate
    // NOTE(review): presumably a fraction in 0.0..=1.0 — no validation is visible here.
    #[serde(default = "default_date_variation_rate")]
    pub rate: f64,
    /// Include ISO format (2024-01-15)
    #[serde(default = "default_true")]
    pub iso_format: bool,
    /// Include US format (01/15/2024)
    #[serde(default)]
    pub us_format: bool,
    /// Include EU format (15.01.2024)
    #[serde(default)]
    pub eu_format: bool,
    /// Include long format (January 15, 2024)
    #[serde(default)]
    pub long_format: bool,
}
3070
/// Serde fallback for `DateFormatVariationConfig::rate`.
fn default_date_variation_rate() -> f64 {
    0.05_f64
}
3074
3075impl Default for DateFormatVariationConfig {
3076    fn default() -> Self {
3077        Self {
3078            enabled: false,
3079            rate: default_date_variation_rate(),
3080            iso_format: true,
3081            us_format: false,
3082            eu_format: false,
3083            long_format: false,
3084        }
3085    }
3086}
3087
/// Amount format variation configuration.
///
/// All fields may be omitted when deserializing. `rate` then falls back to
/// `default_amount_variation_rate()` (0.02) and every flag to `false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AmountFormatVariationConfig {
    /// Enable amount format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate
    // NOTE(review): presumably a fraction in 0.0..=1.0 — no validation is visible here.
    #[serde(default = "default_amount_variation_rate")]
    pub rate: f64,
    /// Include US comma format (1,234.56)
    #[serde(default)]
    pub us_comma_format: bool,
    /// Include EU format (1.234,56)
    #[serde(default)]
    pub eu_format: bool,
    /// Include currency prefix ($1,234.56)
    #[serde(default)]
    pub currency_prefix: bool,
    /// Include accounting format with parentheses for negatives
    #[serde(default)]
    pub accounting_format: bool,
}
3110
/// Serde fallback for `AmountFormatVariationConfig::rate`.
fn default_amount_variation_rate() -> f64 {
    0.02_f64
}
3114
3115impl Default for AmountFormatVariationConfig {
3116    fn default() -> Self {
3117        Self {
3118            enabled: false,
3119            rate: default_amount_variation_rate(),
3120            us_comma_format: false,
3121            eu_format: false,
3122            currency_prefix: false,
3123            accounting_format: false,
3124        }
3125    }
3126}
3127
/// Identifier format variation configuration.
///
/// All fields may be omitted when deserializing. `rate` then falls back to
/// `default_identifier_variation_rate()` (0.02) and every flag to `false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdentifierFormatVariationConfig {
    /// Enable identifier format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate
    // NOTE(review): presumably a fraction in 0.0..=1.0 — no validation is visible here.
    #[serde(default = "default_identifier_variation_rate")]
    pub rate: f64,
    /// Case variations (uppercase, lowercase, mixed)
    #[serde(default)]
    pub case_variations: bool,
    /// Padding variations (leading zeros)
    #[serde(default)]
    pub padding_variations: bool,
    /// Separator variations (dash vs underscore)
    #[serde(default)]
    pub separator_variations: bool,
}
3147
/// Serde fallback for `IdentifierFormatVariationConfig::rate`.
fn default_identifier_variation_rate() -> f64 {
    0.02_f64
}
3151
3152impl Default for IdentifierFormatVariationConfig {
3153    fn default() -> Self {
3154        Self {
3155            enabled: false,
3156            rate: default_identifier_variation_rate(),
3157            case_variations: false,
3158            padding_variations: false,
3159            separator_variations: false,
3160        }
3161    }
3162}
3163
/// Duplicate injection configuration.
///
/// All fields may be omitted when deserializing; each one then falls back to
/// its `default_*` helper. The built-in exact / near / fuzzy ratios
/// (0.4 / 0.35 / 0.25) add up to 1.0.
///
/// NOTE(review): nothing in this section checks that custom ratios sum to
/// 1.0 — confirm whether validation happens elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateSchemaConfig {
    /// Enable duplicate injection
    #[serde(default)]
    pub enabled: bool,
    /// Overall duplicate rate; defaults to 0.005
    #[serde(default = "default_duplicate_rate")]
    pub rate: f64,
    /// Exact duplicate proportion (out of duplicates); defaults to 0.4
    #[serde(default = "default_exact_duplicate_ratio")]
    pub exact_duplicate_ratio: f64,
    /// Near duplicate proportion (slight variations); defaults to 0.35
    #[serde(default = "default_near_duplicate_ratio")]
    pub near_duplicate_ratio: f64,
    /// Fuzzy duplicate proportion (typos in key fields); defaults to 0.25
    #[serde(default = "default_fuzzy_duplicate_ratio")]
    pub fuzzy_duplicate_ratio: f64,
    /// Maximum date offset for near/fuzzy duplicates (days); defaults to 3
    #[serde(default = "default_max_date_offset")]
    pub max_date_offset_days: u32,
    /// Maximum amount variance for near duplicates (fraction); defaults to 0.01
    #[serde(default = "default_max_amount_variance")]
    pub max_amount_variance: f64,
}
3189
/// Serde fallback for `DuplicateSchemaConfig::rate`.
fn default_duplicate_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `DuplicateSchemaConfig::exact_duplicate_ratio`.
fn default_exact_duplicate_ratio() -> f64 {
    0.4_f64
}

/// Serde fallback for `DuplicateSchemaConfig::near_duplicate_ratio`.
fn default_near_duplicate_ratio() -> f64 {
    0.35_f64
}

/// Serde fallback for `DuplicateSchemaConfig::fuzzy_duplicate_ratio`.
fn default_fuzzy_duplicate_ratio() -> f64 {
    0.25_f64
}

/// Serde fallback for `DuplicateSchemaConfig::max_date_offset_days`.
fn default_max_date_offset() -> u32 {
    3_u32
}

/// Serde fallback for `DuplicateSchemaConfig::max_amount_variance`.
fn default_max_amount_variance() -> f64 {
    0.01_f64
}
3208
3209impl Default for DuplicateSchemaConfig {
3210    fn default() -> Self {
3211        Self {
3212            enabled: false,
3213            rate: default_duplicate_rate(),
3214            exact_duplicate_ratio: default_exact_duplicate_ratio(),
3215            near_duplicate_ratio: default_near_duplicate_ratio(),
3216            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
3217            max_date_offset_days: default_max_date_offset(),
3218            max_amount_variance: default_max_amount_variance(),
3219        }
3220    }
3221}
3222
/// Encoding issue configuration.
///
/// All fields may be omitted when deserializing. `rate` then falls back to
/// `default_encoding_rate()` (0.001) and every flag to `false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodingIssueSchemaConfig {
    /// Enable encoding issue injection
    #[serde(default)]
    pub enabled: bool,
    /// Overall encoding issue rate
    // NOTE(review): presumably a fraction in 0.0..=1.0 — no validation is visible here.
    #[serde(default = "default_encoding_rate")]
    pub rate: f64,
    /// Include mojibake (UTF-8/Latin-1 confusion)
    #[serde(default)]
    pub mojibake: bool,
    /// Include HTML entity corruption
    #[serde(default)]
    pub html_entities: bool,
    /// Include BOM issues
    #[serde(default)]
    pub bom_issues: bool,
}
3242
/// Serde fallback for `EncodingIssueSchemaConfig::rate`.
fn default_encoding_rate() -> f64 {
    0.001_f64
}
3246
3247impl Default for EncodingIssueSchemaConfig {
3248    fn default() -> Self {
3249        Self {
3250            enabled: false,
3251            rate: default_encoding_rate(),
3252            mojibake: false,
3253            html_entities: false,
3254            bom_issues: false,
3255        }
3256    }
3257}
3258
/// Per-sink quality profiles for different output formats.
///
/// Each entry is optional; an omitted or `None` entry carries no override for
/// that sink. The derived `Default` leaves all three entries as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SinkQualityProfiles {
    /// CSV-specific quality settings
    #[serde(default)]
    pub csv: Option<SinkQualityOverride>,
    /// JSON-specific quality settings
    #[serde(default)]
    pub json: Option<SinkQualityOverride>,
    /// Parquet-specific quality settings
    #[serde(default)]
    pub parquet: Option<SinkQualityOverride>,
}
3272
3273/// Quality setting overrides for a specific sink type.
3274#[derive(Debug, Clone, Serialize, Deserialize)]
3275pub struct SinkQualityOverride {
3276    /// Override enabled state
3277    pub enabled: Option<bool>,
3278    /// Override missing value rate
3279    pub missing_rate: Option<f64>,
3280    /// Override typo rate
3281    pub typo_rate: Option<f64>,
3282    /// Override format variation rate
3283    pub format_variation_rate: Option<f64>,
3284    /// Override duplicate rate
3285    pub duplicate_rate: Option<f64>,
3286}
3287
/// Unit tests for the configuration schema: serde round-trips, default values
/// and deserialization of partially specified configs.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::presets::demo_preset;

    // ==========================================================================
    // Serialization/Deserialization Tests
    // ==========================================================================

    #[test]
    fn test_config_yaml_roundtrip() {
        let config = demo_preset();
        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
        let deserialized: GeneratorConfig =
            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");

        assert_eq!(
            config.global.period_months,
            deserialized.global.period_months
        );
        assert_eq!(config.global.industry, deserialized.global.industry);
        assert_eq!(config.companies.len(), deserialized.companies.len());
        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
    }

    #[test]
    fn test_config_json_roundtrip() {
        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
        let mut config = demo_preset();
        // Replace infinity with a large but finite value for JSON compatibility
        config.master_data.employees.approval_limits.executive = 1e12;

        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
        let deserialized: GeneratorConfig =
            serde_json::from_str(&json).expect("Failed to deserialize from JSON");

        assert_eq!(
            config.global.period_months,
            deserialized.global.period_months
        );
        assert_eq!(config.global.industry, deserialized.global.industry);
        assert_eq!(config.companies.len(), deserialized.companies.len());
        // Same per-company check as the YAML round-trip test.
        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
    }

    #[test]
    fn test_transaction_volume_serialization() {
        // Test various transaction volumes serialize correctly
        let volumes = [
            (TransactionVolume::TenK, "ten_k"),
            (TransactionVolume::HundredK, "hundred_k"),
            (TransactionVolume::OneM, "one_m"),
            (TransactionVolume::TenM, "ten_m"),
            (TransactionVolume::HundredM, "hundred_m"),
        ];

        for (volume, expected_key) in volumes {
            let json = serde_json::to_string(&volume).expect("Failed to serialize");
            assert!(
                json.contains(expected_key),
                "Expected {} in JSON: {}",
                expected_key,
                json
            );
        }
    }

    #[test]
    fn test_transaction_volume_custom_serialization() {
        let volume = TransactionVolume::Custom(12345);
        let json = serde_json::to_string(&volume).expect("Failed to serialize");
        let deserialized: TransactionVolume =
            serde_json::from_str(&json).expect("Failed to deserialize");
        assert_eq!(deserialized.count(), 12345);
    }

    #[test]
    fn test_output_mode_serialization() {
        let modes = [
            OutputMode::Streaming,
            OutputMode::FlatFile,
            OutputMode::Both,
        ];

        for mode in modes {
            let json = serde_json::to_string(&mode).expect("Failed to serialize");
            let deserialized: OutputMode =
                serde_json::from_str(&json).expect("Failed to deserialize");
            // Compared via Debug output; assert_eq! prints both sides on failure.
            assert_eq!(format!("{:?}", mode), format!("{:?}", deserialized));
        }
    }

    #[test]
    fn test_file_format_serialization() {
        let formats = [
            FileFormat::Csv,
            FileFormat::Parquet,
            FileFormat::Json,
            FileFormat::JsonLines,
        ];

        for format in formats {
            let json = serde_json::to_string(&format).expect("Failed to serialize");
            let deserialized: FileFormat =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", format), format!("{:?}", deserialized));
        }
    }

    #[test]
    fn test_compression_algorithm_serialization() {
        let algos = [
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Zstd,
            CompressionAlgorithm::Lz4,
            CompressionAlgorithm::Snappy,
        ];

        for algo in algos {
            let json = serde_json::to_string(&algo).expect("Failed to serialize");
            let deserialized: CompressionAlgorithm =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", algo), format!("{:?}", deserialized));
        }
    }

    #[test]
    fn test_transfer_pricing_method_serialization() {
        let methods = [
            TransferPricingMethod::CostPlus,
            TransferPricingMethod::ComparableUncontrolled,
            TransferPricingMethod::ResalePrice,
            TransferPricingMethod::TransactionalNetMargin,
            TransferPricingMethod::ProfitSplit,
        ];

        for method in methods {
            let json = serde_json::to_string(&method).expect("Failed to serialize");
            let deserialized: TransferPricingMethod =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", method), format!("{:?}", deserialized));
        }
    }

    #[test]
    fn test_benford_exemption_serialization() {
        let exemptions = [
            BenfordExemption::Recurring,
            BenfordExemption::Payroll,
            BenfordExemption::FixedFees,
            BenfordExemption::RoundAmounts,
        ];

        for exemption in exemptions {
            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
            let deserialized: BenfordExemption =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", exemption), format!("{:?}", deserialized));
        }
    }

    // ==========================================================================
    // Default Value Tests
    // ==========================================================================

    #[test]
    fn test_global_config_defaults() {
        let yaml = r#"
            industry: manufacturing
            start_date: "2024-01-01"
            period_months: 6
        "#;
        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.group_currency, "USD");
        assert!(config.parallel);
        assert_eq!(config.worker_threads, 0);
        assert_eq!(config.memory_limit_mb, 0);
    }

    #[test]
    fn test_fraud_config_defaults() {
        let config = FraudConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.fraud_rate, 0.005);
        assert!(!config.clustering_enabled);
    }

    #[test]
    fn test_internal_controls_config_defaults() {
        let config = InternalControlsConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.exception_rate, 0.02);
        assert_eq!(config.sod_violation_rate, 0.01);
        assert!(config.export_control_master_data);
        assert_eq!(config.sox_materiality_threshold, 10000.0);
    }

    #[test]
    fn test_output_config_defaults() {
        let config = OutputConfig::default();
        assert!(matches!(config.mode, OutputMode::FlatFile));
        assert_eq!(config.formats, vec![FileFormat::Parquet]);
        assert!(config.compression.enabled);
        assert!(matches!(
            config.compression.algorithm,
            CompressionAlgorithm::Zstd
        ));
        assert!(config.include_acdoca);
        assert!(!config.include_bseg);
        assert!(config.partition_by_period);
        assert!(!config.partition_by_company);
    }

    #[test]
    fn test_approval_config_defaults() {
        let config = ApprovalConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.auto_approve_threshold, 1000.0);
        assert_eq!(config.rejection_rate, 0.02);
        assert_eq!(config.revision_rate, 0.05);
        assert_eq!(config.average_approval_delay_hours, 4.0);
        assert_eq!(config.thresholds.len(), 4);
    }

    #[test]
    fn test_p2p_flow_config_defaults() {
        let config = P2PFlowConfig::default();
        assert!(config.enabled);
        assert_eq!(config.three_way_match_rate, 0.95);
        assert_eq!(config.partial_delivery_rate, 0.15);
        assert_eq!(config.average_po_to_gr_days, 14);
    }

    #[test]
    fn test_o2c_flow_config_defaults() {
        let config = O2CFlowConfig::default();
        assert!(config.enabled);
        assert_eq!(config.credit_check_failure_rate, 0.02);
        assert_eq!(config.return_rate, 0.03);
        assert_eq!(config.bad_debt_rate, 0.01);
    }

    #[test]
    fn test_balance_config_defaults() {
        let config = BalanceConfig::default();
        assert!(!config.generate_opening_balances);
        assert!(config.generate_trial_balances);
        assert_eq!(config.target_gross_margin, 0.35);
        assert!(config.validate_balance_equation);
        assert!(config.reconcile_subledgers);
    }

    // ==========================================================================
    // Partial Config Deserialization Tests
    // ==========================================================================

    #[test]
    fn test_partial_config_with_defaults() {
        // Minimal config that should use all defaults
        let yaml = r#"
            global:
              industry: manufacturing
              start_date: "2024-01-01"
              period_months: 3
            companies:
              - code: "TEST"
                name: "Test Company"
                currency: "USD"
                country: "US"
                annual_transaction_volume: ten_k
            chart_of_accounts:
              complexity: small
            output:
              output_directory: "./output"
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.global.period_months, 3);
        assert_eq!(config.companies.len(), 1);
        assert!(!config.fraud.enabled); // Default
        assert!(!config.internal_controls.enabled); // Default
    }

    #[test]
    fn test_config_with_fraud_enabled() {
        let yaml = r#"
            global:
              industry: retail
              start_date: "2024-01-01"
              period_months: 12
            companies:
              - code: "RETAIL"
                name: "Retail Co"
                currency: "USD"
                country: "US"
                annual_transaction_volume: hundred_k
            chart_of_accounts:
              complexity: medium
            output:
              output_directory: "./output"
            fraud:
              enabled: true
              fraud_rate: 0.05
              clustering_enabled: true
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.fraud.enabled);
        assert_eq!(config.fraud.fraud_rate, 0.05);
        assert!(config.fraud.clustering_enabled);
    }

    #[test]
    fn test_config_with_multiple_companies() {
        let yaml = r#"
            global:
              industry: manufacturing
              start_date: "2024-01-01"
              period_months: 6
            companies:
              - code: "HQ"
                name: "Headquarters"
                currency: "USD"
                country: "US"
                annual_transaction_volume: hundred_k
                volume_weight: 1.0
              - code: "EU"
                name: "European Subsidiary"
                currency: "EUR"
                country: "DE"
                annual_transaction_volume: hundred_k
                volume_weight: 0.5
              - code: "APAC"
                name: "Asia Pacific"
                currency: "JPY"
                country: "JP"
                annual_transaction_volume: ten_k
                volume_weight: 0.3
            chart_of_accounts:
              complexity: large
            output:
              output_directory: "./output"
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.companies.len(), 3);
        assert_eq!(config.companies[0].code, "HQ");
        assert_eq!(config.companies[1].currency, "EUR");
        assert_eq!(config.companies[2].volume_weight, 0.3);
    }

    #[test]
    fn test_intercompany_config() {
        let yaml = r#"
            enabled: true
            ic_transaction_rate: 0.20
            transfer_pricing_method: cost_plus
            markup_percent: 0.08
            generate_matched_pairs: true
            generate_eliminations: true
        "#;

        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.enabled);
        assert_eq!(config.ic_transaction_rate, 0.20);
        assert!(matches!(
            config.transfer_pricing_method,
            TransferPricingMethod::CostPlus
        ));
        assert_eq!(config.markup_percent, 0.08);
        assert!(config.generate_eliminations);
    }

    // ==========================================================================
    // Company Config Tests
    // ==========================================================================

    #[test]
    fn test_company_config_defaults() {
        let yaml = r#"
            code: "TEST"
            name: "Test Company"
            currency: "USD"
            country: "US"
            annual_transaction_volume: ten_k
        "#;

        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.fiscal_year_variant, "K4"); // Default
        assert_eq!(config.volume_weight, 1.0); // Default
    }

    // ==========================================================================
    // Chart of Accounts Config Tests
    // ==========================================================================

    #[test]
    fn test_coa_config_defaults() {
        let yaml = r#"
            complexity: medium
        "#;

        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.industry_specific); // Default true
        assert!(config.custom_accounts.is_none());
        assert_eq!(config.min_hierarchy_depth, 2); // Default
        assert_eq!(config.max_hierarchy_depth, 5); // Default
    }
}