Skip to main content

datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
/// Root configuration for the synthetic data generator.
///
/// `global`, `companies`, `chart_of_accounts` and `output` carry no serde
/// default and must therefore be present in the input. Every other section is
/// marked `#[serde(default)]` and falls back to its type's `Default` value
/// when it is omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratorConfig {
    /// Global settings (required).
    pub global: GlobalConfig,
    /// Company configuration, one entry per company code (required).
    pub companies: Vec<CompanyConfig>,
    /// Chart of Accounts configuration (required).
    pub chart_of_accounts: ChartOfAccountsConfig,
    /// Transaction generation settings.
    #[serde(default)]
    pub transactions: TransactionConfig,
    /// Output configuration (required).
    pub output: OutputConfig,
    /// Fraud simulation settings.
    #[serde(default)]
    pub fraud: FraudConfig,
    /// Data quality variation settings.
    #[serde(default)]
    pub data_quality: DataQualitySchemaConfig,
    /// Internal Controls System settings.
    #[serde(default)]
    pub internal_controls: InternalControlsConfig,
    /// Business process mix.
    #[serde(default)]
    pub business_processes: BusinessProcessConfig,
    /// User persona distribution.
    #[serde(default)]
    pub user_personas: UserPersonaConfig,
    /// Template configuration for realistic data.
    #[serde(default)]
    pub templates: TemplateConfig,
    /// Approval workflow configuration.
    #[serde(default)]
    pub approval: ApprovalConfig,
    /// Department structure configuration.
    #[serde(default)]
    pub departments: DepartmentConfig,
    /// Master data generation settings.
    #[serde(default)]
    pub master_data: MasterDataConfig,
    /// Document flow generation settings.
    #[serde(default)]
    pub document_flows: DocumentFlowConfig,
    /// Intercompany transaction settings.
    #[serde(default)]
    pub intercompany: IntercompanyConfig,
    /// Balance and trial balance settings.
    #[serde(default)]
    pub balance: BalanceConfig,
    /// OCPM (Object-Centric Process Mining) settings.
    #[serde(default)]
    pub ocpm: OcpmConfig,
    /// Audit engagement and workpaper generation settings.
    #[serde(default)]
    pub audit: AuditGenerationConfig,
    /// Banking KYC/AML transaction generation settings.
    #[serde(default)]
    pub banking: datasynth_banking::BankingConfig,
    /// Scenario configuration for metadata and tagging (Phase 1.3).
    #[serde(default)]
    pub scenario: ScenarioConfig,
    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2).
    #[serde(default)]
    pub temporal: TemporalDriftConfig,
    /// Graph export configuration for accounting network export.
    #[serde(default)]
    pub graph_export: GraphExportConfig,
    /// Streaming output API configuration.
    #[serde(default)]
    pub streaming: StreamingSchemaConfig,
    /// Rate limiting configuration.
    #[serde(default)]
    pub rate_limit: RateLimitSchemaConfig,
    /// Temporal attribute generation configuration.
    #[serde(default)]
    pub temporal_attributes: TemporalAttributeSchemaConfig,
    /// Relationship generation configuration.
    #[serde(default)]
    pub relationships: RelationshipSchemaConfig,
}
92
/// Graph export configuration for accounting network and ML training exports.
///
/// This section enables exporting generated data as graphs for:
/// - Network reconstruction algorithms
/// - Graph neural network training
/// - Neo4j graph database import
///
/// Every field is optional when deserializing; the per-field fallbacks are
/// listed below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphExportConfig {
    /// Enable graph export. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Graph types to generate.
    /// Defaults to a single `GraphTypeConfig::default()` entry.
    #[serde(default = "default_graph_types")]
    pub graph_types: Vec<GraphTypeConfig>,

    /// Export formats to generate.
    /// Defaults to `[GraphExportFormat::PytorchGeometric]`.
    #[serde(default = "default_graph_formats")]
    pub formats: Vec<GraphExportFormat>,

    /// Train split ratio for ML datasets. Defaults to `0.7`.
    #[serde(default = "default_train_ratio")]
    pub train_ratio: f64,

    /// Validation split ratio for ML datasets. Defaults to `0.15`.
    ///
    /// NOTE(review): the test share is presumably the remainder
    /// `1 - train_ratio - validation_ratio`; this struct does not validate
    /// that the two ratios sum to at most 1 — confirm where that is checked.
    #[serde(default = "default_val_ratio")]
    pub validation_ratio: f64,

    /// Random seed for train/val/test splits. Defaults to `None`.
    #[serde(default)]
    pub split_seed: Option<u64>,

    /// Output subdirectory for graph exports (relative to output directory).
    /// Defaults to `"graphs"`.
    #[serde(default = "default_graph_subdir")]
    pub output_subdirectory: String,
}
129
130fn default_graph_types() -> Vec<GraphTypeConfig> {
131    vec![GraphTypeConfig::default()]
132}
133
134fn default_graph_formats() -> Vec<GraphExportFormat> {
135    vec![GraphExportFormat::PytorchGeometric]
136}
137
/// Serde fallback for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const TRAIN_RATIO: f64 = 0.7;
    TRAIN_RATIO
}

/// Serde fallback for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const VALIDATION_RATIO: f64 = 0.15;
    VALIDATION_RATIO
}

/// Serde fallback for `GraphExportConfig::output_subdirectory`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
149
150impl Default for GraphExportConfig {
151    fn default() -> Self {
152        Self {
153            enabled: false,
154            graph_types: default_graph_types(),
155            formats: default_graph_formats(),
156            train_ratio: 0.7,
157            validation_ratio: 0.15,
158            split_seed: None,
159            output_subdirectory: "graphs".to_string(),
160        }
161    }
162}
163
/// Configuration for a specific graph type to export.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphTypeConfig {
    /// Name identifier for this graph configuration.
    /// Defaults to `"accounting_network"`.
    #[serde(default = "default_graph_name")]
    pub name: String,

    /// Whether to aggregate parallel edges between the same nodes.
    /// Defaults to `false`.
    #[serde(default)]
    pub aggregate_edges: bool,

    /// Minimum edge weight to include (filters out small transactions).
    /// Defaults to `0.0`.
    #[serde(default)]
    pub min_edge_weight: f64,

    /// Whether to include document nodes (creates hub-and-spoke structure).
    /// Defaults to `false`.
    #[serde(default)]
    pub include_document_nodes: bool,
}
183
/// Serde fallback for `GraphTypeConfig::name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
187
188impl Default for GraphTypeConfig {
189    fn default() -> Self {
190        Self {
191            name: "accounting_network".to_string(),
192            aggregate_edges: false,
193            min_edge_weight: 0.0,
194            include_document_nodes: false,
195        }
196    }
197}
198
/// Export format for graph data.
///
/// Variant names are (de)serialized in `snake_case`, i.e. `pytorch_geometric`,
/// `neo4j`, `dgl` and `rust_graph`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphExportFormat {
    /// PyTorch Geometric format (.npy files + metadata.json).
    PytorchGeometric,
    /// Neo4j format (CSV files + Cypher import scripts).
    Neo4j,
    /// Deep Graph Library format.
    Dgl,
    /// RustGraph/RustAssureTwin JSON format.
    RustGraph,
}
212
/// Scenario configuration for metadata, tagging, and ML training setup.
///
/// This section enables tracking the purpose and characteristics of a generation run.
/// `Default` is derived, so an omitted section yields empty collections,
/// `None` for the optional values and `ml_training = false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScenarioConfig {
    /// Tags for categorizing and filtering datasets.
    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
    #[serde(default)]
    pub tags: Vec<String>,

    /// Data quality profile preset.
    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
    ///
    /// NOTE(review): stored as a free-form string; this struct does not
    /// restrict it to the presets listed above.
    #[serde(default)]
    pub profile: Option<String>,

    /// Human-readable description of the scenario purpose.
    #[serde(default)]
    pub description: Option<String>,

    /// Whether this run is for ML training (enables balanced labeling).
    #[serde(default)]
    pub ml_training: bool,

    /// Target anomaly class balance for ML training.
    /// If set, anomalies will be injected to achieve this ratio.
    #[serde(default)]
    pub target_anomaly_ratio: Option<f64>,

    /// Custom metadata key-value pairs.
    #[serde(default)]
    pub metadata: std::collections::HashMap<String, String>,
}
247
/// Temporal drift configuration for simulating distribution changes over time.
///
/// This enables generation of data that shows realistic temporal evolution,
/// useful for training drift detection models and testing temporal robustness.
///
/// All fields are optional when deserializing; fallbacks are noted per field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalDriftConfig {
    /// Enable temporal drift simulation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
    /// Simulates gradual inflation or business growth. Defaults to `0.02`.
    #[serde(default = "default_amount_drift")]
    pub amount_mean_drift: f64,

    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
    /// Simulates increasing volatility over time. Defaults to `0.0`.
    #[serde(default)]
    pub amount_variance_drift: f64,

    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
    /// Simulates increasing fraud attempts or degrading controls. Defaults to `0.0`.
    #[serde(default)]
    pub anomaly_rate_drift: f64,

    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
    /// Higher values cause more rapid distribution shifts. Defaults to `0.01`.
    #[serde(default = "default_concept_drift")]
    pub concept_drift_rate: f64,

    /// Sudden drift events - probability of a sudden distribution shift in any period.
    /// Defaults to `0.0`.
    #[serde(default)]
    pub sudden_drift_probability: f64,

    /// Magnitude of sudden drift events when they occur (multiplier).
    /// Defaults to `2.0`.
    #[serde(default = "default_sudden_drift_magnitude")]
    pub sudden_drift_magnitude: f64,

    /// Seasonal drift - enable cyclic patterns that repeat annually.
    /// Defaults to `false`.
    #[serde(default)]
    pub seasonal_drift: bool,

    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
    /// Defaults to `0`.
    #[serde(default)]
    pub drift_start_period: u32,

    /// Drift type: "gradual", "sudden", "recurring", "mixed".
    /// Defaults to `DriftType::Gradual`.
    #[serde(default = "default_drift_type")]
    pub drift_type: DriftType,
}
298
/// Serde fallback for `TemporalDriftConfig::amount_mean_drift`.
fn default_amount_drift() -> f64 {
    const AMOUNT_MEAN_DRIFT: f64 = 0.02;
    AMOUNT_MEAN_DRIFT
}

/// Serde fallback for `TemporalDriftConfig::concept_drift_rate`.
fn default_concept_drift() -> f64 {
    const CONCEPT_DRIFT_RATE: f64 = 0.01;
    CONCEPT_DRIFT_RATE
}

/// Serde fallback for `TemporalDriftConfig::sudden_drift_magnitude`.
fn default_sudden_drift_magnitude() -> f64 {
    const SUDDEN_DRIFT_MAGNITUDE: f64 = 2.0;
    SUDDEN_DRIFT_MAGNITUDE
}
310
311fn default_drift_type() -> DriftType {
312    DriftType::Gradual
313}
314
315impl Default for TemporalDriftConfig {
316    fn default() -> Self {
317        Self {
318            enabled: false,
319            amount_mean_drift: 0.02,
320            amount_variance_drift: 0.0,
321            anomaly_rate_drift: 0.0,
322            concept_drift_rate: 0.01,
323            sudden_drift_probability: 0.0,
324            sudden_drift_magnitude: 2.0,
325            seasonal_drift: false,
326            drift_start_period: 0,
327            drift_type: DriftType::Gradual,
328        }
329    }
330}
331
332impl TemporalDriftConfig {
333    /// Convert to core DriftConfig for use in generators.
334    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
335        datasynth_core::distributions::DriftConfig {
336            enabled: self.enabled,
337            amount_mean_drift: self.amount_mean_drift,
338            amount_variance_drift: self.amount_variance_drift,
339            anomaly_rate_drift: self.anomaly_rate_drift,
340            concept_drift_rate: self.concept_drift_rate,
341            sudden_drift_probability: self.sudden_drift_probability,
342            sudden_drift_magnitude: self.sudden_drift_magnitude,
343            seasonal_drift: self.seasonal_drift,
344            drift_start_period: self.drift_start_period,
345            drift_type: match self.drift_type {
346                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
347                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
348                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
349                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
350            },
351        }
352    }
353}
354
/// Types of temporal drift patterns.
///
/// Variant names are (de)serialized in `snake_case` (`gradual`, `sudden`,
/// `recurring`, `mixed`); `Gradual` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DriftType {
    /// Gradual, continuous drift over time (like inflation).
    #[default]
    Gradual,
    /// Sudden, point-in-time shifts (like policy changes).
    Sudden,
    /// Recurring patterns that cycle (like seasonal variations).
    Recurring,
    /// Combination of gradual background drift with occasional sudden shifts.
    Mixed,
}
369
370// ============================================================================
371// Streaming Output API Configuration (Phase 2)
372// ============================================================================
373
/// Configuration for streaming output API.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamingSchemaConfig {
    /// Enable streaming output. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Buffer size for streaming (number of items). Defaults to `1000`.
    #[serde(default = "default_buffer_size")]
    pub buffer_size: usize,
    /// Enable progress reporting. Defaults to `true`.
    #[serde(default = "default_true")]
    pub enable_progress: bool,
    /// Progress reporting interval (number of items). Defaults to `100`.
    #[serde(default = "default_progress_interval")]
    pub progress_interval: u64,
    /// Backpressure strategy. Defaults to `BackpressureSchemaStrategy::Block`.
    #[serde(default)]
    pub backpressure: BackpressureSchemaStrategy,
}
393
/// Serde fallback for `StreamingSchemaConfig::buffer_size` (items).
fn default_buffer_size() -> usize {
    const BUFFER_ITEMS: usize = 1000;
    BUFFER_ITEMS
}

/// Serde fallback for `StreamingSchemaConfig::progress_interval` (items).
fn default_progress_interval() -> u64 {
    const PROGRESS_EVERY: u64 = 100;
    PROGRESS_EVERY
}
401
402impl Default for StreamingSchemaConfig {
403    fn default() -> Self {
404        Self {
405            enabled: false,
406            buffer_size: 1000,
407            enable_progress: true,
408            progress_interval: 100,
409            backpressure: BackpressureSchemaStrategy::Block,
410        }
411    }
412}
413
/// Backpressure strategy for streaming output.
///
/// Variant names are (de)serialized in `snake_case` (`block`, `drop_oldest`,
/// `drop_newest`, `buffer`); `Block` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum BackpressureSchemaStrategy {
    /// Block until space is available in the buffer.
    #[default]
    Block,
    /// Drop oldest items when buffer is full.
    DropOldest,
    /// Drop newest items when buffer is full.
    DropNewest,
    /// Buffer overflow items up to a limit, then block.
    Buffer,
}
428
429// ============================================================================
430// Rate Limiting Configuration (Phase 5)
431// ============================================================================
432
/// Configuration for rate limiting.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimitSchemaConfig {
    /// Enable rate limiting. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Entities per second limit. Defaults to `1000.0`.
    #[serde(default = "default_entities_per_second")]
    pub entities_per_second: f64,
    /// Burst size (number of tokens in bucket). Defaults to `100`.
    #[serde(default = "default_burst_size")]
    pub burst_size: u32,
    /// Backpressure strategy for rate limiting.
    /// Defaults to `RateLimitBackpressureSchema::Block`.
    #[serde(default)]
    pub backpressure: RateLimitBackpressureSchema,
}
449
/// Serde fallback for `RateLimitSchemaConfig::entities_per_second`.
fn default_entities_per_second() -> f64 {
    const ENTITIES_PER_SECOND: f64 = 1000.0;
    ENTITIES_PER_SECOND
}

/// Serde fallback for `RateLimitSchemaConfig::burst_size` (bucket tokens).
fn default_burst_size() -> u32 {
    const BURST_TOKENS: u32 = 100;
    BURST_TOKENS
}
457
458impl Default for RateLimitSchemaConfig {
459    fn default() -> Self {
460        Self {
461            enabled: false,
462            entities_per_second: 1000.0,
463            burst_size: 100,
464            backpressure: RateLimitBackpressureSchema::Block,
465        }
466    }
467}
468
/// Backpressure strategy for rate limiting.
///
/// Variant names are (de)serialized in `snake_case` (`block`, `drop`,
/// `buffer`); `Block` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RateLimitBackpressureSchema {
    /// Block until rate allows.
    #[default]
    Block,
    /// Drop items that exceed rate.
    Drop,
    /// Buffer items and process when rate allows.
    Buffer,
}
481
482// ============================================================================
483// Temporal Attribute Generation Configuration (Phase 3)
484// ============================================================================
485
/// Configuration for temporal attribute generation.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAttributeSchemaConfig {
    /// Enable temporal attribute generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Valid time configuration. Defaults to `ValidTimeSchemaConfig::default()`.
    #[serde(default)]
    pub valid_time: ValidTimeSchemaConfig,
    /// Transaction time configuration.
    /// Defaults to `TransactionTimeSchemaConfig::default()`.
    #[serde(default)]
    pub transaction_time: TransactionTimeSchemaConfig,
    /// Generate version chains for entities. Defaults to `false`.
    #[serde(default)]
    pub generate_version_chains: bool,
    /// Average number of versions per entity. Defaults to `1.5`.
    #[serde(default = "default_avg_versions")]
    pub avg_versions_per_entity: f64,
}
505
/// Serde fallback for `TemporalAttributeSchemaConfig::avg_versions_per_entity`.
fn default_avg_versions() -> f64 {
    const AVG_VERSIONS: f64 = 1.5;
    AVG_VERSIONS
}
509
510impl Default for TemporalAttributeSchemaConfig {
511    fn default() -> Self {
512        Self {
513            enabled: false,
514            valid_time: ValidTimeSchemaConfig::default(),
515            transaction_time: TransactionTimeSchemaConfig::default(),
516            generate_version_chains: false,
517            avg_versions_per_entity: 1.5,
518        }
519    }
520}
521
/// Configuration for valid time (business time) generation.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidTimeSchemaConfig {
    /// Probability that valid_to is set (entity has ended validity).
    /// Defaults to `0.1`.
    #[serde(default = "default_closed_probability")]
    pub closed_probability: f64,
    /// Average validity duration in days. Defaults to `365`.
    #[serde(default = "default_avg_validity_days")]
    pub avg_validity_days: u32,
    /// Standard deviation of validity duration in days. Defaults to `90`.
    #[serde(default = "default_validity_stddev")]
    pub validity_stddev_days: u32,
}
535
/// Serde fallback for `ValidTimeSchemaConfig::closed_probability`.
fn default_closed_probability() -> f64 {
    const CLOSED_PROBABILITY: f64 = 0.1;
    CLOSED_PROBABILITY
}

/// Serde fallback for `ValidTimeSchemaConfig::avg_validity_days`.
fn default_avg_validity_days() -> u32 {
    const AVG_VALIDITY_DAYS: u32 = 365;
    AVG_VALIDITY_DAYS
}

/// Serde fallback for `ValidTimeSchemaConfig::validity_stddev_days`.
fn default_validity_stddev() -> u32 {
    const VALIDITY_STDDEV_DAYS: u32 = 90;
    VALIDITY_STDDEV_DAYS
}
547
548impl Default for ValidTimeSchemaConfig {
549    fn default() -> Self {
550        Self {
551            closed_probability: 0.1,
552            avg_validity_days: 365,
553            validity_stddev_days: 90,
554        }
555    }
556}
557
/// Configuration for transaction time (system time) generation.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransactionTimeSchemaConfig {
    /// Average recording delay in seconds (0 = immediate). Defaults to `0`.
    #[serde(default)]
    pub avg_recording_delay_seconds: u32,
    /// Allow backdating (recording time before valid time). Defaults to `false`.
    #[serde(default)]
    pub allow_backdating: bool,
    /// Probability of backdating if allowed. Defaults to `0.01`.
    #[serde(default = "default_backdating_probability")]
    pub backdating_probability: f64,
    /// Maximum backdate days. Defaults to `30`.
    #[serde(default = "default_max_backdate_days")]
    pub max_backdate_days: u32,
}
574
/// Serde fallback for `TransactionTimeSchemaConfig::backdating_probability`.
fn default_backdating_probability() -> f64 {
    const BACKDATING_PROBABILITY: f64 = 0.01;
    BACKDATING_PROBABILITY
}

/// Serde fallback for `TransactionTimeSchemaConfig::max_backdate_days`.
fn default_max_backdate_days() -> u32 {
    const MAX_BACKDATE_DAYS: u32 = 30;
    MAX_BACKDATE_DAYS
}
582
583impl Default for TransactionTimeSchemaConfig {
584    fn default() -> Self {
585        Self {
586            avg_recording_delay_seconds: 0,
587            allow_backdating: false,
588            backdating_probability: 0.01,
589            max_backdate_days: 30,
590        }
591    }
592}
593
594// ============================================================================
595// Relationship Generation Configuration (Phase 4)
596// ============================================================================
597
/// Configuration for relationship generation.
///
/// All fields are optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipSchemaConfig {
    /// Relationship type definitions. Defaults to an empty list.
    #[serde(default)]
    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
    /// Allow orphan entities (entities with no relationships).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub allow_orphans: bool,
    /// Probability of creating an orphan entity. Defaults to `0.01`.
    #[serde(default = "default_orphan_probability")]
    pub orphan_probability: f64,
    /// Allow circular relationships. Defaults to `false`.
    #[serde(default)]
    pub allow_circular: bool,
    /// Maximum depth for circular relationship detection. Defaults to `3`.
    #[serde(default = "default_max_circular_depth")]
    pub max_circular_depth: u32,
}
617
/// Serde fallback for `RelationshipSchemaConfig::orphan_probability`.
fn default_orphan_probability() -> f64 {
    const ORPHAN_PROBABILITY: f64 = 0.01;
    ORPHAN_PROBABILITY
}

/// Serde fallback for `RelationshipSchemaConfig::max_circular_depth`.
fn default_max_circular_depth() -> u32 {
    const MAX_CIRCULAR_DEPTH: u32 = 3;
    MAX_CIRCULAR_DEPTH
}
625
626impl Default for RelationshipSchemaConfig {
627    fn default() -> Self {
628        Self {
629            relationship_types: Vec::new(),
630            allow_orphans: true,
631            orphan_probability: 0.01,
632            allow_circular: false,
633            max_circular_depth: 3,
634        }
635    }
636}
637
/// Configuration for a specific relationship type.
///
/// `name`, `source_type` and `target_type` carry no serde default and must be
/// present when deserializing; the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipTypeSchemaConfig {
    /// Name of the relationship type (e.g., "debits", "credits", "created").
    pub name: String,
    /// Source entity type (e.g., "journal_entry").
    pub source_type: String,
    /// Target entity type (e.g., "account").
    pub target_type: String,
    /// Cardinality rule for this relationship.
    /// Defaults to `CardinalitySchemaRule::default()`.
    #[serde(default)]
    pub cardinality: CardinalitySchemaRule,
    /// Weight for this relationship in random selection. Defaults to `1.0`.
    #[serde(default = "default_relationship_weight")]
    pub weight: f64,
    /// Whether this relationship is required. Defaults to `false`.
    #[serde(default)]
    pub required: bool,
    /// Whether this relationship is directed. Defaults to `true`.
    #[serde(default = "default_true")]
    pub directed: bool,
}
660
/// Serde fallback for `RelationshipTypeSchemaConfig::weight`.
fn default_relationship_weight() -> f64 {
    const RELATIONSHIP_WEIGHT: f64 = 1.0;
    RELATIONSHIP_WEIGHT
}
664
665impl Default for RelationshipTypeSchemaConfig {
666    fn default() -> Self {
667        Self {
668            name: String::new(),
669            source_type: String::new(),
670            target_type: String::new(),
671            cardinality: CardinalitySchemaRule::default(),
672            weight: 1.0,
673            required: false,
674            directed: true,
675        }
676    }
677}
678
/// Cardinality rule for relationships in schema config.
///
/// Variant names are (de)serialized in `snake_case` (`one_to_one`,
/// `one_to_many`, `many_to_one`, `many_to_many`).
///
/// NOTE(review): nothing in this type enforces `min <= max` (or
/// `min_per_source <= max_per_source`); confirm that validation happens
/// elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CardinalitySchemaRule {
    /// One source to one target.
    OneToOne,
    /// One source to many targets.
    OneToMany {
        /// Minimum number of targets.
        min: u32,
        /// Maximum number of targets.
        max: u32,
    },
    /// Many sources to one target.
    ManyToOne {
        /// Minimum number of sources.
        min: u32,
        /// Maximum number of sources.
        max: u32,
    },
    /// Many sources to many targets.
    ManyToMany {
        /// Minimum targets per source.
        min_per_source: u32,
        /// Maximum targets per source.
        max_per_source: u32,
    },
}
707
708impl Default for CardinalitySchemaRule {
709    fn default() -> Self {
710        Self::OneToMany { min: 1, max: 5 }
711    }
712}
713
/// Global configuration settings.
///
/// `industry`, `start_date` and `period_months` carry no serde default and
/// must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalConfig {
    /// Random seed for reproducibility (`None` when not provided).
    pub seed: Option<u64>,
    /// Industry sector
    pub industry: IndustrySector,
    /// Simulation start date (YYYY-MM-DD).
    ///
    /// NOTE(review): stored as a plain string; this struct does not parse or
    /// validate the date format.
    pub start_date: String,
    /// Simulation period in months
    pub period_months: u32,
    /// Base currency for group reporting. Defaults to `"USD"`.
    #[serde(default = "default_currency")]
    pub group_currency: String,
    /// Enable parallel generation. Defaults to `true`.
    #[serde(default = "default_true")]
    pub parallel: bool,
    /// Number of worker threads (0 = auto-detect). Defaults to `0`.
    #[serde(default)]
    pub worker_threads: usize,
    /// Memory limit in MB (0 = unlimited). Defaults to `0`.
    #[serde(default)]
    pub memory_limit_mb: usize,
}
738
/// Serde fallback for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}

/// Serde fallback shared by every boolean option that is on unless the
/// config explicitly switches it off.
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
745
/// Company code configuration.
///
/// `code`, `name`, `currency`, `country` and `annual_transaction_volume` carry
/// no serde default and must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyConfig {
    /// Company code identifier
    pub code: String,
    /// Company name
    pub name: String,
    /// Local currency (ISO 4217)
    pub currency: String,
    /// Country code (ISO 3166-1 alpha-2)
    pub country: String,
    /// Fiscal year variant. Defaults to `"K4"`.
    #[serde(default = "default_fiscal_variant")]
    pub fiscal_year_variant: String,
    /// Transaction volume per year
    pub annual_transaction_volume: TransactionVolume,
    /// Company-specific transaction weight. Defaults to `1.0`.
    #[serde(default = "default_weight")]
    pub volume_weight: f64,
}
766
/// Serde fallback for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}

/// Serde fallback for `CompanyConfig::volume_weight`.
fn default_weight() -> f64 {
    const VOLUME_WEIGHT: f64 = 1.0;
    VOLUME_WEIGHT
}
773
/// Transaction volume presets.
///
/// Variant names are (de)serialized in `snake_case` (`ten_k`, `hundred_k`,
/// `one_m`, `ten_m`, `hundred_m`, `custom`). Use `count()` to obtain the
/// numeric volume.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TransactionVolume {
    /// 10,000 transactions per year
    TenK,
    /// 100,000 transactions per year
    HundredK,
    /// 1,000,000 transactions per year
    OneM,
    /// 10,000,000 transactions per year
    TenM,
    /// 100,000,000 transactions per year
    HundredM,
    /// Custom count (the wrapped value is the number of transactions per year)
    Custom(u64),
}
791
792impl TransactionVolume {
793    /// Get the transaction count.
794    pub fn count(&self) -> u64 {
795        match self {
796            Self::TenK => 10_000,
797            Self::HundredK => 100_000,
798            Self::OneM => 1_000_000,
799            Self::TenM => 10_000_000,
800            Self::HundredM => 100_000_000,
801            Self::Custom(n) => *n,
802        }
803    }
804}
805
/// Chart of Accounts configuration.
///
/// `complexity` carries no serde default and must be present when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChartOfAccountsConfig {
    /// CoA complexity level
    pub complexity: CoAComplexity,
    /// Use industry-specific accounts. Defaults to `true`.
    #[serde(default = "default_true")]
    pub industry_specific: bool,
    /// Custom account definitions file (`None` when not provided).
    pub custom_accounts: Option<PathBuf>,
    /// Minimum hierarchy depth. Defaults to `2`.
    #[serde(default = "default_min_depth")]
    pub min_hierarchy_depth: u8,
    /// Maximum hierarchy depth. Defaults to `5`.
    ///
    /// NOTE(review): this struct does not enforce
    /// `min_hierarchy_depth <= max_hierarchy_depth`; confirm where that is
    /// validated.
    #[serde(default = "default_max_depth")]
    pub max_hierarchy_depth: u8,
}
823
/// Serde fallback for `ChartOfAccountsConfig::min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    const MIN_DEPTH: u8 = 2;
    MIN_DEPTH
}

/// Serde fallback for `ChartOfAccountsConfig::max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    const MAX_DEPTH: u8 = 5;
    MAX_DEPTH
}
830
831impl Default for ChartOfAccountsConfig {
832    fn default() -> Self {
833        Self {
834            complexity: CoAComplexity::Small,
835            industry_specific: true,
836            custom_accounts: None,
837            min_hierarchy_depth: default_min_depth(),
838            max_hierarchy_depth: default_max_depth(),
839        }
840    }
841}
842
/// Transaction generation configuration.
///
/// `Default` is derived and every field is `#[serde(default)]`, so each
/// omitted sub-section falls back to its own type's `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TransactionConfig {
    /// Line item distribution
    #[serde(default)]
    pub line_item_distribution: LineItemDistributionConfig,
    /// Debit/credit balance distribution
    #[serde(default)]
    pub debit_credit_distribution: DebitCreditDistributionConfig,
    /// Even/odd line count distribution
    #[serde(default)]
    pub even_odd_distribution: EvenOddDistributionConfig,
    /// Transaction source distribution
    #[serde(default)]
    pub source_distribution: SourceDistribution,
    /// Seasonality configuration
    #[serde(default)]
    pub seasonality: SeasonalityConfig,
    /// Amount distribution
    #[serde(default)]
    pub amounts: AmountDistributionConfig,
    /// Benford's Law compliance configuration
    #[serde(default)]
    pub benford: BenfordConfig,
}
868
869/// Benford's Law compliance configuration.
870#[derive(Debug, Clone, Serialize, Deserialize)]
871pub struct BenfordConfig {
872    /// Enable Benford's Law compliance for amount generation
873    #[serde(default = "default_true")]
874    pub enabled: bool,
875    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
876    #[serde(default = "default_benford_tolerance")]
877    pub tolerance: f64,
878    /// Transaction sources exempt from Benford's Law (fixed amounts)
879    #[serde(default)]
880    pub exempt_sources: Vec<BenfordExemption>,
881}
882
883fn default_benford_tolerance() -> f64 {
884    0.05
885}
886
887impl Default for BenfordConfig {
888    fn default() -> Self {
889        Self {
890            enabled: true,
891            tolerance: default_benford_tolerance(),
892            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
893        }
894    }
895}
896
/// Types of transactions exempt from Benford's Law.
///
/// Variant names are (de)serialized in `snake_case` (`recurring`, `payroll`,
/// `fixed_fees`, `round_amounts`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BenfordExemption {
    /// Recurring fixed amounts (rent, subscriptions)
    Recurring,
    /// Payroll (standardized salaries)
    Payroll,
    /// Fixed fees and charges
    FixedFees,
    /// Round number purchases (often legitimate)
    RoundAmounts,
}
910
/// Distribution of transaction sources.
///
/// No field carries a serde default, so a section that is present must list
/// all four values. The `Default` values are fractions (e.g. `0.20`).
///
/// NOTE(review): this struct does not check that the four shares sum to 1 —
/// confirm where that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceDistribution {
    /// Manual entries percentage
    pub manual: f64,
    /// Automated system entries
    pub automated: f64,
    /// Recurring entries
    pub recurring: f64,
    /// Adjustment entries
    pub adjustment: f64,
}
923
924impl Default for SourceDistribution {
925    fn default() -> Self {
926        Self {
927            manual: 0.20,
928            automated: 0.70,
929            recurring: 0.07,
930            adjustment: 0.03,
931        }
932    }
933}
934
/// Output configuration.
///
/// `output_directory` carries no serde default and must be present when
/// deserializing; all other fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    /// Output mode. Defaults to `OutputMode::FlatFile`.
    #[serde(default)]
    pub mode: OutputMode,
    /// Output directory
    pub output_directory: PathBuf,
    /// File formats to generate. Defaults to `[FileFormat::Parquet]`.
    #[serde(default = "default_formats")]
    pub formats: Vec<FileFormat>,
    /// Compression settings. Defaults to `CompressionConfig::default()`.
    #[serde(default)]
    pub compression: CompressionConfig,
    /// Batch size for writes. Defaults to `100_000`.
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
    /// Include ACDOCA format. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_acdoca: bool,
    /// Include BSEG format. Defaults to `false`.
    #[serde(default)]
    pub include_bseg: bool,
    /// Partition by fiscal period. Defaults to `true`.
    #[serde(default = "default_true")]
    pub partition_by_period: bool,
    /// Partition by company code. Defaults to `false`.
    #[serde(default)]
    pub partition_by_company: bool,
}
965
966fn default_formats() -> Vec<FileFormat> {
967    vec![FileFormat::Parquet]
968}
/// Serde default for `OutputConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
972
973impl Default for OutputConfig {
974    fn default() -> Self {
975        Self {
976            mode: OutputMode::FlatFile,
977            output_directory: PathBuf::from("./output"),
978            formats: default_formats(),
979            compression: CompressionConfig::default(),
980            batch_size: default_batch_size(),
981            include_acdoca: true,
982            include_bseg: false,
983            partition_by_period: true,
984            partition_by_company: false,
985        }
986    }
987}
988
989/// Output mode.
990#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
991#[serde(rename_all = "snake_case")]
992pub enum OutputMode {
993    /// Stream records as generated
994    Streaming,
995    /// Write to flat files
996    #[default]
997    FlatFile,
998    /// Both streaming and flat file
999    Both,
1000}
1001
1002/// Supported file formats.
1003#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1004#[serde(rename_all = "snake_case")]
1005pub enum FileFormat {
1006    Csv,
1007    Parquet,
1008    Json,
1009    JsonLines,
1010}
1011
1012/// Compression configuration.
1013#[derive(Debug, Clone, Serialize, Deserialize)]
1014pub struct CompressionConfig {
1015    /// Enable compression
1016    #[serde(default = "default_true")]
1017    pub enabled: bool,
1018    /// Compression algorithm
1019    #[serde(default)]
1020    pub algorithm: CompressionAlgorithm,
1021    /// Compression level (1-9)
1022    #[serde(default = "default_compression_level")]
1023    pub level: u8,
1024}
1025
/// Serde default for `CompressionConfig::level`.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
1029
1030impl Default for CompressionConfig {
1031    fn default() -> Self {
1032        Self {
1033            enabled: true,
1034            algorithm: CompressionAlgorithm::default(),
1035            level: default_compression_level(),
1036        }
1037    }
1038}
1039
1040/// Compression algorithms.
1041#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
1042#[serde(rename_all = "snake_case")]
1043pub enum CompressionAlgorithm {
1044    Gzip,
1045    #[default]
1046    Zstd,
1047    Lz4,
1048    Snappy,
1049}
1050
1051/// Fraud simulation configuration.
1052#[derive(Debug, Clone, Serialize, Deserialize)]
1053pub struct FraudConfig {
1054    /// Enable fraud scenario generation
1055    #[serde(default)]
1056    pub enabled: bool,
1057    /// Overall fraud rate (0.0 to 1.0)
1058    #[serde(default = "default_fraud_rate")]
1059    pub fraud_rate: f64,
1060    /// Fraud type distribution
1061    #[serde(default)]
1062    pub fraud_type_distribution: FraudTypeDistribution,
1063    /// Enable fraud clustering
1064    #[serde(default)]
1065    pub clustering_enabled: bool,
1066    /// Clustering factor
1067    #[serde(default = "default_clustering_factor")]
1068    pub clustering_factor: f64,
1069    /// Approval thresholds for threshold-adjacent fraud pattern
1070    #[serde(default = "default_approval_thresholds")]
1071    pub approval_thresholds: Vec<f64>,
1072}
1073
/// Serde default for `FraudConfig::approval_thresholds`: six ascending amounts.
fn default_approval_thresholds() -> Vec<f64> {
    const THRESHOLDS: [f64; 6] = [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
    THRESHOLDS.to_vec()
}
1077
/// Serde default for `FraudConfig::fraud_rate`.
fn default_fraud_rate() -> f64 {
    const RATE: f64 = 0.005;
    RATE
}

/// Serde default for `FraudConfig::clustering_factor`.
fn default_clustering_factor() -> f64 {
    const FACTOR: f64 = 3.0;
    FACTOR
}
1084
1085impl Default for FraudConfig {
1086    fn default() -> Self {
1087        Self {
1088            enabled: false,
1089            fraud_rate: default_fraud_rate(),
1090            fraud_type_distribution: FraudTypeDistribution::default(),
1091            clustering_enabled: false,
1092            clustering_factor: default_clustering_factor(),
1093            approval_thresholds: default_approval_thresholds(),
1094        }
1095    }
1096}
1097
1098/// Distribution of fraud types.
1099#[derive(Debug, Clone, Serialize, Deserialize)]
1100pub struct FraudTypeDistribution {
1101    pub suspense_account_abuse: f64,
1102    pub fictitious_transaction: f64,
1103    pub revenue_manipulation: f64,
1104    pub expense_capitalization: f64,
1105    pub split_transaction: f64,
1106    pub timing_anomaly: f64,
1107    pub unauthorized_access: f64,
1108    pub duplicate_payment: f64,
1109}
1110
1111impl Default for FraudTypeDistribution {
1112    fn default() -> Self {
1113        Self {
1114            suspense_account_abuse: 0.25,
1115            fictitious_transaction: 0.15,
1116            revenue_manipulation: 0.10,
1117            expense_capitalization: 0.10,
1118            split_transaction: 0.15,
1119            timing_anomaly: 0.10,
1120            unauthorized_access: 0.10,
1121            duplicate_payment: 0.05,
1122        }
1123    }
1124}
1125
1126/// Internal Controls System (ICS) configuration.
1127#[derive(Debug, Clone, Serialize, Deserialize)]
1128pub struct InternalControlsConfig {
1129    /// Enable internal controls system
1130    #[serde(default)]
1131    pub enabled: bool,
1132    /// Rate at which controls result in exceptions (0.0 - 1.0)
1133    #[serde(default = "default_exception_rate")]
1134    pub exception_rate: f64,
1135    /// Rate at which SoD violations occur (0.0 - 1.0)
1136    #[serde(default = "default_sod_violation_rate")]
1137    pub sod_violation_rate: f64,
1138    /// Export control master data to separate files
1139    #[serde(default = "default_true")]
1140    pub export_control_master_data: bool,
1141    /// SOX materiality threshold for marking transactions as SOX-relevant
1142    #[serde(default = "default_sox_materiality_threshold")]
1143    pub sox_materiality_threshold: f64,
1144}
1145
/// Serde default for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    const RATE: f64 = 0.02;
    RATE
}

/// Serde default for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    const RATE: f64 = 0.01;
    RATE
}

/// Serde default for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    const THRESHOLD: f64 = 10000.0;
    THRESHOLD
}
1157
1158impl Default for InternalControlsConfig {
1159    fn default() -> Self {
1160        Self {
1161            enabled: false,
1162            exception_rate: default_exception_rate(),
1163            sod_violation_rate: default_sod_violation_rate(),
1164            export_control_master_data: true,
1165            sox_materiality_threshold: default_sox_materiality_threshold(),
1166        }
1167    }
1168}
1169
1170/// Business process configuration.
1171#[derive(Debug, Clone, Serialize, Deserialize)]
1172pub struct BusinessProcessConfig {
1173    /// Order-to-Cash weight
1174    #[serde(default = "default_o2c")]
1175    pub o2c_weight: f64,
1176    /// Procure-to-Pay weight
1177    #[serde(default = "default_p2p")]
1178    pub p2p_weight: f64,
1179    /// Record-to-Report weight
1180    #[serde(default = "default_r2r")]
1181    pub r2r_weight: f64,
1182    /// Hire-to-Retire weight
1183    #[serde(default = "default_h2r")]
1184    pub h2r_weight: f64,
1185    /// Acquire-to-Retire weight
1186    #[serde(default = "default_a2r")]
1187    pub a2r_weight: f64,
1188}
1189
/// Serde default for `BusinessProcessConfig::o2c_weight`.
fn default_o2c() -> f64 {
    const WEIGHT: f64 = 0.35;
    WEIGHT
}

/// Serde default for `BusinessProcessConfig::p2p_weight`.
fn default_p2p() -> f64 {
    const WEIGHT: f64 = 0.30;
    WEIGHT
}

/// Serde default for `BusinessProcessConfig::r2r_weight`.
fn default_r2r() -> f64 {
    const WEIGHT: f64 = 0.20;
    WEIGHT
}

/// Serde default for `BusinessProcessConfig::h2r_weight`.
fn default_h2r() -> f64 {
    const WEIGHT: f64 = 0.10;
    WEIGHT
}

/// Serde default for `BusinessProcessConfig::a2r_weight`.
fn default_a2r() -> f64 {
    const WEIGHT: f64 = 0.05;
    WEIGHT
}
1205
1206impl Default for BusinessProcessConfig {
1207    fn default() -> Self {
1208        Self {
1209            o2c_weight: default_o2c(),
1210            p2p_weight: default_p2p(),
1211            r2r_weight: default_r2r(),
1212            h2r_weight: default_h2r(),
1213            a2r_weight: default_a2r(),
1214        }
1215    }
1216}
1217
1218/// User persona configuration.
1219#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1220pub struct UserPersonaConfig {
1221    /// Distribution of user personas
1222    #[serde(default)]
1223    pub persona_distribution: PersonaDistribution,
1224    /// Users per persona type
1225    #[serde(default)]
1226    pub users_per_persona: UsersPerPersona,
1227}
1228
1229/// Distribution of user personas for transaction generation.
1230#[derive(Debug, Clone, Serialize, Deserialize)]
1231pub struct PersonaDistribution {
1232    pub junior_accountant: f64,
1233    pub senior_accountant: f64,
1234    pub controller: f64,
1235    pub manager: f64,
1236    pub automated_system: f64,
1237}
1238
1239impl Default for PersonaDistribution {
1240    fn default() -> Self {
1241        Self {
1242            junior_accountant: 0.15,
1243            senior_accountant: 0.15,
1244            controller: 0.05,
1245            manager: 0.05,
1246            automated_system: 0.60,
1247        }
1248    }
1249}
1250
1251/// Number of users per persona type.
1252#[derive(Debug, Clone, Serialize, Deserialize)]
1253pub struct UsersPerPersona {
1254    pub junior_accountant: usize,
1255    pub senior_accountant: usize,
1256    pub controller: usize,
1257    pub manager: usize,
1258    pub automated_system: usize,
1259}
1260
1261impl Default for UsersPerPersona {
1262    fn default() -> Self {
1263        Self {
1264            junior_accountant: 10,
1265            senior_accountant: 5,
1266            controller: 2,
1267            manager: 3,
1268            automated_system: 20,
1269        }
1270    }
1271}
1272
1273/// Template configuration for realistic data generation.
1274#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1275pub struct TemplateConfig {
1276    /// Name generation settings
1277    #[serde(default)]
1278    pub names: NameTemplateConfig,
1279    /// Description generation settings
1280    #[serde(default)]
1281    pub descriptions: DescriptionTemplateConfig,
1282    /// Reference number settings
1283    #[serde(default)]
1284    pub references: ReferenceTemplateConfig,
1285}
1286
1287/// Name template configuration.
1288#[derive(Debug, Clone, Serialize, Deserialize)]
1289pub struct NameTemplateConfig {
1290    /// Distribution of name cultures
1291    #[serde(default)]
1292    pub culture_distribution: CultureDistribution,
1293    /// Email domain for generated users
1294    #[serde(default = "default_email_domain")]
1295    pub email_domain: String,
1296    /// Generate realistic display names
1297    #[serde(default = "default_true")]
1298    pub generate_realistic_names: bool,
1299}
1300
/// Serde default for `NameTemplateConfig::email_domain`.
fn default_email_domain() -> String {
    String::from("company.com")
}
1304
1305impl Default for NameTemplateConfig {
1306    fn default() -> Self {
1307        Self {
1308            culture_distribution: CultureDistribution::default(),
1309            email_domain: default_email_domain(),
1310            generate_realistic_names: true,
1311        }
1312    }
1313}
1314
1315/// Distribution of name cultures for generation.
1316#[derive(Debug, Clone, Serialize, Deserialize)]
1317pub struct CultureDistribution {
1318    pub western_us: f64,
1319    pub hispanic: f64,
1320    pub german: f64,
1321    pub french: f64,
1322    pub chinese: f64,
1323    pub japanese: f64,
1324    pub indian: f64,
1325}
1326
1327impl Default for CultureDistribution {
1328    fn default() -> Self {
1329        Self {
1330            western_us: 0.40,
1331            hispanic: 0.20,
1332            german: 0.10,
1333            french: 0.05,
1334            chinese: 0.10,
1335            japanese: 0.05,
1336            indian: 0.10,
1337        }
1338    }
1339}
1340
1341/// Description template configuration.
1342#[derive(Debug, Clone, Serialize, Deserialize)]
1343pub struct DescriptionTemplateConfig {
1344    /// Generate header text for journal entries
1345    #[serde(default = "default_true")]
1346    pub generate_header_text: bool,
1347    /// Generate line text for journal entry lines
1348    #[serde(default = "default_true")]
1349    pub generate_line_text: bool,
1350}
1351
1352impl Default for DescriptionTemplateConfig {
1353    fn default() -> Self {
1354        Self {
1355            generate_header_text: true,
1356            generate_line_text: true,
1357        }
1358    }
1359}
1360
1361/// Reference number template configuration.
1362#[derive(Debug, Clone, Serialize, Deserialize)]
1363pub struct ReferenceTemplateConfig {
1364    /// Generate reference numbers
1365    #[serde(default = "default_true")]
1366    pub generate_references: bool,
1367    /// Invoice prefix
1368    #[serde(default = "default_invoice_prefix")]
1369    pub invoice_prefix: String,
1370    /// Purchase order prefix
1371    #[serde(default = "default_po_prefix")]
1372    pub po_prefix: String,
1373    /// Sales order prefix
1374    #[serde(default = "default_so_prefix")]
1375    pub so_prefix: String,
1376}
1377
/// Serde default for `ReferenceTemplateConfig::invoice_prefix`.
fn default_invoice_prefix() -> String {
    String::from("INV")
}

/// Serde default for `ReferenceTemplateConfig::po_prefix`.
fn default_po_prefix() -> String {
    String::from("PO")
}

/// Serde default for `ReferenceTemplateConfig::so_prefix`.
fn default_so_prefix() -> String {
    String::from("SO")
}
1387
1388impl Default for ReferenceTemplateConfig {
1389    fn default() -> Self {
1390        Self {
1391            generate_references: true,
1392            invoice_prefix: default_invoice_prefix(),
1393            po_prefix: default_po_prefix(),
1394            so_prefix: default_so_prefix(),
1395        }
1396    }
1397}
1398
1399/// Approval workflow configuration.
1400#[derive(Debug, Clone, Serialize, Deserialize)]
1401pub struct ApprovalConfig {
1402    /// Enable approval workflow generation
1403    #[serde(default)]
1404    pub enabled: bool,
1405    /// Threshold below which transactions are auto-approved
1406    #[serde(default = "default_auto_approve_threshold")]
1407    pub auto_approve_threshold: f64,
1408    /// Rate at which approvals are rejected (0.0 to 1.0)
1409    #[serde(default = "default_rejection_rate")]
1410    pub rejection_rate: f64,
1411    /// Rate at which approvals require revision (0.0 to 1.0)
1412    #[serde(default = "default_revision_rate")]
1413    pub revision_rate: f64,
1414    /// Average delay in hours for approval processing
1415    #[serde(default = "default_approval_delay_hours")]
1416    pub average_approval_delay_hours: f64,
1417    /// Approval chain thresholds
1418    #[serde(default)]
1419    pub thresholds: Vec<ApprovalThresholdConfig>,
1420}
1421
1422fn default_auto_approve_threshold() -> f64 {
1423    1000.0
1424}
1425fn default_rejection_rate() -> f64 {
1426    0.02
1427}
1428fn default_revision_rate() -> f64 {
1429    0.05
1430}
1431fn default_approval_delay_hours() -> f64 {
1432    4.0
1433}
1434
1435impl Default for ApprovalConfig {
1436    fn default() -> Self {
1437        Self {
1438            enabled: false,
1439            auto_approve_threshold: default_auto_approve_threshold(),
1440            rejection_rate: default_rejection_rate(),
1441            revision_rate: default_revision_rate(),
1442            average_approval_delay_hours: default_approval_delay_hours(),
1443            thresholds: vec![
1444                ApprovalThresholdConfig {
1445                    amount: 1000.0,
1446                    level: 1,
1447                    roles: vec!["senior_accountant".to_string()],
1448                },
1449                ApprovalThresholdConfig {
1450                    amount: 10000.0,
1451                    level: 2,
1452                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
1453                },
1454                ApprovalThresholdConfig {
1455                    amount: 100000.0,
1456                    level: 3,
1457                    roles: vec![
1458                        "senior_accountant".to_string(),
1459                        "controller".to_string(),
1460                        "manager".to_string(),
1461                    ],
1462                },
1463                ApprovalThresholdConfig {
1464                    amount: 500000.0,
1465                    level: 4,
1466                    roles: vec![
1467                        "senior_accountant".to_string(),
1468                        "controller".to_string(),
1469                        "manager".to_string(),
1470                        "executive".to_string(),
1471                    ],
1472                },
1473            ],
1474        }
1475    }
1476}
1477
1478/// Configuration for a single approval threshold.
1479#[derive(Debug, Clone, Serialize, Deserialize)]
1480pub struct ApprovalThresholdConfig {
1481    /// Amount threshold
1482    pub amount: f64,
1483    /// Approval level required
1484    pub level: u8,
1485    /// Roles that can approve at this level
1486    pub roles: Vec<String>,
1487}
1488
1489/// Department configuration.
1490#[derive(Debug, Clone, Serialize, Deserialize)]
1491pub struct DepartmentConfig {
1492    /// Enable department assignment
1493    #[serde(default)]
1494    pub enabled: bool,
1495    /// Multiplier for department headcounts
1496    #[serde(default = "default_headcount_multiplier")]
1497    pub headcount_multiplier: f64,
1498    /// Custom department definitions (optional)
1499    #[serde(default)]
1500    pub custom_departments: Vec<CustomDepartmentConfig>,
1501}
1502
/// Serde default for `DepartmentConfig::headcount_multiplier`.
fn default_headcount_multiplier() -> f64 {
    const MULTIPLIER: f64 = 1.0;
    MULTIPLIER
}
1506
1507impl Default for DepartmentConfig {
1508    fn default() -> Self {
1509        Self {
1510            enabled: false,
1511            headcount_multiplier: default_headcount_multiplier(),
1512            custom_departments: Vec::new(),
1513        }
1514    }
1515}
1516
1517/// Custom department definition.
1518#[derive(Debug, Clone, Serialize, Deserialize)]
1519pub struct CustomDepartmentConfig {
1520    /// Department code
1521    pub code: String,
1522    /// Department name
1523    pub name: String,
1524    /// Associated cost center
1525    #[serde(default)]
1526    pub cost_center: Option<String>,
1527    /// Primary business processes
1528    #[serde(default)]
1529    pub primary_processes: Vec<String>,
1530    /// Parent department code
1531    #[serde(default)]
1532    pub parent_code: Option<String>,
1533}
1534
1535// ============================================================================
1536// Master Data Configuration
1537// ============================================================================
1538
1539/// Master data generation configuration.
1540#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1541pub struct MasterDataConfig {
1542    /// Vendor master data settings
1543    #[serde(default)]
1544    pub vendors: VendorMasterConfig,
1545    /// Customer master data settings
1546    #[serde(default)]
1547    pub customers: CustomerMasterConfig,
1548    /// Material master data settings
1549    #[serde(default)]
1550    pub materials: MaterialMasterConfig,
1551    /// Fixed asset master data settings
1552    #[serde(default)]
1553    pub fixed_assets: FixedAssetMasterConfig,
1554    /// Employee master data settings
1555    #[serde(default)]
1556    pub employees: EmployeeMasterConfig,
1557    /// Cost center master data settings
1558    #[serde(default)]
1559    pub cost_centers: CostCenterMasterConfig,
1560}
1561
1562/// Vendor master data configuration.
1563#[derive(Debug, Clone, Serialize, Deserialize)]
1564pub struct VendorMasterConfig {
1565    /// Number of vendors to generate
1566    #[serde(default = "default_vendor_count")]
1567    pub count: usize,
1568    /// Percentage of vendors that are intercompany (0.0 to 1.0)
1569    #[serde(default = "default_intercompany_percent")]
1570    pub intercompany_percent: f64,
1571    /// Payment terms distribution
1572    #[serde(default)]
1573    pub payment_terms_distribution: PaymentTermsDistribution,
1574    /// Vendor behavior distribution
1575    #[serde(default)]
1576    pub behavior_distribution: VendorBehaviorDistribution,
1577    /// Generate bank account details
1578    #[serde(default = "default_true")]
1579    pub generate_bank_accounts: bool,
1580    /// Generate tax IDs
1581    #[serde(default = "default_true")]
1582    pub generate_tax_ids: bool,
1583}
1584
/// Serde default for `VendorMasterConfig::count`.
fn default_vendor_count() -> usize {
    const VENDORS: usize = 500;
    VENDORS
}

/// Serde default for the `intercompany_percent` field of both the vendor and
/// the customer master configs.
fn default_intercompany_percent() -> f64 {
    const SHARE: f64 = 0.05;
    SHARE
}
1592
1593impl Default for VendorMasterConfig {
1594    fn default() -> Self {
1595        Self {
1596            count: default_vendor_count(),
1597            intercompany_percent: default_intercompany_percent(),
1598            payment_terms_distribution: PaymentTermsDistribution::default(),
1599            behavior_distribution: VendorBehaviorDistribution::default(),
1600            generate_bank_accounts: true,
1601            generate_tax_ids: true,
1602        }
1603    }
1604}
1605
1606/// Payment terms distribution for vendors.
1607#[derive(Debug, Clone, Serialize, Deserialize)]
1608pub struct PaymentTermsDistribution {
1609    /// Net 30 days
1610    pub net_30: f64,
1611    /// Net 60 days
1612    pub net_60: f64,
1613    /// Net 90 days
1614    pub net_90: f64,
1615    /// 2% 10 Net 30 (early payment discount)
1616    pub two_ten_net_30: f64,
1617    /// Due on receipt
1618    pub due_on_receipt: f64,
1619    /// End of month
1620    pub end_of_month: f64,
1621}
1622
1623impl Default for PaymentTermsDistribution {
1624    fn default() -> Self {
1625        Self {
1626            net_30: 0.40,
1627            net_60: 0.20,
1628            net_90: 0.10,
1629            two_ten_net_30: 0.15,
1630            due_on_receipt: 0.05,
1631            end_of_month: 0.10,
1632        }
1633    }
1634}
1635
1636/// Vendor behavior distribution.
1637#[derive(Debug, Clone, Serialize, Deserialize)]
1638pub struct VendorBehaviorDistribution {
1639    /// Reliable vendors (consistent delivery, quality)
1640    pub reliable: f64,
1641    /// Sometimes late vendors
1642    pub sometimes_late: f64,
1643    /// Inconsistent quality vendors
1644    pub inconsistent_quality: f64,
1645    /// Premium vendors (high quality, premium pricing)
1646    pub premium: f64,
1647    /// Budget vendors (lower quality, lower pricing)
1648    pub budget: f64,
1649}
1650
1651impl Default for VendorBehaviorDistribution {
1652    fn default() -> Self {
1653        Self {
1654            reliable: 0.50,
1655            sometimes_late: 0.20,
1656            inconsistent_quality: 0.10,
1657            premium: 0.10,
1658            budget: 0.10,
1659        }
1660    }
1661}
1662
1663/// Customer master data configuration.
1664#[derive(Debug, Clone, Serialize, Deserialize)]
1665pub struct CustomerMasterConfig {
1666    /// Number of customers to generate
1667    #[serde(default = "default_customer_count")]
1668    pub count: usize,
1669    /// Percentage of customers that are intercompany (0.0 to 1.0)
1670    #[serde(default = "default_intercompany_percent")]
1671    pub intercompany_percent: f64,
1672    /// Credit rating distribution
1673    #[serde(default)]
1674    pub credit_rating_distribution: CreditRatingDistribution,
1675    /// Payment behavior distribution
1676    #[serde(default)]
1677    pub payment_behavior_distribution: PaymentBehaviorDistribution,
1678    /// Generate credit limits based on rating
1679    #[serde(default = "default_true")]
1680    pub generate_credit_limits: bool,
1681}
1682
/// Serde default for `CustomerMasterConfig::count`.
fn default_customer_count() -> usize {
    const CUSTOMERS: usize = 2000;
    CUSTOMERS
}
1686
1687impl Default for CustomerMasterConfig {
1688    fn default() -> Self {
1689        Self {
1690            count: default_customer_count(),
1691            intercompany_percent: default_intercompany_percent(),
1692            credit_rating_distribution: CreditRatingDistribution::default(),
1693            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
1694            generate_credit_limits: true,
1695        }
1696    }
1697}
1698
1699/// Credit rating distribution for customers.
1700#[derive(Debug, Clone, Serialize, Deserialize)]
1701pub struct CreditRatingDistribution {
1702    /// AAA rating
1703    pub aaa: f64,
1704    /// AA rating
1705    pub aa: f64,
1706    /// A rating
1707    pub a: f64,
1708    /// BBB rating
1709    pub bbb: f64,
1710    /// BB rating
1711    pub bb: f64,
1712    /// B rating
1713    pub b: f64,
1714    /// Below B rating
1715    pub below_b: f64,
1716}
1717
1718impl Default for CreditRatingDistribution {
1719    fn default() -> Self {
1720        Self {
1721            aaa: 0.05,
1722            aa: 0.10,
1723            a: 0.20,
1724            bbb: 0.30,
1725            bb: 0.20,
1726            b: 0.10,
1727            below_b: 0.05,
1728        }
1729    }
1730}
1731
1732/// Payment behavior distribution for customers.
1733#[derive(Debug, Clone, Serialize, Deserialize)]
1734pub struct PaymentBehaviorDistribution {
1735    /// Always pays early
1736    pub early_payer: f64,
1737    /// Pays on time
1738    pub on_time: f64,
1739    /// Occasionally late
1740    pub occasional_late: f64,
1741    /// Frequently late
1742    pub frequent_late: f64,
1743    /// Takes early payment discounts
1744    pub discount_taker: f64,
1745}
1746
1747impl Default for PaymentBehaviorDistribution {
1748    fn default() -> Self {
1749        Self {
1750            early_payer: 0.10,
1751            on_time: 0.50,
1752            occasional_late: 0.25,
1753            frequent_late: 0.10,
1754            discount_taker: 0.05,
1755        }
1756    }
1757}
1758
1759/// Material master data configuration.
1760#[derive(Debug, Clone, Serialize, Deserialize)]
1761pub struct MaterialMasterConfig {
1762    /// Number of materials to generate
1763    #[serde(default = "default_material_count")]
1764    pub count: usize,
1765    /// Material type distribution
1766    #[serde(default)]
1767    pub type_distribution: MaterialTypeDistribution,
1768    /// Valuation method distribution
1769    #[serde(default)]
1770    pub valuation_distribution: ValuationMethodDistribution,
1771    /// Percentage of materials with BOM (bill of materials)
1772    #[serde(default = "default_bom_percent")]
1773    pub bom_percent: f64,
1774    /// Maximum BOM depth
1775    #[serde(default = "default_max_bom_depth")]
1776    pub max_bom_depth: u8,
1777}
1778
/// Serde default for `MaterialMasterConfig::count`.
fn default_material_count() -> usize {
    const MATERIALS: usize = 5000;
    MATERIALS
}

/// Serde default for `MaterialMasterConfig::bom_percent`.
fn default_bom_percent() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}

/// Serde default for `MaterialMasterConfig::max_bom_depth`.
fn default_max_bom_depth() -> u8 {
    const DEPTH: u8 = 3;
    DEPTH
}
1790
1791impl Default for MaterialMasterConfig {
1792    fn default() -> Self {
1793        Self {
1794            count: default_material_count(),
1795            type_distribution: MaterialTypeDistribution::default(),
1796            valuation_distribution: ValuationMethodDistribution::default(),
1797            bom_percent: default_bom_percent(),
1798            max_bom_depth: default_max_bom_depth(),
1799        }
1800    }
1801}
1802
1803/// Material type distribution.
1804#[derive(Debug, Clone, Serialize, Deserialize)]
1805pub struct MaterialTypeDistribution {
1806    /// Raw materials
1807    pub raw_material: f64,
1808    /// Semi-finished goods
1809    pub semi_finished: f64,
1810    /// Finished goods
1811    pub finished_good: f64,
1812    /// Trading goods (purchased for resale)
1813    pub trading_good: f64,
1814    /// Operating supplies
1815    pub operating_supply: f64,
1816    /// Services
1817    pub service: f64,
1818}
1819
1820impl Default for MaterialTypeDistribution {
1821    fn default() -> Self {
1822        Self {
1823            raw_material: 0.30,
1824            semi_finished: 0.15,
1825            finished_good: 0.25,
1826            trading_good: 0.15,
1827            operating_supply: 0.10,
1828            service: 0.05,
1829        }
1830    }
1831}
1832
1833/// Valuation method distribution for materials.
1834#[derive(Debug, Clone, Serialize, Deserialize)]
1835pub struct ValuationMethodDistribution {
1836    /// Standard cost
1837    pub standard_cost: f64,
1838    /// Moving average
1839    pub moving_average: f64,
1840    /// FIFO (First In, First Out)
1841    pub fifo: f64,
1842    /// LIFO (Last In, First Out)
1843    pub lifo: f64,
1844}
1845
1846impl Default for ValuationMethodDistribution {
1847    fn default() -> Self {
1848        Self {
1849            standard_cost: 0.50,
1850            moving_average: 0.30,
1851            fifo: 0.15,
1852            lifo: 0.05,
1853        }
1854    }
1855}
1856
1857/// Fixed asset master data configuration.
1858#[derive(Debug, Clone, Serialize, Deserialize)]
1859pub struct FixedAssetMasterConfig {
1860    /// Number of fixed assets to generate
1861    #[serde(default = "default_asset_count")]
1862    pub count: usize,
1863    /// Asset class distribution
1864    #[serde(default)]
1865    pub class_distribution: AssetClassDistribution,
1866    /// Depreciation method distribution
1867    #[serde(default)]
1868    pub depreciation_distribution: DepreciationMethodDistribution,
1869    /// Percentage of assets that are fully depreciated
1870    #[serde(default = "default_fully_depreciated_percent")]
1871    pub fully_depreciated_percent: f64,
1872    /// Generate acquisition history
1873    #[serde(default = "default_true")]
1874    pub generate_acquisition_history: bool,
1875}
1876
/// Serde default for `FixedAssetMasterConfig::count`.
fn default_asset_count() -> usize {
    const ASSETS: usize = 800;
    ASSETS
}

/// Serde default for `FixedAssetMasterConfig::fully_depreciated_percent`.
fn default_fully_depreciated_percent() -> f64 {
    const SHARE: f64 = 0.15;
    SHARE
}
1884
1885impl Default for FixedAssetMasterConfig {
1886    fn default() -> Self {
1887        Self {
1888            count: default_asset_count(),
1889            class_distribution: AssetClassDistribution::default(),
1890            depreciation_distribution: DepreciationMethodDistribution::default(),
1891            fully_depreciated_percent: default_fully_depreciated_percent(),
1892            generate_acquisition_history: true,
1893        }
1894    }
1895}
1896
1897/// Asset class distribution.
1898#[derive(Debug, Clone, Serialize, Deserialize)]
1899pub struct AssetClassDistribution {
1900    /// Buildings and structures
1901    pub buildings: f64,
1902    /// Machinery and equipment
1903    pub machinery: f64,
1904    /// Vehicles
1905    pub vehicles: f64,
1906    /// IT equipment
1907    pub it_equipment: f64,
1908    /// Furniture and fixtures
1909    pub furniture: f64,
1910    /// Land (non-depreciable)
1911    pub land: f64,
1912    /// Leasehold improvements
1913    pub leasehold: f64,
1914}
1915
1916impl Default for AssetClassDistribution {
1917    fn default() -> Self {
1918        Self {
1919            buildings: 0.15,
1920            machinery: 0.30,
1921            vehicles: 0.15,
1922            it_equipment: 0.20,
1923            furniture: 0.10,
1924            land: 0.05,
1925            leasehold: 0.05,
1926        }
1927    }
1928}
1929
1930/// Depreciation method distribution.
1931#[derive(Debug, Clone, Serialize, Deserialize)]
1932pub struct DepreciationMethodDistribution {
1933    /// Straight line
1934    pub straight_line: f64,
1935    /// Declining balance
1936    pub declining_balance: f64,
1937    /// Double declining balance
1938    pub double_declining: f64,
1939    /// Sum of years' digits
1940    pub sum_of_years: f64,
1941    /// Units of production
1942    pub units_of_production: f64,
1943}
1944
1945impl Default for DepreciationMethodDistribution {
1946    fn default() -> Self {
1947        Self {
1948            straight_line: 0.60,
1949            declining_balance: 0.20,
1950            double_declining: 0.10,
1951            sum_of_years: 0.05,
1952            units_of_production: 0.05,
1953        }
1954    }
1955}
1956
/// Employee master data configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be
/// supplied when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeMasterConfig {
    /// Number of employees to generate (1500 when omitted).
    #[serde(default = "default_employee_count")]
    pub count: usize,
    /// Generate organizational hierarchy (filled by `default_true` when
    /// omitted; the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth (6 when omitted).
    #[serde(default = "default_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
    /// Average span of control, i.e. direct reports per manager (5.0 when omitted).
    #[serde(default = "default_span_of_control")]
    pub average_span_of_control: f64,
    /// Approval limit distribution by job level (`ApprovalLimitDistribution::default()` when omitted).
    #[serde(default)]
    pub approval_limits: ApprovalLimitDistribution,
    /// Department distribution (`EmployeeDepartmentDistribution::default()` when omitted).
    #[serde(default)]
    pub department_distribution: EmployeeDepartmentDistribution,
}
1979
/// Fallback for `EmployeeMasterConfig::count` when the key is omitted.
fn default_employee_count() -> usize {
    1_500
}

/// Fallback for `EmployeeMasterConfig::max_hierarchy_depth` when the key is omitted.
fn default_hierarchy_depth() -> u8 {
    6
}

/// Fallback for `EmployeeMasterConfig::average_span_of_control` when the key is omitted.
fn default_span_of_control() -> f64 {
    5.0
}
1991
1992impl Default for EmployeeMasterConfig {
1993    fn default() -> Self {
1994        Self {
1995            count: default_employee_count(),
1996            generate_hierarchy: true,
1997            max_hierarchy_depth: default_hierarchy_depth(),
1998            average_span_of_control: default_span_of_control(),
1999            approval_limits: ApprovalLimitDistribution::default(),
2000            department_distribution: EmployeeDepartmentDistribution::default(),
2001        }
2002    }
2003}
2004
/// Approval limit distribution by job level.
///
/// One `f64` limit per job level, each with its own serde fallback. The
/// currency or unit of the amounts is not specified in this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalLimitDistribution {
    /// Staff level approval limit (1000.0 when omitted).
    #[serde(default = "default_staff_limit")]
    pub staff: f64,
    /// Senior staff approval limit (5000.0 when omitted).
    #[serde(default = "default_senior_limit")]
    pub senior: f64,
    /// Manager approval limit (25000.0 when omitted).
    #[serde(default = "default_manager_limit")]
    pub manager: f64,
    /// Director approval limit (100000.0 when omitted).
    #[serde(default = "default_director_limit")]
    pub director: f64,
    /// VP approval limit (500000.0 when omitted).
    #[serde(default = "default_vp_limit")]
    pub vp: f64,
    /// Executive approval limit (`f64::INFINITY` when omitted).
    ///
    /// NOTE(review): non-finite floats may not round-trip through every serde
    /// format (JSON, for one, has no infinity literal) — confirm the config
    /// formats in use handle this default.
    #[serde(default = "default_executive_limit")]
    pub executive: f64,
}
2027
/// Fallback for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    1_000.0
}

/// Fallback for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    5_000.0
}

/// Fallback for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    25_000.0
}

/// Fallback for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    100_000.0
}

/// Fallback for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    500_000.0
}

/// Fallback for `ApprovalLimitDistribution::executive`: positive infinity.
fn default_executive_limit() -> f64 {
    f64::INFINITY
}
2046
2047impl Default for ApprovalLimitDistribution {
2048    fn default() -> Self {
2049        Self {
2050            staff: default_staff_limit(),
2051            senior: default_senior_limit(),
2052            manager: default_manager_limit(),
2053            director: default_director_limit(),
2054            vp: default_vp_limit(),
2055            executive: default_executive_limit(),
2056        }
2057    }
2058}
2059
/// Employee distribution across departments.
///
/// One `f64` weight per department. The fields have no serde fallbacks, so
/// all eight keys must be present when this struct is deserialized. The
/// `Default` impl supplies weights that total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeDepartmentDistribution {
    /// Finance and Accounting (0.12 in `Default`).
    pub finance: f64,
    /// Procurement (0.10 in `Default`).
    pub procurement: f64,
    /// Sales (0.25 in `Default`).
    pub sales: f64,
    /// Warehouse and Logistics (0.15 in `Default`).
    pub warehouse: f64,
    /// IT (0.10 in `Default`).
    pub it: f64,
    /// Human Resources (0.05 in `Default`).
    pub hr: f64,
    /// Operations (0.20 in `Default`).
    pub operations: f64,
    /// Executive (0.03 in `Default`).
    pub executive: f64,
}
2080
2081impl Default for EmployeeDepartmentDistribution {
2082    fn default() -> Self {
2083        Self {
2084            finance: 0.12,
2085            procurement: 0.10,
2086            sales: 0.25,
2087            warehouse: 0.15,
2088            it: 0.10,
2089            hr: 0.05,
2090            operations: 0.20,
2091            executive: 0.03,
2092        }
2093    }
2094}
2095
/// Cost center master data configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostCenterMasterConfig {
    /// Number of cost centers to generate (50 when omitted).
    #[serde(default = "default_cost_center_count")]
    pub count: usize,
    /// Generate cost center hierarchy (filled by `default_true` when omitted;
    /// the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth (3 when omitted).
    #[serde(default = "default_cc_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
}
2109
/// Fallback for `CostCenterMasterConfig::count` when the key is omitted.
fn default_cost_center_count() -> usize {
    50
}

/// Fallback for `CostCenterMasterConfig::max_hierarchy_depth` when the key is omitted.
fn default_cc_hierarchy_depth() -> u8 {
    3
}
2117
2118impl Default for CostCenterMasterConfig {
2119    fn default() -> Self {
2120        Self {
2121            count: default_cost_center_count(),
2122            generate_hierarchy: true,
2123            max_hierarchy_depth: default_cc_hierarchy_depth(),
2124        }
2125    }
2126}
2127
2128// ============================================================================
2129// Document Flow Configuration
2130// ============================================================================
2131
/// Document flow generation configuration.
///
/// Groups the P2P and O2C flow settings with two switches that apply to both.
/// Every field carries a serde fallback.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentFlowConfig {
    /// P2P (Procure-to-Pay) flow configuration (`P2PFlowConfig::default()` when omitted).
    #[serde(default)]
    pub p2p: P2PFlowConfig,
    /// O2C (Order-to-Cash) flow configuration (`O2CFlowConfig::default()` when omitted).
    #[serde(default)]
    pub o2c: O2CFlowConfig,
    /// Generate document reference chains (filled by `default_true` when
    /// omitted; the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub generate_document_references: bool,
    /// Export document flow graph (`false` when omitted).
    #[serde(default)]
    pub export_flow_graph: bool,
}
2148
2149impl Default for DocumentFlowConfig {
2150    fn default() -> Self {
2151        Self {
2152            p2p: P2PFlowConfig::default(),
2153            o2c: O2CFlowConfig::default(),
2154            generate_document_references: true,
2155            export_flow_graph: false,
2156        }
2157    }
2158}
2159
/// P2P (Procure-to-Pay) flow configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PFlowConfig {
    /// Enable P2P document flow generation (filled by `default_true` when
    /// omitted; the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Three-way match success rate, PO-GR-Invoice (0.95 when omitted).
    #[serde(default = "default_three_way_match_rate")]
    pub three_way_match_rate: f64,
    /// Rate of partial deliveries (0.15 when omitted).
    #[serde(default = "default_partial_delivery_rate")]
    pub partial_delivery_rate: f64,
    /// Rate of price variances between PO and Invoice (0.08 when omitted).
    #[serde(default = "default_price_variance_rate")]
    pub price_variance_rate: f64,
    /// Maximum price variance percentage (0.05 when omitted).
    ///
    /// NOTE(review): the fallback suggests a fraction (0.05 = 5%) rather than
    /// a whole-number percentage — confirm against the generator.
    #[serde(default = "default_max_price_variance")]
    pub max_price_variance_percent: f64,
    /// Rate of quantity variances between PO/GR and Invoice (0.05 when omitted).
    #[serde(default = "default_quantity_variance_rate")]
    pub quantity_variance_rate: f64,
    /// Average days from PO to goods receipt (14 when omitted).
    #[serde(default = "default_po_to_gr_days")]
    pub average_po_to_gr_days: u32,
    /// Average days from GR to invoice (5 when omitted).
    #[serde(default = "default_gr_to_invoice_days")]
    pub average_gr_to_invoice_days: u32,
    /// Average days from invoice to payment (30 when omitted).
    #[serde(default = "default_invoice_to_payment_days")]
    pub average_invoice_to_payment_days: u32,
    /// PO line count distribution (`DocumentLineCountDistribution::default()` when omitted).
    #[serde(default)]
    pub line_count_distribution: DocumentLineCountDistribution,
    /// Payment behavior configuration (`P2PPaymentBehaviorConfig::default()` when omitted).
    #[serde(default)]
    pub payment_behavior: P2PPaymentBehaviorConfig,
}
2197
/// Fallback for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    0.95
}

/// Fallback for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    0.15
}

/// Fallback for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    0.08
}

/// Fallback for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    0.05
}

/// Fallback for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    0.05
}

/// Fallback for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    14
}

/// Fallback for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    5
}

/// Fallback for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    30
}
2229
2230impl Default for P2PFlowConfig {
2231    fn default() -> Self {
2232        Self {
2233            enabled: true,
2234            three_way_match_rate: default_three_way_match_rate(),
2235            partial_delivery_rate: default_partial_delivery_rate(),
2236            price_variance_rate: default_price_variance_rate(),
2237            max_price_variance_percent: default_max_price_variance(),
2238            quantity_variance_rate: default_quantity_variance_rate(),
2239            average_po_to_gr_days: default_po_to_gr_days(),
2240            average_gr_to_invoice_days: default_gr_to_invoice_days(),
2241            average_invoice_to_payment_days: default_invoice_to_payment_days(),
2242            line_count_distribution: DocumentLineCountDistribution::default(),
2243            payment_behavior: P2PPaymentBehaviorConfig::default(),
2244        }
2245    }
2246}
2247
2248// ============================================================================
2249// P2P Payment Behavior Configuration
2250// ============================================================================
2251
/// P2P payment behavior configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PPaymentBehaviorConfig {
    /// Rate of late payments, i.e. beyond due date (0.15 when omitted).
    #[serde(default = "default_p2p_late_payment_rate")]
    pub late_payment_rate: f64,
    /// Distribution of late payment days (`LatePaymentDaysDistribution::default()` when omitted).
    #[serde(default)]
    pub late_payment_days_distribution: LatePaymentDaysDistribution,
    /// Rate of partial payments (0.05 when omitted).
    #[serde(default = "default_p2p_partial_payment_rate")]
    pub partial_payment_rate: f64,
    /// Rate of payment corrections such as NSF, chargebacks, reversals (0.02 when omitted).
    #[serde(default = "default_p2p_payment_correction_rate")]
    pub payment_correction_rate: f64,
}
2268
/// Fallback for `P2PPaymentBehaviorConfig::late_payment_rate`.
fn default_p2p_late_payment_rate() -> f64 {
    0.15
}

/// Fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05
}

/// Fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02
}
2280
2281impl Default for P2PPaymentBehaviorConfig {
2282    fn default() -> Self {
2283        Self {
2284            late_payment_rate: default_p2p_late_payment_rate(),
2285            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
2286            partial_payment_rate: default_p2p_partial_payment_rate(),
2287            payment_correction_rate: default_p2p_payment_correction_rate(),
2288        }
2289    }
2290}
2291
/// Distribution of late payment days for P2P.
///
/// One `f64` weight per lateness bucket, each with its own serde fallback.
/// The fallback weights total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatePaymentDaysDistribution {
    /// 1-7 days late, slightly late (0.50 when omitted).
    #[serde(default = "default_slightly_late")]
    pub slightly_late_1_to_7: f64,
    /// 8-14 days late (0.25 when omitted).
    #[serde(default = "default_late_8_14")]
    pub late_8_to_14: f64,
    /// 15-30 days late, very late (0.15 when omitted).
    #[serde(default = "default_very_late")]
    pub very_late_15_to_30: f64,
    /// 31-60 days late, severely late (0.07 when omitted).
    #[serde(default = "default_severely_late")]
    pub severely_late_31_to_60: f64,
    /// Over 60 days late, extremely late (0.03 when omitted).
    #[serde(default = "default_extremely_late")]
    pub extremely_late_over_60: f64,
}
2311
/// Fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`.
fn default_slightly_late() -> f64 {
    0.50
}

/// Fallback for `LatePaymentDaysDistribution::late_8_to_14`.
fn default_late_8_14() -> f64 {
    0.25
}

/// Fallback for `LatePaymentDaysDistribution::very_late_15_to_30`.
fn default_very_late() -> f64 {
    0.15
}

/// Fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`.
fn default_severely_late() -> f64 {
    0.07
}

/// Fallback for `LatePaymentDaysDistribution::extremely_late_over_60`.
fn default_extremely_late() -> f64 {
    0.03
}
2331
2332impl Default for LatePaymentDaysDistribution {
2333    fn default() -> Self {
2334        Self {
2335            slightly_late_1_to_7: default_slightly_late(),
2336            late_8_to_14: default_late_8_14(),
2337            very_late_15_to_30: default_very_late(),
2338            severely_late_31_to_60: default_severely_late(),
2339            extremely_late_over_60: default_extremely_late(),
2340        }
2341    }
2342}
2343
/// O2C (Order-to-Cash) flow configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct O2CFlowConfig {
    /// Enable O2C document flow generation (filled by `default_true` when
    /// omitted; the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Credit check failure rate (0.02 when omitted).
    #[serde(default = "default_credit_check_failure_rate")]
    pub credit_check_failure_rate: f64,
    /// Rate of partial shipments (0.10 when omitted).
    #[serde(default = "default_partial_shipment_rate")]
    pub partial_shipment_rate: f64,
    /// Rate of returns (0.03 when omitted).
    #[serde(default = "default_return_rate")]
    pub return_rate: f64,
    /// Bad debt write-off rate (0.01 when omitted).
    #[serde(default = "default_bad_debt_rate")]
    pub bad_debt_rate: f64,
    /// Average days from SO to delivery (7 when omitted).
    #[serde(default = "default_so_to_delivery_days")]
    pub average_so_to_delivery_days: u32,
    /// Average days from delivery to invoice (1 when omitted).
    #[serde(default = "default_delivery_to_invoice_days")]
    pub average_delivery_to_invoice_days: u32,
    /// Average days from invoice to receipt (45 when omitted).
    #[serde(default = "default_invoice_to_receipt_days")]
    pub average_invoice_to_receipt_days: u32,
    /// SO line count distribution (`DocumentLineCountDistribution::default()` when omitted).
    #[serde(default)]
    pub line_count_distribution: DocumentLineCountDistribution,
    /// Cash discount configuration (`CashDiscountConfig::default()` when omitted).
    #[serde(default)]
    pub cash_discount: CashDiscountConfig,
    /// Payment behavior configuration (`O2CPaymentBehaviorConfig::default()` when omitted).
    #[serde(default)]
    pub payment_behavior: O2CPaymentBehaviorConfig,
}
2381
/// Fallback for `O2CFlowConfig::credit_check_failure_rate`.
fn default_credit_check_failure_rate() -> f64 {
    0.02
}

/// Fallback for `O2CFlowConfig::partial_shipment_rate`.
fn default_partial_shipment_rate() -> f64 {
    0.10
}

/// Fallback for `O2CFlowConfig::return_rate`.
fn default_return_rate() -> f64 {
    0.03
}

/// Fallback for `O2CFlowConfig::bad_debt_rate`.
fn default_bad_debt_rate() -> f64 {
    0.01
}

/// Fallback for `O2CFlowConfig::average_so_to_delivery_days`.
fn default_so_to_delivery_days() -> u32 {
    7
}

/// Fallback for `O2CFlowConfig::average_delivery_to_invoice_days`.
fn default_delivery_to_invoice_days() -> u32 {
    1
}

/// Fallback for `O2CFlowConfig::average_invoice_to_receipt_days`.
fn default_invoice_to_receipt_days() -> u32 {
    45
}
2409
2410impl Default for O2CFlowConfig {
2411    fn default() -> Self {
2412        Self {
2413            enabled: true,
2414            credit_check_failure_rate: default_credit_check_failure_rate(),
2415            partial_shipment_rate: default_partial_shipment_rate(),
2416            return_rate: default_return_rate(),
2417            bad_debt_rate: default_bad_debt_rate(),
2418            average_so_to_delivery_days: default_so_to_delivery_days(),
2419            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
2420            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
2421            line_count_distribution: DocumentLineCountDistribution::default(),
2422            cash_discount: CashDiscountConfig::default(),
2423            payment_behavior: O2CPaymentBehaviorConfig::default(),
2424        }
2425    }
2426}
2427
2428// ============================================================================
2429// O2C Payment Behavior Configuration
2430// ============================================================================
2431
/// O2C payment behavior configuration.
///
/// `Default` is derived, so the default value is each sub-section's own
/// default. Every field also carries `#[serde(default)]`, so any subset of
/// sections may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct O2CPaymentBehaviorConfig {
    /// Dunning (Mahnung) configuration
    #[serde(default)]
    pub dunning: DunningConfig,
    /// Partial payment configuration
    #[serde(default)]
    pub partial_payments: PartialPaymentConfig,
    /// Short payment configuration (unauthorized deductions)
    #[serde(default)]
    pub short_payments: ShortPaymentConfig,
    /// On-account payment configuration (unapplied payments)
    #[serde(default)]
    pub on_account_payments: OnAccountPaymentConfig,
    /// Payment correction configuration (NSF, chargebacks)
    #[serde(default)]
    pub payment_corrections: PaymentCorrectionConfig,
}
2451
/// Dunning (Mahnungen) configuration for AR collections.
///
/// Every field carries a serde fallback. Dunning is off unless `enabled` is
/// set explicitly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DunningConfig {
    /// Enable dunning process (`false` when omitted).
    #[serde(default)]
    pub enabled: bool,
    /// Days overdue for level 1 dunning, the 1st reminder (14 when omitted).
    #[serde(default = "default_dunning_level_1_days")]
    pub level_1_days_overdue: u32,
    /// Days overdue for level 2 dunning, the 2nd reminder (28 when omitted).
    #[serde(default = "default_dunning_level_2_days")]
    pub level_2_days_overdue: u32,
    /// Days overdue for level 3 dunning, the final notice (42 when omitted).
    #[serde(default = "default_dunning_level_3_days")]
    pub level_3_days_overdue: u32,
    /// Days overdue for collection handover (60 when omitted).
    #[serde(default = "default_collection_days")]
    pub collection_days_overdue: u32,
    /// Payment rates after each dunning level (`DunningPaymentRates::default()` when omitted).
    #[serde(default)]
    pub payment_after_dunning_rates: DunningPaymentRates,
    /// Rate of invoices blocked from dunning due to disputes (0.05 when omitted).
    #[serde(default = "default_dunning_block_rate")]
    pub dunning_block_rate: f64,
    /// Interest rate per year for overdue amounts (0.09 when omitted).
    #[serde(default = "default_dunning_interest_rate")]
    pub interest_rate_per_year: f64,
    /// Fixed dunning charge per letter (25.0 when omitted; currency not specified here).
    #[serde(default = "default_dunning_charge")]
    pub dunning_charge: f64,
}
2483
/// Fallback for `DunningConfig::level_1_days_overdue`.
fn default_dunning_level_1_days() -> u32 {
    14
}

/// Fallback for `DunningConfig::level_2_days_overdue`.
fn default_dunning_level_2_days() -> u32 {
    28
}

/// Fallback for `DunningConfig::level_3_days_overdue`.
fn default_dunning_level_3_days() -> u32 {
    42
}

/// Fallback for `DunningConfig::collection_days_overdue`.
fn default_collection_days() -> u32 {
    60
}

/// Fallback for `DunningConfig::dunning_block_rate`.
fn default_dunning_block_rate() -> f64 {
    0.05
}

/// Fallback for `DunningConfig::interest_rate_per_year`.
fn default_dunning_interest_rate() -> f64 {
    0.09
}

/// Fallback for `DunningConfig::dunning_charge`.
fn default_dunning_charge() -> f64 {
    25.0
}
2511
2512impl Default for DunningConfig {
2513    fn default() -> Self {
2514        Self {
2515            enabled: false,
2516            level_1_days_overdue: default_dunning_level_1_days(),
2517            level_2_days_overdue: default_dunning_level_2_days(),
2518            level_3_days_overdue: default_dunning_level_3_days(),
2519            collection_days_overdue: default_collection_days(),
2520            payment_after_dunning_rates: DunningPaymentRates::default(),
2521            dunning_block_rate: default_dunning_block_rate(),
2522            interest_rate_per_year: default_dunning_interest_rate(),
2523            dunning_charge: default_dunning_charge(),
2524        }
2525    }
2526}
2527
/// Payment rates after each dunning level.
///
/// One `f64` rate per outcome, each with its own serde fallback. The fallback
/// rates total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DunningPaymentRates {
    /// Rate that pays after level 1 reminder (0.40 when omitted).
    #[serde(default = "default_after_level_1")]
    pub after_level_1: f64,
    /// Rate that pays after level 2 reminder (0.30 when omitted).
    #[serde(default = "default_after_level_2")]
    pub after_level_2: f64,
    /// Rate that pays after level 3 final notice (0.15 when omitted).
    #[serde(default = "default_after_level_3")]
    pub after_level_3: f64,
    /// Rate that pays during collection (0.05 when omitted).
    #[serde(default = "default_during_collection")]
    pub during_collection: f64,
    /// Rate that never pays and becomes bad debt (0.10 when omitted).
    #[serde(default = "default_never_pay")]
    pub never_pay: f64,
}
2547
/// Fallback for `DunningPaymentRates::after_level_1`.
fn default_after_level_1() -> f64 {
    0.40
}

/// Fallback for `DunningPaymentRates::after_level_2`.
fn default_after_level_2() -> f64 {
    0.30
}

/// Fallback for `DunningPaymentRates::after_level_3`.
fn default_after_level_3() -> f64 {
    0.15
}

/// Fallback for `DunningPaymentRates::during_collection`.
fn default_during_collection() -> f64 {
    0.05
}

/// Fallback for `DunningPaymentRates::never_pay`.
fn default_never_pay() -> f64 {
    0.10
}
2567
2568impl Default for DunningPaymentRates {
2569    fn default() -> Self {
2570        Self {
2571            after_level_1: default_after_level_1(),
2572            after_level_2: default_after_level_2(),
2573            after_level_3: default_after_level_3(),
2574            during_collection: default_during_collection(),
2575            never_pay: default_never_pay(),
2576        }
2577    }
2578}
2579
/// Partial payment configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentConfig {
    /// Rate of invoices paid partially (0.08 when omitted).
    #[serde(default = "default_partial_payment_rate")]
    pub rate: f64,
    /// Distribution of partial payment percentages
    /// (`PartialPaymentPercentageDistribution::default()` when omitted).
    #[serde(default)]
    pub percentage_distribution: PartialPaymentPercentageDistribution,
    /// Average days until remainder is paid (30 when omitted).
    #[serde(default = "default_avg_days_until_remainder")]
    pub avg_days_until_remainder: u32,
}
2593
/// Fallback for `PartialPaymentConfig::rate`.
fn default_partial_payment_rate() -> f64 {
    0.08
}

/// Fallback for `PartialPaymentConfig::avg_days_until_remainder`.
fn default_avg_days_until_remainder() -> u32 {
    30
}
2601
2602impl Default for PartialPaymentConfig {
2603    fn default() -> Self {
2604        Self {
2605            rate: default_partial_payment_rate(),
2606            percentage_distribution: PartialPaymentPercentageDistribution::default(),
2607            avg_days_until_remainder: default_avg_days_until_remainder(),
2608        }
2609    }
2610}
2611
/// Distribution of partial payment percentages.
///
/// One `f64` weight per payment pattern, each with its own serde fallback.
/// The fallback weights total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentPercentageDistribution {
    /// Pay 25% of invoice (weight 0.15 when omitted).
    #[serde(default = "default_partial_25")]
    pub pay_25_percent: f64,
    /// Pay 50% of invoice (weight 0.50 when omitted).
    #[serde(default = "default_partial_50")]
    pub pay_50_percent: f64,
    /// Pay 75% of invoice (weight 0.25 when omitted).
    #[serde(default = "default_partial_75")]
    pub pay_75_percent: f64,
    /// Pay random percentage (weight 0.10 when omitted).
    #[serde(default = "default_partial_random")]
    pub pay_random_percent: f64,
}
2628
/// Fallback for `PartialPaymentPercentageDistribution::pay_25_percent`.
fn default_partial_25() -> f64 {
    0.15
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_50_percent`.
fn default_partial_50() -> f64 {
    0.50
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_75_percent`.
fn default_partial_75() -> f64 {
    0.25
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_random_percent`.
fn default_partial_random() -> f64 {
    0.10
}
2644
2645impl Default for PartialPaymentPercentageDistribution {
2646    fn default() -> Self {
2647        Self {
2648            pay_25_percent: default_partial_25(),
2649            pay_50_percent: default_partial_50(),
2650            pay_75_percent: default_partial_75(),
2651            pay_random_percent: default_partial_random(),
2652        }
2653    }
2654}
2655
/// Short payment configuration (unauthorized deductions).
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentConfig {
    /// Rate of payments that are short (0.03 when omitted).
    #[serde(default = "default_short_payment_rate")]
    pub rate: f64,
    /// Distribution of short payment reasons
    /// (`ShortPaymentReasonDistribution::default()` when omitted).
    #[serde(default)]
    pub reason_distribution: ShortPaymentReasonDistribution,
    /// Maximum percentage that can be short (0.10 when omitted).
    ///
    /// NOTE(review): the fallback suggests a fraction (0.10 = 10%) rather
    /// than a whole-number percentage — confirm against the generator.
    #[serde(default = "default_max_short_percent")]
    pub max_short_percent: f64,
}
2669
/// Fallback for `ShortPaymentConfig::rate`.
fn default_short_payment_rate() -> f64 {
    0.03
}

/// Fallback for `ShortPaymentConfig::max_short_percent`.
fn default_max_short_percent() -> f64 {
    0.10
}
2677
2678impl Default for ShortPaymentConfig {
2679    fn default() -> Self {
2680        Self {
2681            rate: default_short_payment_rate(),
2682            reason_distribution: ShortPaymentReasonDistribution::default(),
2683            max_short_percent: default_max_short_percent(),
2684        }
2685    }
2686}
2687
/// Distribution of short payment reasons.
///
/// One `f64` weight per reason, each with its own serde fallback. The
/// fallback weights total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentReasonDistribution {
    /// Pricing dispute (0.30 when omitted).
    #[serde(default = "default_pricing_dispute")]
    pub pricing_dispute: f64,
    /// Quality issue (0.20 when omitted).
    #[serde(default = "default_quality_issue")]
    pub quality_issue: f64,
    /// Quantity discrepancy (0.20 when omitted).
    #[serde(default = "default_quantity_discrepancy")]
    pub quantity_discrepancy: f64,
    /// Unauthorized deduction (0.15 when omitted).
    #[serde(default = "default_unauthorized_deduction")]
    pub unauthorized_deduction: f64,
    /// Early payment discount taken incorrectly (0.15 when omitted).
    #[serde(default = "default_incorrect_discount")]
    pub incorrect_discount: f64,
}
2707
/// Fallback for `ShortPaymentReasonDistribution::pricing_dispute`.
fn default_pricing_dispute() -> f64 {
    0.30
}

/// Fallback for `ShortPaymentReasonDistribution::quality_issue`.
fn default_quality_issue() -> f64 {
    0.20
}

/// Fallback for `ShortPaymentReasonDistribution::quantity_discrepancy`.
fn default_quantity_discrepancy() -> f64 {
    0.20
}

/// Fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`.
fn default_unauthorized_deduction() -> f64 {
    0.15
}

/// Fallback for `ShortPaymentReasonDistribution::incorrect_discount`.
fn default_incorrect_discount() -> f64 {
    0.15
}
2727
2728impl Default for ShortPaymentReasonDistribution {
2729    fn default() -> Self {
2730        Self {
2731            pricing_dispute: default_pricing_dispute(),
2732            quality_issue: default_quality_issue(),
2733            quantity_discrepancy: default_quantity_discrepancy(),
2734            unauthorized_deduction: default_unauthorized_deduction(),
2735            incorrect_discount: default_incorrect_discount(),
2736        }
2737    }
2738}
2739
/// On-account payment configuration (unapplied payments).
///
/// Both fields carry a serde fallback, so either key may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OnAccountPaymentConfig {
    /// Rate of payments that are on-account, i.e. unapplied (0.02 when omitted).
    #[serde(default = "default_on_account_rate")]
    pub rate: f64,
    /// Average days until on-account payments are applied (14 when omitted).
    #[serde(default = "default_avg_days_until_applied")]
    pub avg_days_until_applied: u32,
}
2750
/// Fallback for `OnAccountPaymentConfig::rate`.
fn default_on_account_rate() -> f64 {
    0.02
}

/// Fallback for `OnAccountPaymentConfig::avg_days_until_applied`.
fn default_avg_days_until_applied() -> u32 {
    14
}
2758
2759impl Default for OnAccountPaymentConfig {
2760    fn default() -> Self {
2761        Self {
2762            rate: default_on_account_rate(),
2763            avg_days_until_applied: default_avg_days_until_applied(),
2764        }
2765    }
2766}
2767
/// Payment correction configuration.
///
/// Both fields carry a serde fallback, so either key may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionConfig {
    /// Rate of payments requiring correction (0.02 when omitted).
    #[serde(default = "default_payment_correction_rate")]
    pub rate: f64,
    /// Distribution of correction types
    /// (`PaymentCorrectionTypeDistribution::default()` when omitted).
    #[serde(default)]
    pub type_distribution: PaymentCorrectionTypeDistribution,
}
2778
/// Fallback for `PaymentCorrectionConfig::rate` when the key is omitted.
fn default_payment_correction_rate() -> f64 {
    0.02
}
2782
2783impl Default for PaymentCorrectionConfig {
2784    fn default() -> Self {
2785        Self {
2786            rate: default_payment_correction_rate(),
2787            type_distribution: PaymentCorrectionTypeDistribution::default(),
2788        }
2789    }
2790}
2791
/// Distribution of payment correction types.
///
/// One `f64` weight per correction type, each with its own serde fallback.
/// The fallback weights total 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionTypeDistribution {
    /// NSF (Non-sufficient funds) / bounced check (0.30 when omitted).
    #[serde(default = "default_nsf_rate")]
    pub nsf: f64,
    /// Chargeback (0.20 when omitted).
    #[serde(default = "default_chargeback_rate")]
    pub chargeback: f64,
    /// Wrong amount applied (0.20 when omitted).
    #[serde(default = "default_wrong_amount_rate")]
    pub wrong_amount: f64,
    /// Wrong customer applied (0.15 when omitted).
    #[serde(default = "default_wrong_customer_rate")]
    pub wrong_customer: f64,
    /// Duplicate payment (0.15 when omitted).
    #[serde(default = "default_duplicate_payment_rate")]
    pub duplicate_payment: f64,
}
2811
/// Fallback for `PaymentCorrectionTypeDistribution::nsf`.
fn default_nsf_rate() -> f64 {
    0.30
}

/// Fallback for `PaymentCorrectionTypeDistribution::chargeback`.
fn default_chargeback_rate() -> f64 {
    0.20
}

/// Fallback for `PaymentCorrectionTypeDistribution::wrong_amount`.
fn default_wrong_amount_rate() -> f64 {
    0.20
}

/// Fallback for `PaymentCorrectionTypeDistribution::wrong_customer`.
fn default_wrong_customer_rate() -> f64 {
    0.15
}

/// Fallback for `PaymentCorrectionTypeDistribution::duplicate_payment`.
fn default_duplicate_payment_rate() -> f64 {
    0.15
}
2831
2832impl Default for PaymentCorrectionTypeDistribution {
2833    fn default() -> Self {
2834        Self {
2835            nsf: default_nsf_rate(),
2836            chargeback: default_chargeback_rate(),
2837            wrong_amount: default_wrong_amount_rate(),
2838            wrong_customer: default_wrong_customer_rate(),
2839            duplicate_payment: default_duplicate_payment_rate(),
2840        }
2841    }
2842}
2843
/// Document line count distribution.
///
/// Described by a minimum, a maximum and a mode, each with its own serde
/// fallback. This type does not itself check that `min_lines <= mode_lines
/// <= max_lines`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentLineCountDistribution {
    /// Minimum number of lines (1 when omitted).
    #[serde(default = "default_min_lines")]
    pub min_lines: u32,
    /// Maximum number of lines (20 when omitted).
    #[serde(default = "default_max_lines")]
    pub max_lines: u32,
    /// Most common line count, i.e. the mode (3 when omitted).
    #[serde(default = "default_mode_lines")]
    pub mode_lines: u32,
}
2857
/// Fallback for `DocumentLineCountDistribution::min_lines`.
fn default_min_lines() -> u32 {
    1
}

/// Fallback for `DocumentLineCountDistribution::max_lines`.
fn default_max_lines() -> u32 {
    20
}

/// Fallback for `DocumentLineCountDistribution::mode_lines`.
fn default_mode_lines() -> u32 {
    3
}
2869
2870impl Default for DocumentLineCountDistribution {
2871    fn default() -> Self {
2872        Self {
2873            min_lines: default_min_lines(),
2874            max_lines: default_max_lines(),
2875            mode_lines: default_mode_lines(),
2876        }
2877    }
2878}
2879
/// Cash discount configuration.
///
/// Every field carries a serde fallback, so any subset of keys may be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CashDiscountConfig {
    /// Percentage of invoices eligible for cash discount (0.30 when omitted).
    #[serde(default = "default_discount_eligible_rate")]
    pub eligible_rate: f64,
    /// Rate at which customers take the discount (0.60 when omitted).
    #[serde(default = "default_discount_taken_rate")]
    pub taken_rate: f64,
    /// Standard discount percentage (0.02 when omitted).
    ///
    /// NOTE(review): the fallback suggests a fraction (0.02 = 2%) rather than
    /// a whole-number percentage — confirm against the generator.
    #[serde(default = "default_discount_percent")]
    pub discount_percent: f64,
    /// Days within which discount must be taken (10 when omitted).
    #[serde(default = "default_discount_days")]
    pub discount_days: u32,
}
2896
/// Fallback for `CashDiscountConfig::eligible_rate`.
fn default_discount_eligible_rate() -> f64 {
    0.30
}

/// Fallback for `CashDiscountConfig::taken_rate`.
fn default_discount_taken_rate() -> f64 {
    0.60
}

/// Fallback for `CashDiscountConfig::discount_percent`.
fn default_discount_percent() -> f64 {
    0.02
}

/// Fallback for `CashDiscountConfig::discount_days`.
fn default_discount_days() -> u32 {
    10
}
2912
2913impl Default for CashDiscountConfig {
2914    fn default() -> Self {
2915        Self {
2916            eligible_rate: default_discount_eligible_rate(),
2917            taken_rate: default_discount_taken_rate(),
2918            discount_percent: default_discount_percent(),
2919            discount_days: default_discount_days(),
2920        }
2921    }
2922}
2923
2924// ============================================================================
2925// Intercompany Configuration
2926// ============================================================================
2927
/// Intercompany transaction configuration.
///
/// Every field carries a serde fallback. Intercompany generation is off
/// unless `enabled` is set explicitly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntercompanyConfig {
    /// Enable intercompany transaction generation (`false` when omitted).
    #[serde(default)]
    pub enabled: bool,
    /// Rate of transactions that are intercompany (0.15 when omitted).
    #[serde(default = "default_ic_transaction_rate")]
    pub ic_transaction_rate: f64,
    /// Transfer pricing method (`TransferPricingMethod::CostPlus` when omitted).
    #[serde(default)]
    pub transfer_pricing_method: TransferPricingMethod,
    /// Transfer pricing markup percentage, for cost-plus (0.05 when omitted).
    ///
    /// NOTE(review): the fallback suggests a fraction (0.05 = 5%) rather than
    /// a whole-number percentage — confirm against the generator.
    #[serde(default = "default_markup_percent")]
    pub markup_percent: f64,
    /// Generate matched IC pairs, i.e. offsetting entries (filled by
    /// `default_true` when omitted; the `Default` impl sets it to `true`).
    #[serde(default = "default_true")]
    pub generate_matched_pairs: bool,
    /// IC transaction type distribution
    /// (`ICTransactionTypeDistribution::default()` when omitted).
    #[serde(default)]
    pub transaction_type_distribution: ICTransactionTypeDistribution,
    /// Generate elimination entries for consolidation (`false` when omitted).
    #[serde(default)]
    pub generate_eliminations: bool,
}
2953
/// Default for `IntercompanyConfig::ic_transaction_rate`: `0.15`.
fn default_ic_transaction_rate() -> f64 {
    0.15_f64
}

/// Default for `IntercompanyConfig::markup_percent`: `0.05`.
fn default_markup_percent() -> f64 {
    0.05_f64
}
2961
2962impl Default for IntercompanyConfig {
2963    fn default() -> Self {
2964        Self {
2965            enabled: false,
2966            ic_transaction_rate: default_ic_transaction_rate(),
2967            transfer_pricing_method: TransferPricingMethod::default(),
2968            markup_percent: default_markup_percent(),
2969            generate_matched_pairs: true,
2970            transaction_type_distribution: ICTransactionTypeDistribution::default(),
2971            generate_eliminations: false,
2972        }
2973    }
2974}
2975
2976/// Transfer pricing method.
2977#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2978#[serde(rename_all = "snake_case")]
2979pub enum TransferPricingMethod {
2980    /// Cost plus a markup
2981    #[default]
2982    CostPlus,
2983    /// Comparable uncontrolled price
2984    ComparableUncontrolled,
2985    /// Resale price method
2986    ResalePrice,
2987    /// Transactional net margin method
2988    TransactionalNetMargin,
2989    /// Profit split method
2990    ProfitSplit,
2991}
2992
2993/// IC transaction type distribution.
2994#[derive(Debug, Clone, Serialize, Deserialize)]
2995pub struct ICTransactionTypeDistribution {
2996    /// Goods sales between entities
2997    pub goods_sale: f64,
2998    /// Services provided
2999    pub service_provided: f64,
3000    /// Intercompany loans
3001    pub loan: f64,
3002    /// Dividends
3003    pub dividend: f64,
3004    /// Management fees
3005    pub management_fee: f64,
3006    /// Royalties
3007    pub royalty: f64,
3008    /// Cost sharing
3009    pub cost_sharing: f64,
3010}
3011
3012impl Default for ICTransactionTypeDistribution {
3013    fn default() -> Self {
3014        Self {
3015            goods_sale: 0.35,
3016            service_provided: 0.20,
3017            loan: 0.10,
3018            dividend: 0.05,
3019            management_fee: 0.15,
3020            royalty: 0.10,
3021            cost_sharing: 0.05,
3022        }
3023    }
3024}
3025
3026// ============================================================================
3027// Balance Configuration
3028// ============================================================================
3029
3030/// Balance and trial balance configuration.
3031#[derive(Debug, Clone, Serialize, Deserialize)]
3032pub struct BalanceConfig {
3033    /// Generate opening balances
3034    #[serde(default)]
3035    pub generate_opening_balances: bool,
3036    /// Generate trial balances
3037    #[serde(default = "default_true")]
3038    pub generate_trial_balances: bool,
3039    /// Target gross margin (for revenue/COGS coherence)
3040    #[serde(default = "default_gross_margin")]
3041    pub target_gross_margin: f64,
3042    /// Target DSO (Days Sales Outstanding)
3043    #[serde(default = "default_dso")]
3044    pub target_dso_days: u32,
3045    /// Target DPO (Days Payable Outstanding)
3046    #[serde(default = "default_dpo")]
3047    pub target_dpo_days: u32,
3048    /// Target current ratio
3049    #[serde(default = "default_current_ratio")]
3050    pub target_current_ratio: f64,
3051    /// Target debt-to-equity ratio
3052    #[serde(default = "default_debt_equity")]
3053    pub target_debt_to_equity: f64,
3054    /// Validate balance sheet equation (A = L + E)
3055    #[serde(default = "default_true")]
3056    pub validate_balance_equation: bool,
3057    /// Reconcile subledgers to GL control accounts
3058    #[serde(default = "default_true")]
3059    pub reconcile_subledgers: bool,
3060}
3061
/// Default for `BalanceConfig::target_gross_margin`: `0.35`.
fn default_gross_margin() -> f64 {
    0.35_f64
}

/// Default for `BalanceConfig::target_dso_days`: `45`.
fn default_dso() -> u32 {
    45_u32
}

/// Default for `BalanceConfig::target_dpo_days`: `30`.
fn default_dpo() -> u32 {
    30_u32
}

/// Default for `BalanceConfig::target_current_ratio`: `1.5`.
fn default_current_ratio() -> f64 {
    1.5_f64
}

/// Default for `BalanceConfig::target_debt_to_equity`: `0.5`.
fn default_debt_equity() -> f64 {
    0.5_f64
}
3081
3082impl Default for BalanceConfig {
3083    fn default() -> Self {
3084        Self {
3085            generate_opening_balances: false,
3086            generate_trial_balances: true,
3087            target_gross_margin: default_gross_margin(),
3088            target_dso_days: default_dso(),
3089            target_dpo_days: default_dpo(),
3090            target_current_ratio: default_current_ratio(),
3091            target_debt_to_equity: default_debt_equity(),
3092            validate_balance_equation: true,
3093            reconcile_subledgers: true,
3094        }
3095    }
3096}
3097
3098// ==========================================================================
3099// OCPM (Object-Centric Process Mining) Configuration
3100// ==========================================================================
3101
3102/// OCPM (Object-Centric Process Mining) configuration.
3103///
3104/// Controls generation of OCEL 2.0 compatible event logs with
3105/// many-to-many event-to-object relationships.
3106#[derive(Debug, Clone, Serialize, Deserialize)]
3107pub struct OcpmConfig {
3108    /// Enable OCPM event log generation
3109    #[serde(default)]
3110    pub enabled: bool,
3111
3112    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
3113    #[serde(default = "default_true")]
3114    pub generate_lifecycle_events: bool,
3115
3116    /// Include object-to-object relationships in output
3117    #[serde(default = "default_true")]
3118    pub include_object_relationships: bool,
3119
3120    /// Compute and export process variants
3121    #[serde(default = "default_true")]
3122    pub compute_variants: bool,
3123
3124    /// Maximum variants to track (0 = unlimited)
3125    #[serde(default)]
3126    pub max_variants: usize,
3127
3128    /// P2P process configuration
3129    #[serde(default)]
3130    pub p2p_process: OcpmProcessConfig,
3131
3132    /// O2C process configuration
3133    #[serde(default)]
3134    pub o2c_process: OcpmProcessConfig,
3135
3136    /// Output format configuration
3137    #[serde(default)]
3138    pub output: OcpmOutputConfig,
3139}
3140
3141impl Default for OcpmConfig {
3142    fn default() -> Self {
3143        Self {
3144            enabled: false,
3145            generate_lifecycle_events: true,
3146            include_object_relationships: true,
3147            compute_variants: true,
3148            max_variants: 0,
3149            p2p_process: OcpmProcessConfig::default(),
3150            o2c_process: OcpmProcessConfig::default(),
3151            output: OcpmOutputConfig::default(),
3152        }
3153    }
3154}
3155
3156/// Process-specific OCPM configuration.
3157#[derive(Debug, Clone, Serialize, Deserialize)]
3158pub struct OcpmProcessConfig {
3159    /// Rework probability (0.0-1.0)
3160    #[serde(default = "default_rework_probability")]
3161    pub rework_probability: f64,
3162
3163    /// Skip step probability (0.0-1.0)
3164    #[serde(default = "default_skip_probability")]
3165    pub skip_step_probability: f64,
3166
3167    /// Out-of-order step probability (0.0-1.0)
3168    #[serde(default = "default_out_of_order_probability")]
3169    pub out_of_order_probability: f64,
3170}
3171
/// Default for `OcpmProcessConfig::rework_probability`: `0.05`.
fn default_rework_probability() -> f64 {
    0.05_f64
}

/// Default for `OcpmProcessConfig::skip_step_probability`: `0.02`.
fn default_skip_probability() -> f64 {
    0.02_f64
}

/// Default for `OcpmProcessConfig::out_of_order_probability`: `0.03`.
fn default_out_of_order_probability() -> f64 {
    0.03_f64
}
3183
3184impl Default for OcpmProcessConfig {
3185    fn default() -> Self {
3186        Self {
3187            rework_probability: default_rework_probability(),
3188            skip_step_probability: default_skip_probability(),
3189            out_of_order_probability: default_out_of_order_probability(),
3190        }
3191    }
3192}
3193
3194/// OCPM output format configuration.
3195#[derive(Debug, Clone, Serialize, Deserialize)]
3196pub struct OcpmOutputConfig {
3197    /// Export OCEL 2.0 JSON format
3198    #[serde(default = "default_true")]
3199    pub ocel_json: bool,
3200
3201    /// Export OCEL 2.0 XML format
3202    #[serde(default)]
3203    pub ocel_xml: bool,
3204
3205    /// Export flattened CSV for each object type
3206    #[serde(default = "default_true")]
3207    pub flattened_csv: bool,
3208
3209    /// Export event-object relationship table
3210    #[serde(default = "default_true")]
3211    pub event_object_csv: bool,
3212
3213    /// Export object-object relationship table
3214    #[serde(default = "default_true")]
3215    pub object_relationship_csv: bool,
3216
3217    /// Export process variants summary
3218    #[serde(default = "default_true")]
3219    pub variants_csv: bool,
3220}
3221
3222impl Default for OcpmOutputConfig {
3223    fn default() -> Self {
3224        Self {
3225            ocel_json: true,
3226            ocel_xml: false,
3227            flattened_csv: true,
3228            event_object_csv: true,
3229            object_relationship_csv: true,
3230            variants_csv: true,
3231        }
3232    }
3233}
3234
3235/// Audit engagement and workpaper generation configuration.
3236#[derive(Debug, Clone, Serialize, Deserialize)]
3237pub struct AuditGenerationConfig {
3238    /// Enable audit engagement generation
3239    #[serde(default)]
3240    pub enabled: bool,
3241
3242    /// Generate engagement documents and workpapers
3243    #[serde(default = "default_true")]
3244    pub generate_workpapers: bool,
3245
3246    /// Default engagement type distribution
3247    #[serde(default)]
3248    pub engagement_types: AuditEngagementTypesConfig,
3249
3250    /// Workpaper configuration
3251    #[serde(default)]
3252    pub workpapers: WorkpaperConfig,
3253
3254    /// Team configuration
3255    #[serde(default)]
3256    pub team: AuditTeamConfig,
3257
3258    /// Review workflow configuration
3259    #[serde(default)]
3260    pub review: ReviewWorkflowConfig,
3261}
3262
3263impl Default for AuditGenerationConfig {
3264    fn default() -> Self {
3265        Self {
3266            enabled: false,
3267            generate_workpapers: true,
3268            engagement_types: AuditEngagementTypesConfig::default(),
3269            workpapers: WorkpaperConfig::default(),
3270            team: AuditTeamConfig::default(),
3271            review: ReviewWorkflowConfig::default(),
3272        }
3273    }
3274}
3275
3276/// Engagement type distribution configuration.
3277#[derive(Debug, Clone, Serialize, Deserialize)]
3278pub struct AuditEngagementTypesConfig {
3279    /// Financial statement audit probability
3280    #[serde(default = "default_financial_audit_prob")]
3281    pub financial_statement: f64,
3282    /// SOX/ICFR audit probability
3283    #[serde(default = "default_sox_audit_prob")]
3284    pub sox_icfr: f64,
3285    /// Integrated audit probability
3286    #[serde(default = "default_integrated_audit_prob")]
3287    pub integrated: f64,
3288    /// Review engagement probability
3289    #[serde(default = "default_review_prob")]
3290    pub review: f64,
3291    /// Agreed-upon procedures probability
3292    #[serde(default = "default_aup_prob")]
3293    pub agreed_upon_procedures: f64,
3294}
3295
/// Default for `AuditEngagementTypesConfig::financial_statement`: `0.40`.
fn default_financial_audit_prob() -> f64 {
    0.40_f64
}

/// Default for `AuditEngagementTypesConfig::sox_icfr`: `0.20`.
fn default_sox_audit_prob() -> f64 {
    0.20_f64
}

/// Default for `AuditEngagementTypesConfig::integrated`: `0.25`.
fn default_integrated_audit_prob() -> f64 {
    0.25_f64
}

/// Default for `AuditEngagementTypesConfig::review`: `0.10`.
fn default_review_prob() -> f64 {
    0.10_f64
}

/// Default for `AuditEngagementTypesConfig::agreed_upon_procedures`: `0.05`.
fn default_aup_prob() -> f64 {
    0.05_f64
}
3311
3312impl Default for AuditEngagementTypesConfig {
3313    fn default() -> Self {
3314        Self {
3315            financial_statement: default_financial_audit_prob(),
3316            sox_icfr: default_sox_audit_prob(),
3317            integrated: default_integrated_audit_prob(),
3318            review: default_review_prob(),
3319            agreed_upon_procedures: default_aup_prob(),
3320        }
3321    }
3322}
3323
3324/// Workpaper generation configuration.
3325#[derive(Debug, Clone, Serialize, Deserialize)]
3326pub struct WorkpaperConfig {
3327    /// Average workpapers per engagement phase
3328    #[serde(default = "default_workpapers_per_phase")]
3329    pub average_per_phase: usize,
3330
3331    /// Include ISA compliance references
3332    #[serde(default = "default_true")]
3333    pub include_isa_references: bool,
3334
3335    /// Generate sample details
3336    #[serde(default = "default_true")]
3337    pub include_sample_details: bool,
3338
3339    /// Include cross-references between workpapers
3340    #[serde(default = "default_true")]
3341    pub include_cross_references: bool,
3342
3343    /// Sampling configuration
3344    #[serde(default)]
3345    pub sampling: SamplingConfig,
3346}
3347
/// Default for `WorkpaperConfig::average_per_phase`: `5`.
fn default_workpapers_per_phase() -> usize {
    5_usize
}
3351
3352impl Default for WorkpaperConfig {
3353    fn default() -> Self {
3354        Self {
3355            average_per_phase: default_workpapers_per_phase(),
3356            include_isa_references: true,
3357            include_sample_details: true,
3358            include_cross_references: true,
3359            sampling: SamplingConfig::default(),
3360        }
3361    }
3362}
3363
3364/// Sampling method configuration.
3365#[derive(Debug, Clone, Serialize, Deserialize)]
3366pub struct SamplingConfig {
3367    /// Statistical sampling rate (0.0-1.0)
3368    #[serde(default = "default_statistical_rate")]
3369    pub statistical_rate: f64,
3370    /// Judgmental sampling rate (0.0-1.0)
3371    #[serde(default = "default_judgmental_rate")]
3372    pub judgmental_rate: f64,
3373    /// Haphazard sampling rate (0.0-1.0)
3374    #[serde(default = "default_haphazard_rate")]
3375    pub haphazard_rate: f64,
3376    /// 100% examination rate (0.0-1.0)
3377    #[serde(default = "default_complete_examination_rate")]
3378    pub complete_examination_rate: f64,
3379}
3380
/// Default for `SamplingConfig::statistical_rate`: `0.40`.
fn default_statistical_rate() -> f64 {
    0.40_f64
}

/// Default for `SamplingConfig::judgmental_rate`: `0.30`.
fn default_judgmental_rate() -> f64 {
    0.30_f64
}

/// Default for `SamplingConfig::haphazard_rate`: `0.20`.
fn default_haphazard_rate() -> f64 {
    0.20_f64
}

/// Default for `SamplingConfig::complete_examination_rate`: `0.10`.
fn default_complete_examination_rate() -> f64 {
    0.10_f64
}
3393
3394impl Default for SamplingConfig {
3395    fn default() -> Self {
3396        Self {
3397            statistical_rate: default_statistical_rate(),
3398            judgmental_rate: default_judgmental_rate(),
3399            haphazard_rate: default_haphazard_rate(),
3400            complete_examination_rate: default_complete_examination_rate(),
3401        }
3402    }
3403}
3404
3405/// Audit team configuration.
3406#[derive(Debug, Clone, Serialize, Deserialize)]
3407pub struct AuditTeamConfig {
3408    /// Minimum team size
3409    #[serde(default = "default_min_team_size")]
3410    pub min_team_size: usize,
3411    /// Maximum team size
3412    #[serde(default = "default_max_team_size")]
3413    pub max_team_size: usize,
3414    /// Probability of having a specialist on the team
3415    #[serde(default = "default_specialist_probability")]
3416    pub specialist_probability: f64,
3417}
3418
/// Default for `AuditTeamConfig::min_team_size`: `3`.
fn default_min_team_size() -> usize {
    3_usize
}

/// Default for `AuditTeamConfig::max_team_size`: `8`.
fn default_max_team_size() -> usize {
    8_usize
}

/// Default for `AuditTeamConfig::specialist_probability`: `0.30`.
fn default_specialist_probability() -> f64 {
    0.30_f64
}
3428
3429impl Default for AuditTeamConfig {
3430    fn default() -> Self {
3431        Self {
3432            min_team_size: default_min_team_size(),
3433            max_team_size: default_max_team_size(),
3434            specialist_probability: default_specialist_probability(),
3435        }
3436    }
3437}
3438
3439/// Review workflow configuration.
3440#[derive(Debug, Clone, Serialize, Deserialize)]
3441pub struct ReviewWorkflowConfig {
3442    /// Average days between preparer completion and first review
3443    #[serde(default = "default_review_delay_days")]
3444    pub average_review_delay_days: u32,
3445    /// Probability of review notes requiring rework
3446    #[serde(default = "default_rework_probability_review")]
3447    pub rework_probability: f64,
3448    /// Require partner sign-off for all workpapers
3449    #[serde(default = "default_true")]
3450    pub require_partner_signoff: bool,
3451}
3452
/// Default for `ReviewWorkflowConfig::average_review_delay_days`: `2`.
fn default_review_delay_days() -> u32 {
    2_u32
}

/// Default for `ReviewWorkflowConfig::rework_probability`: `0.15`.
fn default_rework_probability_review() -> f64 {
    0.15_f64
}
3459
3460impl Default for ReviewWorkflowConfig {
3461    fn default() -> Self {
3462        Self {
3463            average_review_delay_days: default_review_delay_days(),
3464            rework_probability: default_rework_probability_review(),
3465            require_partner_signoff: true,
3466        }
3467    }
3468}
3469
3470// =============================================================================
3471// Data Quality Configuration
3472// =============================================================================
3473
3474/// Data quality variation settings for realistic flakiness injection.
3475#[derive(Debug, Clone, Serialize, Deserialize)]
3476pub struct DataQualitySchemaConfig {
3477    /// Enable data quality variations
3478    #[serde(default)]
3479    pub enabled: bool,
3480    /// Preset to use (overrides individual settings if set)
3481    #[serde(default)]
3482    pub preset: DataQualityPreset,
3483    /// Missing value injection settings
3484    #[serde(default)]
3485    pub missing_values: MissingValuesSchemaConfig,
3486    /// Typo injection settings
3487    #[serde(default)]
3488    pub typos: TypoSchemaConfig,
3489    /// Format variation settings
3490    #[serde(default)]
3491    pub format_variations: FormatVariationSchemaConfig,
3492    /// Duplicate injection settings
3493    #[serde(default)]
3494    pub duplicates: DuplicateSchemaConfig,
3495    /// Encoding issue settings
3496    #[serde(default)]
3497    pub encoding_issues: EncodingIssueSchemaConfig,
3498    /// Generate quality issue labels for ML training
3499    #[serde(default)]
3500    pub generate_labels: bool,
3501    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
3502    #[serde(default)]
3503    pub sink_profiles: SinkQualityProfiles,
3504}
3505
3506impl Default for DataQualitySchemaConfig {
3507    fn default() -> Self {
3508        Self {
3509            enabled: false,
3510            preset: DataQualityPreset::None,
3511            missing_values: MissingValuesSchemaConfig::default(),
3512            typos: TypoSchemaConfig::default(),
3513            format_variations: FormatVariationSchemaConfig::default(),
3514            duplicates: DuplicateSchemaConfig::default(),
3515            encoding_issues: EncodingIssueSchemaConfig::default(),
3516            generate_labels: true,
3517            sink_profiles: SinkQualityProfiles::default(),
3518        }
3519    }
3520}
3521
3522impl DataQualitySchemaConfig {
3523    /// Creates a config for a specific preset profile.
3524    pub fn with_preset(preset: DataQualityPreset) -> Self {
3525        let mut config = Self {
3526            preset,
3527            ..Default::default()
3528        };
3529        config.apply_preset();
3530        config
3531    }
3532
3533    /// Applies the preset settings to the individual configuration fields.
3534    /// Call this after deserializing if preset is not Custom or None.
3535    pub fn apply_preset(&mut self) {
3536        if !self.preset.overrides_settings() {
3537            return;
3538        }
3539
3540        self.enabled = true;
3541
3542        // Missing values
3543        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
3544        self.missing_values.rate = self.preset.missing_rate();
3545
3546        // Typos
3547        self.typos.enabled = self.preset.typo_rate() > 0.0;
3548        self.typos.char_error_rate = self.preset.typo_rate();
3549
3550        // Duplicates
3551        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
3552        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
3553        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
3554        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
3555
3556        // Format variations
3557        self.format_variations.enabled = self.preset.format_variations_enabled();
3558
3559        // Encoding issues
3560        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
3561        self.encoding_issues.rate = self.preset.encoding_issue_rate();
3562
3563        // OCR errors for typos in legacy preset
3564        if self.preset.ocr_errors_enabled() {
3565            self.typos.type_weights.ocr_errors = 0.3;
3566        }
3567    }
3568
3569    /// Returns the effective missing value rate (considering preset).
3570    pub fn effective_missing_rate(&self) -> f64 {
3571        if self.preset.overrides_settings() {
3572            self.preset.missing_rate()
3573        } else {
3574            self.missing_values.rate
3575        }
3576    }
3577
3578    /// Returns the effective typo rate (considering preset).
3579    pub fn effective_typo_rate(&self) -> f64 {
3580        if self.preset.overrides_settings() {
3581            self.preset.typo_rate()
3582        } else {
3583            self.typos.char_error_rate
3584        }
3585    }
3586
3587    /// Returns the effective duplicate rate (considering preset).
3588    pub fn effective_duplicate_rate(&self) -> f64 {
3589        if self.preset.overrides_settings() {
3590            self.preset.duplicate_rate()
3591        } else {
3592            self.duplicates.exact_duplicate_ratio
3593                + self.duplicates.near_duplicate_ratio
3594                + self.duplicates.fuzzy_duplicate_ratio
3595        }
3596    }
3597
3598    /// Creates a clean profile config.
3599    pub fn clean() -> Self {
3600        Self::with_preset(DataQualityPreset::Clean)
3601    }
3602
3603    /// Creates a noisy profile config.
3604    pub fn noisy() -> Self {
3605        Self::with_preset(DataQualityPreset::Noisy)
3606    }
3607
3608    /// Creates a legacy profile config.
3609    pub fn legacy() -> Self {
3610        Self::with_preset(DataQualityPreset::Legacy)
3611    }
3612}
3613
3614/// Preset configurations for common data quality scenarios.
3615#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
3616#[serde(rename_all = "snake_case")]
3617pub enum DataQualityPreset {
3618    /// No data quality variations (clean data)
3619    #[default]
3620    None,
3621    /// Minimal variations (very clean data with rare issues)
3622    Minimal,
3623    /// Normal variations (realistic enterprise data quality)
3624    Normal,
3625    /// High variations (messy data for stress testing)
3626    High,
3627    /// Custom (use individual settings)
3628    Custom,
3629
3630    // ========================================
3631    // ML-Oriented Profiles (Phase 2.1)
3632    // ========================================
3633    /// Clean profile for ML training - minimal data quality issues
3634    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
3635    Clean,
3636    /// Noisy profile simulating typical production data issues
3637    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
3638    Noisy,
3639    /// Legacy profile simulating migrated/OCR'd historical data
3640    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
3641    Legacy,
3642}
3643
3644impl DataQualityPreset {
3645    /// Returns the missing value rate for this preset.
3646    pub fn missing_rate(&self) -> f64 {
3647        match self {
3648            DataQualityPreset::None => 0.0,
3649            DataQualityPreset::Minimal => 0.005,
3650            DataQualityPreset::Normal => 0.02,
3651            DataQualityPreset::High => 0.08,
3652            DataQualityPreset::Custom => 0.01, // Use config value
3653            DataQualityPreset::Clean => 0.001,
3654            DataQualityPreset::Noisy => 0.05,
3655            DataQualityPreset::Legacy => 0.10,
3656        }
3657    }
3658
3659    /// Returns the typo rate for this preset.
3660    pub fn typo_rate(&self) -> f64 {
3661        match self {
3662            DataQualityPreset::None => 0.0,
3663            DataQualityPreset::Minimal => 0.0005,
3664            DataQualityPreset::Normal => 0.002,
3665            DataQualityPreset::High => 0.01,
3666            DataQualityPreset::Custom => 0.001, // Use config value
3667            DataQualityPreset::Clean => 0.0005,
3668            DataQualityPreset::Noisy => 0.02,
3669            DataQualityPreset::Legacy => 0.05,
3670        }
3671    }
3672
3673    /// Returns the duplicate rate for this preset.
3674    pub fn duplicate_rate(&self) -> f64 {
3675        match self {
3676            DataQualityPreset::None => 0.0,
3677            DataQualityPreset::Minimal => 0.001,
3678            DataQualityPreset::Normal => 0.005,
3679            DataQualityPreset::High => 0.02,
3680            DataQualityPreset::Custom => 0.0, // Use config value
3681            DataQualityPreset::Clean => 0.0,
3682            DataQualityPreset::Noisy => 0.01,
3683            DataQualityPreset::Legacy => 0.03,
3684        }
3685    }
3686
3687    /// Returns whether format variations are enabled for this preset.
3688    pub fn format_variations_enabled(&self) -> bool {
3689        match self {
3690            DataQualityPreset::None | DataQualityPreset::Clean => false,
3691            DataQualityPreset::Minimal => true,
3692            DataQualityPreset::Normal => true,
3693            DataQualityPreset::High => true,
3694            DataQualityPreset::Custom => true,
3695            DataQualityPreset::Noisy => true,
3696            DataQualityPreset::Legacy => true,
3697        }
3698    }
3699
3700    /// Returns whether OCR-style errors are enabled for this preset.
3701    pub fn ocr_errors_enabled(&self) -> bool {
3702        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
3703    }
3704
3705    /// Returns whether encoding issues are enabled for this preset.
3706    pub fn encoding_issues_enabled(&self) -> bool {
3707        matches!(
3708            self,
3709            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
3710        )
3711    }
3712
3713    /// Returns the encoding issue rate for this preset.
3714    pub fn encoding_issue_rate(&self) -> f64 {
3715        match self {
3716            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
3717            DataQualityPreset::Normal => 0.002,
3718            DataQualityPreset::High => 0.01,
3719            DataQualityPreset::Custom => 0.0,
3720            DataQualityPreset::Noisy => 0.005,
3721            DataQualityPreset::Legacy => 0.02,
3722        }
3723    }
3724
3725    /// Returns true if this preset overrides individual settings.
3726    pub fn overrides_settings(&self) -> bool {
3727        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
3728    }
3729
3730    /// Returns a human-readable description of this preset.
3731    pub fn description(&self) -> &'static str {
3732        match self {
3733            DataQualityPreset::None => "No data quality issues (pristine data)",
3734            DataQualityPreset::Minimal => "Very rare data quality issues",
3735            DataQualityPreset::Normal => "Realistic enterprise data quality",
3736            DataQualityPreset::High => "Messy data for stress testing",
3737            DataQualityPreset::Custom => "Custom settings from configuration",
3738            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
3739            DataQualityPreset::Noisy => "Typical production data with moderate issues",
3740            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
3741        }
3742    }
3743}
3744
3745/// Missing value injection configuration.
3746#[derive(Debug, Clone, Serialize, Deserialize)]
3747pub struct MissingValuesSchemaConfig {
3748    /// Enable missing value injection
3749    #[serde(default)]
3750    pub enabled: bool,
3751    /// Global missing rate (0.0 to 1.0)
3752    #[serde(default = "default_missing_rate")]
3753    pub rate: f64,
3754    /// Missing value strategy
3755    #[serde(default)]
3756    pub strategy: MissingValueStrategy,
3757    /// Field-specific rates (field name -> rate)
3758    #[serde(default)]
3759    pub field_rates: std::collections::HashMap<String, f64>,
3760    /// Fields that should never have missing values
3761    #[serde(default)]
3762    pub protected_fields: Vec<String>,
3763}
3764
3765fn default_missing_rate() -> f64 {
3766    0.01
3767}
3768
3769impl Default for MissingValuesSchemaConfig {
3770    fn default() -> Self {
3771        Self {
3772            enabled: false,
3773            rate: default_missing_rate(),
3774            strategy: MissingValueStrategy::Mcar,
3775            field_rates: std::collections::HashMap::new(),
3776            protected_fields: vec![
3777                "document_id".to_string(),
3778                "company_code".to_string(),
3779                "posting_date".to_string(),
3780            ],
3781        }
3782    }
3783}
3784
3785/// Missing value strategy types.
3786#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
3787#[serde(rename_all = "snake_case")]
3788pub enum MissingValueStrategy {
3789    /// Missing Completely At Random - equal probability for all values
3790    #[default]
3791    Mcar,
3792    /// Missing At Random - depends on other observed values
3793    Mar,
3794    /// Missing Not At Random - depends on the value itself
3795    Mnar,
3796    /// Systematic - entire field groups missing together
3797    Systematic,
3798}
3799
3800/// Typo injection configuration.
3801#[derive(Debug, Clone, Serialize, Deserialize)]
3802pub struct TypoSchemaConfig {
3803    /// Enable typo injection
3804    #[serde(default)]
3805    pub enabled: bool,
3806    /// Character error rate (per character, not per field)
3807    #[serde(default = "default_typo_rate")]
3808    pub char_error_rate: f64,
3809    /// Typo type weights
3810    #[serde(default)]
3811    pub type_weights: TypoTypeWeights,
3812    /// Fields that should never have typos
3813    #[serde(default)]
3814    pub protected_fields: Vec<String>,
3815}
3816
3817fn default_typo_rate() -> f64 {
3818    0.001
3819}
3820
3821impl Default for TypoSchemaConfig {
3822    fn default() -> Self {
3823        Self {
3824            enabled: false,
3825            char_error_rate: default_typo_rate(),
3826            type_weights: TypoTypeWeights::default(),
3827            protected_fields: vec![
3828                "document_id".to_string(),
3829                "gl_account".to_string(),
3830                "company_code".to_string(),
3831            ],
3832        }
3833    }
3834}
3835
3836/// Weights for different typo types.
3837#[derive(Debug, Clone, Serialize, Deserialize)]
3838pub struct TypoTypeWeights {
3839    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
3840    #[serde(default = "default_substitution_weight")]
3841    pub substitution: f64,
3842    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
3843    #[serde(default = "default_transposition_weight")]
3844    pub transposition: f64,
3845    /// Character insertion
3846    #[serde(default = "default_insertion_weight")]
3847    pub insertion: f64,
3848    /// Character deletion
3849    #[serde(default = "default_deletion_weight")]
3850    pub deletion: f64,
3851    /// OCR-style errors (e.g., '0' -> 'O')
3852    #[serde(default = "default_ocr_weight")]
3853    pub ocr_errors: f64,
3854    /// Homophone substitution (e.g., 'their' -> 'there')
3855    #[serde(default = "default_homophone_weight")]
3856    pub homophones: f64,
3857}
3858
/// Default for `TypoTypeWeights::substitution`: `0.35`.
fn default_substitution_weight() -> f64 {
    0.35_f64
}

/// Default for `TypoTypeWeights::transposition`: `0.25`.
fn default_transposition_weight() -> f64 {
    0.25_f64
}

/// Default for `TypoTypeWeights::insertion`: `0.10`.
fn default_insertion_weight() -> f64 {
    0.10_f64
}

/// Default for `TypoTypeWeights::deletion`: `0.15`.
fn default_deletion_weight() -> f64 {
    0.15_f64
}

/// Default for `TypoTypeWeights::ocr_errors`: `0.10`.
fn default_ocr_weight() -> f64 {
    0.10_f64
}

/// Default for `TypoTypeWeights::homophones`: `0.05`.
fn default_homophone_weight() -> f64 {
    0.05_f64
}
3877
3878impl Default for TypoTypeWeights {
3879    fn default() -> Self {
3880        Self {
3881            substitution: default_substitution_weight(),
3882            transposition: default_transposition_weight(),
3883            insertion: default_insertion_weight(),
3884            deletion: default_deletion_weight(),
3885            ocr_errors: default_ocr_weight(),
3886            homophones: default_homophone_weight(),
3887        }
3888    }
3889}
3890
3891/// Format variation configuration.
3892#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3893pub struct FormatVariationSchemaConfig {
3894    /// Enable format variations
3895    #[serde(default)]
3896    pub enabled: bool,
3897    /// Date format variation settings
3898    #[serde(default)]
3899    pub dates: DateFormatVariationConfig,
3900    /// Amount format variation settings
3901    #[serde(default)]
3902    pub amounts: AmountFormatVariationConfig,
3903    /// Identifier format variation settings
3904    #[serde(default)]
3905    pub identifiers: IdentifierFormatVariationConfig,
3906}
3907
3908/// Date format variation configuration.
3909#[derive(Debug, Clone, Serialize, Deserialize)]
3910pub struct DateFormatVariationConfig {
3911    /// Enable date format variations
3912    #[serde(default)]
3913    pub enabled: bool,
3914    /// Overall variation rate
3915    #[serde(default = "default_date_variation_rate")]
3916    pub rate: f64,
3917    /// Include ISO format (2024-01-15)
3918    #[serde(default = "default_true")]
3919    pub iso_format: bool,
3920    /// Include US format (01/15/2024)
3921    #[serde(default)]
3922    pub us_format: bool,
3923    /// Include EU format (15.01.2024)
3924    #[serde(default)]
3925    pub eu_format: bool,
3926    /// Include long format (January 15, 2024)
3927    #[serde(default)]
3928    pub long_format: bool,
3929}
3930
/// Fallback value for the date format variation rate: `0.05`.
fn default_date_variation_rate() -> f64 {
    const RATE: f64 = 0.05;
    RATE
}
3934
3935impl Default for DateFormatVariationConfig {
3936    fn default() -> Self {
3937        Self {
3938            enabled: false,
3939            rate: default_date_variation_rate(),
3940            iso_format: true,
3941            us_format: false,
3942            eu_format: false,
3943            long_format: false,
3944        }
3945    }
3946}
3947
3948/// Amount format variation configuration.
3949#[derive(Debug, Clone, Serialize, Deserialize)]
3950pub struct AmountFormatVariationConfig {
3951    /// Enable amount format variations
3952    #[serde(default)]
3953    pub enabled: bool,
3954    /// Overall variation rate
3955    #[serde(default = "default_amount_variation_rate")]
3956    pub rate: f64,
3957    /// Include US comma format (1,234.56)
3958    #[serde(default)]
3959    pub us_comma_format: bool,
3960    /// Include EU format (1.234,56)
3961    #[serde(default)]
3962    pub eu_format: bool,
3963    /// Include currency prefix ($1,234.56)
3964    #[serde(default)]
3965    pub currency_prefix: bool,
3966    /// Include accounting format with parentheses for negatives
3967    #[serde(default)]
3968    pub accounting_format: bool,
3969}
3970
/// Fallback value for the amount format variation rate: `0.02`.
fn default_amount_variation_rate() -> f64 {
    const RATE: f64 = 0.02;
    RATE
}
3974
3975impl Default for AmountFormatVariationConfig {
3976    fn default() -> Self {
3977        Self {
3978            enabled: false,
3979            rate: default_amount_variation_rate(),
3980            us_comma_format: false,
3981            eu_format: false,
3982            currency_prefix: false,
3983            accounting_format: false,
3984        }
3985    }
3986}
3987
3988/// Identifier format variation configuration.
3989#[derive(Debug, Clone, Serialize, Deserialize)]
3990pub struct IdentifierFormatVariationConfig {
3991    /// Enable identifier format variations
3992    #[serde(default)]
3993    pub enabled: bool,
3994    /// Overall variation rate
3995    #[serde(default = "default_identifier_variation_rate")]
3996    pub rate: f64,
3997    /// Case variations (uppercase, lowercase, mixed)
3998    #[serde(default)]
3999    pub case_variations: bool,
4000    /// Padding variations (leading zeros)
4001    #[serde(default)]
4002    pub padding_variations: bool,
4003    /// Separator variations (dash vs underscore)
4004    #[serde(default)]
4005    pub separator_variations: bool,
4006}
4007
/// Fallback value for the identifier format variation rate: `0.02`.
fn default_identifier_variation_rate() -> f64 {
    const RATE: f64 = 0.02;
    RATE
}
4011
4012impl Default for IdentifierFormatVariationConfig {
4013    fn default() -> Self {
4014        Self {
4015            enabled: false,
4016            rate: default_identifier_variation_rate(),
4017            case_variations: false,
4018            padding_variations: false,
4019            separator_variations: false,
4020        }
4021    }
4022}
4023
4024/// Duplicate injection configuration.
4025#[derive(Debug, Clone, Serialize, Deserialize)]
4026pub struct DuplicateSchemaConfig {
4027    /// Enable duplicate injection
4028    #[serde(default)]
4029    pub enabled: bool,
4030    /// Overall duplicate rate
4031    #[serde(default = "default_duplicate_rate")]
4032    pub rate: f64,
4033    /// Exact duplicate proportion (out of duplicates)
4034    #[serde(default = "default_exact_duplicate_ratio")]
4035    pub exact_duplicate_ratio: f64,
4036    /// Near duplicate proportion (slight variations)
4037    #[serde(default = "default_near_duplicate_ratio")]
4038    pub near_duplicate_ratio: f64,
4039    /// Fuzzy duplicate proportion (typos in key fields)
4040    #[serde(default = "default_fuzzy_duplicate_ratio")]
4041    pub fuzzy_duplicate_ratio: f64,
4042    /// Maximum date offset for near/fuzzy duplicates (days)
4043    #[serde(default = "default_max_date_offset")]
4044    pub max_date_offset_days: u32,
4045    /// Maximum amount variance for near duplicates (fraction)
4046    #[serde(default = "default_max_amount_variance")]
4047    pub max_amount_variance: f64,
4048}
4049
/// Fallback value for the overall duplicate rate: `0.005`.
fn default_duplicate_rate() -> f64 {
    const RATE: f64 = 0.005;
    RATE
}
/// Fallback proportion of exact duplicates: `0.4`.
fn default_exact_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.4;
    RATIO
}
/// Fallback proportion of near duplicates: `0.35`.
fn default_near_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.35;
    RATIO
}
/// Fallback proportion of fuzzy duplicates: `0.25`.
fn default_fuzzy_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.25;
    RATIO
}
/// Fallback maximum date offset, in days, for near/fuzzy duplicates: `3`.
fn default_max_date_offset() -> u32 {
    const MAX_OFFSET_DAYS: u32 = 3;
    MAX_OFFSET_DAYS
}
/// Fallback maximum amount variance (fraction) for near duplicates: `0.01`.
fn default_max_amount_variance() -> f64 {
    const MAX_VARIANCE: f64 = 0.01;
    MAX_VARIANCE
}
4068
4069impl Default for DuplicateSchemaConfig {
4070    fn default() -> Self {
4071        Self {
4072            enabled: false,
4073            rate: default_duplicate_rate(),
4074            exact_duplicate_ratio: default_exact_duplicate_ratio(),
4075            near_duplicate_ratio: default_near_duplicate_ratio(),
4076            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
4077            max_date_offset_days: default_max_date_offset(),
4078            max_amount_variance: default_max_amount_variance(),
4079        }
4080    }
4081}
4082
4083/// Encoding issue configuration.
4084#[derive(Debug, Clone, Serialize, Deserialize)]
4085pub struct EncodingIssueSchemaConfig {
4086    /// Enable encoding issue injection
4087    #[serde(default)]
4088    pub enabled: bool,
4089    /// Overall encoding issue rate
4090    #[serde(default = "default_encoding_rate")]
4091    pub rate: f64,
4092    /// Include mojibake (UTF-8/Latin-1 confusion)
4093    #[serde(default)]
4094    pub mojibake: bool,
4095    /// Include HTML entity corruption
4096    #[serde(default)]
4097    pub html_entities: bool,
4098    /// Include BOM issues
4099    #[serde(default)]
4100    pub bom_issues: bool,
4101}
4102
/// Fallback value for the encoding issue rate: `0.001`.
fn default_encoding_rate() -> f64 {
    const RATE: f64 = 0.001;
    RATE
}
4106
4107impl Default for EncodingIssueSchemaConfig {
4108    fn default() -> Self {
4109        Self {
4110            enabled: false,
4111            rate: default_encoding_rate(),
4112            mojibake: false,
4113            html_entities: false,
4114            bom_issues: false,
4115        }
4116    }
4117}
4118
4119/// Per-sink quality profiles for different output formats.
4120#[derive(Debug, Clone, Serialize, Deserialize, Default)]
4121pub struct SinkQualityProfiles {
4122    /// CSV-specific quality settings
4123    #[serde(default)]
4124    pub csv: Option<SinkQualityOverride>,
4125    /// JSON-specific quality settings
4126    #[serde(default)]
4127    pub json: Option<SinkQualityOverride>,
4128    /// Parquet-specific quality settings
4129    #[serde(default)]
4130    pub parquet: Option<SinkQualityOverride>,
4131}
4132
4133/// Quality setting overrides for a specific sink type.
4134#[derive(Debug, Clone, Serialize, Deserialize)]
4135pub struct SinkQualityOverride {
4136    /// Override enabled state
4137    pub enabled: Option<bool>,
4138    /// Override missing value rate
4139    pub missing_rate: Option<f64>,
4140    /// Override typo rate
4141    pub typo_rate: Option<f64>,
4142    /// Override format variation rate
4143    pub format_variation_rate: Option<f64>,
4144    /// Override duplicate rate
4145    pub duplicate_rate: Option<f64>,
4146}
4147
/// Unit tests for the configuration schema: serde round trips, default
/// values, and deserialization of partial YAML documents.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::presets::demo_preset;

    // ==========================================================================
    // Serialization/Deserialization Tests
    // ==========================================================================

    /// The demo preset keeps its key fields across a YAML round trip.
    #[test]
    fn test_config_yaml_roundtrip() {
        let config = demo_preset();
        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
        let deserialized: GeneratorConfig =
            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");

        assert_eq!(
            config.global.period_months,
            deserialized.global.period_months
        );
        assert_eq!(config.global.industry, deserialized.global.industry);
        assert_eq!(config.companies.len(), deserialized.companies.len());
        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
    }

    /// The demo preset keeps its key fields across a JSON round trip.
    #[test]
    fn test_config_json_roundtrip() {
        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
        let mut config = demo_preset();
        // Replace infinity with a large but finite value for JSON compatibility
        config.master_data.employees.approval_limits.executive = 1e12;

        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
        let deserialized: GeneratorConfig =
            serde_json::from_str(&json).expect("Failed to deserialize from JSON");

        assert_eq!(
            config.global.period_months,
            deserialized.global.period_months
        );
        assert_eq!(config.global.industry, deserialized.global.industry);
        assert_eq!(config.companies.len(), deserialized.companies.len());
    }

    /// Each named transaction volume serializes to JSON containing its
    /// snake_case key.
    #[test]
    fn test_transaction_volume_serialization() {
        // A fixed-size array is enough here; the fixture is only iterated.
        let volumes = [
            (TransactionVolume::TenK, "ten_k"),
            (TransactionVolume::HundredK, "hundred_k"),
            (TransactionVolume::OneM, "one_m"),
            (TransactionVolume::TenM, "ten_m"),
            (TransactionVolume::HundredM, "hundred_m"),
        ];

        for (volume, expected_key) in volumes {
            let json = serde_json::to_string(&volume).expect("Failed to serialize");
            assert!(
                json.contains(expected_key),
                "Expected {} in JSON: {}",
                expected_key,
                json
            );
        }
    }

    /// A custom transaction volume keeps its count across a JSON round trip.
    #[test]
    fn test_transaction_volume_custom_serialization() {
        let volume = TransactionVolume::Custom(12345);
        let json = serde_json::to_string(&volume).expect("Failed to serialize");
        let deserialized: TransactionVolume =
            serde_json::from_str(&json).expect("Failed to deserialize");
        assert_eq!(deserialized.count(), 12345);
    }

    /// Every `OutputMode` variant survives a JSON round trip.
    #[test]
    fn test_output_mode_serialization() {
        let modes = [
            OutputMode::Streaming,
            OutputMode::FlatFile,
            OutputMode::Both,
        ];

        for mode in modes {
            let json = serde_json::to_string(&mode).expect("Failed to serialize");
            let deserialized: OutputMode =
                serde_json::from_str(&json).expect("Failed to deserialize");
            // Variants are compared via their Debug rendering; assert_eq!
            // prints both sides when they differ.
            assert_eq!(format!("{:?}", mode), format!("{:?}", deserialized));
        }
    }

    /// Every `FileFormat` variant survives a JSON round trip.
    #[test]
    fn test_file_format_serialization() {
        let formats = [
            FileFormat::Csv,
            FileFormat::Parquet,
            FileFormat::Json,
            FileFormat::JsonLines,
        ];

        for format in formats {
            let json = serde_json::to_string(&format).expect("Failed to serialize");
            let deserialized: FileFormat =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", format), format!("{:?}", deserialized));
        }
    }

    /// Every `CompressionAlgorithm` variant survives a JSON round trip.
    #[test]
    fn test_compression_algorithm_serialization() {
        let algos = [
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Zstd,
            CompressionAlgorithm::Lz4,
            CompressionAlgorithm::Snappy,
        ];

        for algo in algos {
            let json = serde_json::to_string(&algo).expect("Failed to serialize");
            let deserialized: CompressionAlgorithm =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", algo), format!("{:?}", deserialized));
        }
    }

    /// Every `TransferPricingMethod` variant survives a JSON round trip.
    #[test]
    fn test_transfer_pricing_method_serialization() {
        let methods = [
            TransferPricingMethod::CostPlus,
            TransferPricingMethod::ComparableUncontrolled,
            TransferPricingMethod::ResalePrice,
            TransferPricingMethod::TransactionalNetMargin,
            TransferPricingMethod::ProfitSplit,
        ];

        for method in methods {
            let json = serde_json::to_string(&method).expect("Failed to serialize");
            let deserialized: TransferPricingMethod =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", method), format!("{:?}", deserialized));
        }
    }

    /// Every `BenfordExemption` variant survives a JSON round trip.
    #[test]
    fn test_benford_exemption_serialization() {
        let exemptions = [
            BenfordExemption::Recurring,
            BenfordExemption::Payroll,
            BenfordExemption::FixedFees,
            BenfordExemption::RoundAmounts,
        ];

        for exemption in exemptions {
            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
            let deserialized: BenfordExemption =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(format!("{:?}", exemption), format!("{:?}", deserialized));
        }
    }

    // ==========================================================================
    // Default Value Tests
    // ==========================================================================

    /// Fields omitted from a `GlobalConfig` YAML document take their defaults.
    #[test]
    fn test_global_config_defaults() {
        let yaml = r#"
            industry: manufacturing
            start_date: "2024-01-01"
            period_months: 6
        "#;
        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.group_currency, "USD");
        assert!(config.parallel);
        assert_eq!(config.worker_threads, 0);
        assert_eq!(config.memory_limit_mb, 0);
    }

    /// `FraudConfig::default()` is disabled with the expected rate.
    #[test]
    fn test_fraud_config_defaults() {
        let config = FraudConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.fraud_rate, 0.005);
        assert!(!config.clustering_enabled);
    }

    /// `InternalControlsConfig::default()` carries the expected values.
    #[test]
    fn test_internal_controls_config_defaults() {
        let config = InternalControlsConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.exception_rate, 0.02);
        assert_eq!(config.sod_violation_rate, 0.01);
        assert!(config.export_control_master_data);
        assert_eq!(config.sox_materiality_threshold, 10000.0);
    }

    /// `OutputConfig::default()` carries the expected mode, format,
    /// compression and partitioning settings.
    #[test]
    fn test_output_config_defaults() {
        let config = OutputConfig::default();
        assert!(matches!(config.mode, OutputMode::FlatFile));
        assert_eq!(config.formats, vec![FileFormat::Parquet]);
        assert!(config.compression.enabled);
        assert!(matches!(
            config.compression.algorithm,
            CompressionAlgorithm::Zstd
        ));
        assert!(config.include_acdoca);
        assert!(!config.include_bseg);
        assert!(config.partition_by_period);
        assert!(!config.partition_by_company);
    }

    /// `ApprovalConfig::default()` carries the expected values.
    #[test]
    fn test_approval_config_defaults() {
        let config = ApprovalConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.auto_approve_threshold, 1000.0);
        assert_eq!(config.rejection_rate, 0.02);
        assert_eq!(config.revision_rate, 0.05);
        assert_eq!(config.average_approval_delay_hours, 4.0);
        assert_eq!(config.thresholds.len(), 4);
    }

    /// `P2PFlowConfig::default()` carries the expected values.
    #[test]
    fn test_p2p_flow_config_defaults() {
        let config = P2PFlowConfig::default();
        assert!(config.enabled);
        assert_eq!(config.three_way_match_rate, 0.95);
        assert_eq!(config.partial_delivery_rate, 0.15);
        assert_eq!(config.average_po_to_gr_days, 14);
    }

    /// `O2CFlowConfig::default()` carries the expected values.
    #[test]
    fn test_o2c_flow_config_defaults() {
        let config = O2CFlowConfig::default();
        assert!(config.enabled);
        assert_eq!(config.credit_check_failure_rate, 0.02);
        assert_eq!(config.return_rate, 0.03);
        assert_eq!(config.bad_debt_rate, 0.01);
    }

    /// `BalanceConfig::default()` carries the expected values.
    #[test]
    fn test_balance_config_defaults() {
        let config = BalanceConfig::default();
        assert!(!config.generate_opening_balances);
        assert!(config.generate_trial_balances);
        assert_eq!(config.target_gross_margin, 0.35);
        assert!(config.validate_balance_equation);
        assert!(config.reconcile_subledgers);
    }

    // ==========================================================================
    // Partial Config Deserialization Tests
    // ==========================================================================

    /// A minimal document parses, and the omitted sections take their defaults.
    #[test]
    fn test_partial_config_with_defaults() {
        // Minimal config that should use all defaults
        let yaml = r#"
            global:
              industry: manufacturing
              start_date: "2024-01-01"
              period_months: 3
            companies:
              - code: "TEST"
                name: "Test Company"
                currency: "USD"
                country: "US"
                annual_transaction_volume: ten_k
            chart_of_accounts:
              complexity: small
            output:
              output_directory: "./output"
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.global.period_months, 3);
        assert_eq!(config.companies.len(), 1);
        assert!(!config.fraud.enabled); // Default
        assert!(!config.internal_controls.enabled); // Default
    }

    /// Explicit fraud settings in the document override the defaults.
    #[test]
    fn test_config_with_fraud_enabled() {
        let yaml = r#"
            global:
              industry: retail
              start_date: "2024-01-01"
              period_months: 12
            companies:
              - code: "RETAIL"
                name: "Retail Co"
                currency: "USD"
                country: "US"
                annual_transaction_volume: hundred_k
            chart_of_accounts:
              complexity: medium
            output:
              output_directory: "./output"
            fraud:
              enabled: true
              fraud_rate: 0.05
              clustering_enabled: true
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.fraud.enabled);
        assert_eq!(config.fraud.fraud_rate, 0.05);
        assert!(config.fraud.clustering_enabled);
    }

    /// Multiple company entries are parsed in document order.
    #[test]
    fn test_config_with_multiple_companies() {
        let yaml = r#"
            global:
              industry: manufacturing
              start_date: "2024-01-01"
              period_months: 6
            companies:
              - code: "HQ"
                name: "Headquarters"
                currency: "USD"
                country: "US"
                annual_transaction_volume: hundred_k
                volume_weight: 1.0
              - code: "EU"
                name: "European Subsidiary"
                currency: "EUR"
                country: "DE"
                annual_transaction_volume: hundred_k
                volume_weight: 0.5
              - code: "APAC"
                name: "Asia Pacific"
                currency: "JPY"
                country: "JP"
                annual_transaction_volume: ten_k
                volume_weight: 0.3
            chart_of_accounts:
              complexity: large
            output:
              output_directory: "./output"
        "#;

        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.companies.len(), 3);
        assert_eq!(config.companies[0].code, "HQ");
        assert_eq!(config.companies[1].currency, "EUR");
        assert_eq!(config.companies[2].volume_weight, 0.3);
    }

    /// An `IntercompanyConfig` document parses into the expected field values.
    #[test]
    fn test_intercompany_config() {
        let yaml = r#"
            enabled: true
            ic_transaction_rate: 0.20
            transfer_pricing_method: cost_plus
            markup_percent: 0.08
            generate_matched_pairs: true
            generate_eliminations: true
        "#;

        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.enabled);
        assert_eq!(config.ic_transaction_rate, 0.20);
        assert!(matches!(
            config.transfer_pricing_method,
            TransferPricingMethod::CostPlus
        ));
        assert_eq!(config.markup_percent, 0.08);
        assert!(config.generate_eliminations);
    }

    // ==========================================================================
    // Company Config Tests
    // ==========================================================================

    /// Fields omitted from a `CompanyConfig` document take their defaults.
    #[test]
    fn test_company_config_defaults() {
        let yaml = r#"
            code: "TEST"
            name: "Test Company"
            currency: "USD"
            country: "US"
            annual_transaction_volume: ten_k
        "#;

        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert_eq!(config.fiscal_year_variant, "K4"); // Default
        assert_eq!(config.volume_weight, 1.0); // Default
    }

    // ==========================================================================
    // Chart of Accounts Config Tests
    // ==========================================================================

    /// Fields omitted from a `ChartOfAccountsConfig` document take their defaults.
    #[test]
    fn test_coa_config_defaults() {
        let yaml = r#"
            complexity: medium
        "#;

        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
        assert!(config.industry_specific); // Default true
        assert!(config.custom_accounts.is_none());
        assert_eq!(config.min_hierarchy_depth, 2); // Default
        assert_eq!(config.max_hierarchy_depth, 5); // Default
    }
}