// datasynth_config/schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
/// Root configuration for the synthetic data generator.
///
/// # Required sections
///
/// `global`, `companies`, `chart_of_accounts` and `output` carry no
/// `#[serde(default)]`, so deserialization fails when any of them is
/// missing. Every other section is optional and falls back to its
/// `Default` implementation when omitted.
///
/// # camelCase alias policy
///
/// Every multi-word field carries `#[serde(alias = "camelCaseName")]`
/// so SDK clients that follow JSON conventions can submit configs
/// without round-tripping through a snake_case transformer.
///
/// Before v4.4.1 several fields — `documentFlows`, `accountingStandards`,
/// `complianceRegulations`, `analyticsMetadata` — had no alias, so SDK
/// submissions silently fell through to defaults. The symptom was
/// "enabling the 6 feature subsections together collapses the archive
/// from 99 files to 19". Root cause: those four fields never parsed;
/// the orchestrator produced far less data than requested, and
/// `output.exportFormat` similarly fell through so journal_entries
/// landed as the default Parquet/CSV rather than JSON.
///
/// When adding a new multi-word field, add its camelCase alias in the
/// same change so the failure mode above cannot recur.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratorConfig {
    /// Global settings (required).
    pub global: GlobalConfig,
    /// Company configuration (required).
    pub companies: Vec<CompanyConfig>,
    /// Chart of Accounts configuration (required).
    #[serde(alias = "chartOfAccounts")]
    pub chart_of_accounts: ChartOfAccountsConfig,
    /// Transaction generation settings
    #[serde(default)]
    pub transactions: TransactionConfig,
    /// Output configuration (required).
    pub output: OutputConfig,
    /// Fraud simulation settings
    #[serde(default)]
    pub fraud: FraudConfig,
    /// Data quality variation settings
    #[serde(default, alias = "dataQuality")]
    pub data_quality: DataQualitySchemaConfig,
    /// Internal Controls System settings
    #[serde(default, alias = "internalControls")]
    pub internal_controls: InternalControlsConfig,
    /// Business process mix
    #[serde(default, alias = "businessProcesses")]
    pub business_processes: BusinessProcessConfig,
    /// User persona distribution
    #[serde(default, alias = "userPersonas")]
    pub user_personas: UserPersonaConfig,
    /// Template configuration for realistic data
    #[serde(default)]
    pub templates: TemplateConfig,
    /// Approval workflow configuration
    #[serde(default)]
    pub approval: ApprovalConfig,
    /// Department structure configuration
    #[serde(default)]
    pub departments: DepartmentConfig,
    /// Master data generation settings
    #[serde(default, alias = "masterData")]
    pub master_data: MasterDataConfig,
    /// Document flow generation settings
    #[serde(default, alias = "documentFlows")]
    pub document_flows: DocumentFlowConfig,
    /// Intercompany transaction settings
    #[serde(default)]
    pub intercompany: IntercompanyConfig,
    /// Balance and trial balance settings
    #[serde(default)]
    pub balance: BalanceConfig,
    /// OCPM (Object-Centric Process Mining) settings
    #[serde(default)]
    pub ocpm: OcpmConfig,
    /// Audit engagement and workpaper generation settings
    #[serde(default)]
    pub audit: AuditGenerationConfig,
    /// Banking KYC/AML transaction generation settings
    #[serde(default)]
    pub banking: datasynth_banking::BankingConfig,
    /// Scenario configuration for metadata and tagging (Phase 1.3).
    ///
    /// Distinct from [`GeneratorConfig::scenarios`], which configures
    /// counterfactual simulation.
    #[serde(default)]
    pub scenario: ScenarioConfig,
    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
    #[serde(default)]
    pub temporal: TemporalDriftConfig,
    /// Graph export configuration for accounting network export
    #[serde(default, alias = "graphExport")]
    pub graph_export: GraphExportConfig,
    /// Streaming output API configuration
    #[serde(default)]
    pub streaming: StreamingSchemaConfig,
    /// Rate limiting configuration
    #[serde(default, alias = "rateLimit")]
    pub rate_limit: RateLimitSchemaConfig,
    /// Temporal attribute generation configuration
    #[serde(default, alias = "temporalAttributes")]
    pub temporal_attributes: TemporalAttributeSchemaConfig,
    /// Relationship generation configuration
    #[serde(default)]
    pub relationships: RelationshipSchemaConfig,
    /// Accounting standards framework configuration (IFRS, US GAAP)
    #[serde(default, alias = "accountingStandards")]
    pub accounting_standards: AccountingStandardsConfig,
    /// Audit standards framework configuration (ISA, PCAOB)
    #[serde(default, alias = "auditStandards")]
    pub audit_standards: AuditStandardsConfig,
    /// Advanced distribution configuration (mixture models, correlations, regime changes)
    #[serde(default)]
    pub distributions: AdvancedDistributionConfig,
    /// Temporal patterns configuration (business days, period-end dynamics, processing lags)
    #[serde(default, alias = "temporalPatterns")]
    pub temporal_patterns: TemporalPatternsConfig,
    /// Vendor network configuration (multi-tier supply chain modeling)
    #[serde(default, alias = "vendorNetwork")]
    pub vendor_network: VendorNetworkSchemaConfig,
    /// Customer segmentation configuration (value segments, lifecycle stages)
    #[serde(default, alias = "customerSegmentation")]
    pub customer_segmentation: CustomerSegmentationSchemaConfig,
    /// Relationship strength calculation configuration
    #[serde(default, alias = "relationshipStrength")]
    pub relationship_strength: RelationshipStrengthSchemaConfig,
    /// Cross-process link configuration (P2P ↔ O2C via inventory)
    #[serde(default, alias = "crossProcessLinks")]
    pub cross_process_links: CrossProcessLinksSchemaConfig,
    /// Organizational events configuration (acquisitions, divestitures, etc.)
    #[serde(default, alias = "organizationalEvents")]
    pub organizational_events: OrganizationalEventsSchemaConfig,
    /// Behavioral drift configuration (vendor, customer, employee behavior)
    #[serde(default, alias = "behavioralDrift")]
    pub behavioral_drift: BehavioralDriftSchemaConfig,
    /// Market drift configuration (economic cycles, commodities, price shocks)
    #[serde(default, alias = "marketDrift")]
    pub market_drift: MarketDriftSchemaConfig,
    /// Drift labeling configuration for ground truth generation
    #[serde(default, alias = "driftLabeling")]
    pub drift_labeling: DriftLabelingSchemaConfig,
    /// Enhanced anomaly injection configuration (multi-stage schemes, correlated injection, near-miss)
    #[serde(default, alias = "anomalyInjection")]
    pub anomaly_injection: EnhancedAnomalyConfig,
    /// Industry-specific transaction and anomaly generation configuration
    #[serde(default, alias = "industrySpecific")]
    pub industry_specific: IndustrySpecificConfig,
    /// Fingerprint privacy configuration for extraction/synthesis
    #[serde(default, alias = "fingerprintPrivacy")]
    pub fingerprint_privacy: FingerprintPrivacyConfig,
    /// Quality gate configuration for pass/fail thresholds
    #[serde(default, alias = "qualityGates")]
    pub quality_gates: QualityGatesSchemaConfig,
    /// Compliance configuration (EU AI Act, content marking)
    #[serde(default)]
    pub compliance: ComplianceSchemaConfig,
    /// Webhook notification configuration
    #[serde(default)]
    pub webhooks: WebhookSchemaConfig,
    /// LLM enrichment configuration (AI-augmented vendor names, descriptions, explanations)
    #[serde(default)]
    pub llm: LlmSchemaConfig,
    /// Diffusion model configuration (statistical diffusion-based data enhancement)
    #[serde(default)]
    pub diffusion: DiffusionSchemaConfig,
    /// Causal generation configuration (structural causal models, interventions)
    #[serde(default)]
    pub causal: CausalSchemaConfig,

    // ===== Enterprise Process Chain Extensions =====
    /// Source-to-Pay (S2C/S2P) configuration (sourcing, contracts, catalogs, scorecards)
    #[serde(default, alias = "sourceToPay")]
    pub source_to_pay: SourceToPayConfig,
    /// Financial reporting configuration (financial statements, KPIs, budgets)
    #[serde(default, alias = "financialReporting")]
    pub financial_reporting: FinancialReportingConfig,
    /// HR process configuration (payroll, time & attendance, expenses)
    #[serde(default)]
    pub hr: HrConfig,
    /// Manufacturing configuration (production orders, WIP, routing)
    #[serde(default)]
    pub manufacturing: ManufacturingProcessConfig,
    /// Sales quote configuration (quote-to-order pipeline)
    #[serde(default, alias = "salesQuotes")]
    pub sales_quotes: SalesQuoteConfig,
    /// Tax accounting configuration (VAT/GST, sales tax, withholding, provisions, payroll tax)
    #[serde(default)]
    pub tax: TaxConfig,
    /// Treasury and cash management configuration
    #[serde(default)]
    pub treasury: TreasuryConfig,
    /// Project accounting configuration
    #[serde(default, alias = "projectAccounting")]
    pub project_accounting: ProjectAccountingConfig,
    /// ESG / Sustainability reporting configuration
    #[serde(default)]
    pub esg: EsgConfig,
    /// Country pack configuration (external packs directory, per-country overrides).
    ///
    /// `None` when the section is omitted.
    #[serde(default, alias = "countryPacks")]
    pub country_packs: Option<CountryPacksSchemaConfig>,
    /// Counterfactual simulation scenario configuration.
    ///
    /// Distinct from [`GeneratorConfig::scenario`], which holds metadata
    /// and tagging settings.
    #[serde(default)]
    pub scenarios: ScenariosConfig,
    /// Generation session configuration (period-by-period generation with balance carry-forward)
    #[serde(default)]
    pub session: SessionSchemaConfig,
    /// Compliance regulations framework configuration (standards registry, jurisdictions, temporal versioning, audit templates, graph integration)
    #[serde(default, alias = "complianceRegulations")]
    pub compliance_regulations: ComplianceRegulationsConfig,
    /// v3.3.0: analytics metadata phase — prior-year comparatives,
    /// industry benchmarks, management reports, drift events. Off by
    /// default so v3.2.1 archives are byte-identical.
    #[serde(default, alias = "analyticsMetadata")]
    pub analytics_metadata: AnalyticsMetadataConfig,
}
217
218/// v3.3.0: analytics-metadata phase configuration.
219///
220/// Gates the `phase_analytics_metadata` pass that runs AFTER all
221/// JE-adding phases (including the fraud-bias sweep at Phase 20b).
222/// When enabled, the orchestrator calls `PriorYearGenerator`,
223/// `IndustryBenchmarkGenerator`, `ManagementReportGenerator`, and
224/// `DriftEventGenerator` in sequence; each sub-flag below controls
225/// whether that specific generator fires.
226#[derive(Debug, Clone, Serialize, Deserialize)]
227pub struct AnalyticsMetadataConfig {
228    /// Master switch for the whole analytics phase.
229    #[serde(default)]
230    pub enabled: bool,
231    /// Emit `PriorYearComparative` records derived from current
232    /// period's account balances.
233    #[serde(default = "default_true")]
234    pub prior_year: bool,
235    /// Emit `IndustryBenchmark` records for the configured industry.
236    #[serde(default = "default_true")]
237    pub industry_benchmark: bool,
238    /// Emit management-report artefacts.
239    #[serde(default = "default_true")]
240    pub management_reports: bool,
241    /// Emit `LabeledDriftEvent` records — post-generation sweep over
242    /// journal entries to label detected drift patterns.
243    #[serde(default = "default_true")]
244    pub drift_events: bool,
245}
246
247impl Default for AnalyticsMetadataConfig {
248    fn default() -> Self {
249        Self {
250            enabled: false,
251            prior_year: true,
252            industry_benchmark: true,
253            management_reports: true,
254            drift_events: true,
255        }
256    }
257}
258
259/// LLM enrichment configuration.
260///
261/// Controls AI-augmented metadata enrichment using LLM providers.
262/// When enabled, vendor names, transaction descriptions, and anomaly explanations
263/// are enriched using the configured provider (mock by default).
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct LlmSchemaConfig {
266    /// Whether LLM enrichment is enabled.
267    #[serde(default)]
268    pub enabled: bool,
269    /// Provider type: "mock", "openai", "anthropic", "custom".
270    #[serde(default = "default_llm_provider")]
271    pub provider: String,
272    /// Model name/ID for the provider.
273    #[serde(default = "default_llm_model_name")]
274    pub model: String,
275    /// Maximum number of vendor names to enrich per run.
276    #[serde(default = "default_llm_batch_size")]
277    pub max_vendor_enrichments: usize,
278
279    /// v4.1.1+: also enrich customer names at generate time.
280    /// Default `false` preserves v4.1.0 behaviour.
281    #[serde(default)]
282    pub enrich_customers: bool,
283
284    /// v4.1.1+: also enrich material descriptions at generate time.
285    /// Default `false`.
286    #[serde(default)]
287    pub enrich_materials: bool,
288
289    /// v4.1.1+: also enrich audit finding titles at generate time
290    /// (the finding narratives remain on their existing template path
291    /// because they're richer and locale-specific). Default `false`.
292    #[serde(default)]
293    pub enrich_findings: bool,
294
295    /// v4.1.1+: upper bound on customer enrichments per run. Matches
296    /// `max_vendor_enrichments` semantics.
297    #[serde(default = "default_llm_batch_size")]
298    pub max_customer_enrichments: usize,
299
300    /// v4.1.1+: upper bound on material enrichments per run.
301    #[serde(default = "default_llm_batch_size")]
302    pub max_material_enrichments: usize,
303
304    /// v4.1.1+: upper bound on finding enrichments per run.
305    #[serde(default = "default_llm_batch_size")]
306    pub max_finding_enrichments: usize,
307}
308
/// Serde fallback for `LlmSchemaConfig::provider`: `"mock"`.
fn default_llm_provider() -> String {
    String::from("mock")
}
312
/// Serde fallback for `LlmSchemaConfig::model`: `"gpt-4o-mini"`.
fn default_llm_model_name() -> String {
    String::from("gpt-4o-mini")
}
316
/// Serde fallback shared by all `max_*_enrichments` caps on `LlmSchemaConfig`.
fn default_llm_batch_size() -> usize {
    const PER_RUN_CAP: usize = 50;
    PER_RUN_CAP
}
320
321impl Default for LlmSchemaConfig {
322    fn default() -> Self {
323        Self {
324            enabled: false,
325            provider: default_llm_provider(),
326            model: default_llm_model_name(),
327            max_vendor_enrichments: default_llm_batch_size(),
328            enrich_customers: false,
329            enrich_materials: false,
330            enrich_findings: false,
331            max_customer_enrichments: default_llm_batch_size(),
332            max_material_enrichments: default_llm_batch_size(),
333            max_finding_enrichments: default_llm_batch_size(),
334        }
335    }
336}
337
338/// Diffusion model configuration.
339///
340/// Controls statistical diffusion-based data enhancement that generates samples
341/// matching target distribution properties (means, standard deviations, correlations).
342#[derive(Debug, Clone, Serialize, Deserialize)]
343pub struct DiffusionSchemaConfig {
344    /// Whether diffusion enhancement is enabled.
345    #[serde(default)]
346    pub enabled: bool,
347    /// Number of diffusion steps (higher = better quality, slower).
348    #[serde(default = "default_diffusion_steps")]
349    pub n_steps: usize,
350    /// Noise schedule type: "linear", "cosine", "sigmoid".
351    #[serde(default = "default_diffusion_schedule")]
352    pub schedule: String,
353    /// Number of sample rows to generate for demonstration.
354    #[serde(default = "default_diffusion_sample_size")]
355    pub sample_size: usize,
356    /// Backend type: "statistical" (default), "neural", "hybrid".
357    #[serde(default = "default_diffusion_backend")]
358    pub backend: String,
359    /// Neural diffusion backend configuration (used when backend is "neural" or "hybrid").
360    #[serde(default)]
361    pub neural: NeuralDiffusionSchemaConfig,
362}
363
/// Serde fallback for `DiffusionSchemaConfig::n_steps`.
fn default_diffusion_steps() -> usize {
    const STEPS: usize = 100;
    STEPS
}
367
/// Serde fallback for `DiffusionSchemaConfig::schedule`: `"linear"`.
fn default_diffusion_schedule() -> String {
    String::from("linear")
}
371
/// Serde fallback for `DiffusionSchemaConfig::sample_size`.
fn default_diffusion_sample_size() -> usize {
    const ROWS: usize = 100;
    ROWS
}
375
/// Serde fallback for `DiffusionSchemaConfig::backend`: `"statistical"`.
fn default_diffusion_backend() -> String {
    String::from("statistical")
}
379
380impl Default for DiffusionSchemaConfig {
381    fn default() -> Self {
382        Self {
383            enabled: false,
384            n_steps: default_diffusion_steps(),
385            schedule: default_diffusion_schedule(),
386            sample_size: default_diffusion_sample_size(),
387            backend: default_diffusion_backend(),
388            neural: NeuralDiffusionSchemaConfig::default(),
389        }
390    }
391}
392
393/// Neural diffusion backend configuration.
394///
395/// Controls the `candle`-based neural score network that learns joint distributions
396/// from training data for the neural and hybrid diffusion backends.
397#[derive(Debug, Clone, Serialize, Deserialize)]
398pub struct NeuralDiffusionSchemaConfig {
399    /// Hidden layer dimensions for the score network MLP.
400    #[serde(default = "default_neural_hidden_dims")]
401    pub hidden_dims: Vec<usize>,
402    /// Dimensionality of the timestep embedding.
403    #[serde(default = "default_neural_timestep_embed_dim")]
404    pub timestep_embed_dim: usize,
405    /// Learning rate for training.
406    #[serde(default = "default_neural_learning_rate")]
407    pub learning_rate: f64,
408    /// Number of training epochs.
409    #[serde(default = "default_neural_training_epochs")]
410    pub training_epochs: usize,
411    /// Training batch size.
412    #[serde(default = "default_neural_batch_size")]
413    pub batch_size: usize,
414    /// Blend weight for hybrid mode (0.0 = all statistical, 1.0 = all neural).
415    #[serde(default = "default_neural_hybrid_weight")]
416    pub hybrid_weight: f64,
417    /// Hybrid blending strategy: "weighted_average", "column_select", "threshold".
418    #[serde(default = "default_neural_hybrid_strategy")]
419    pub hybrid_strategy: String,
420    /// Columns to apply neural generation to (empty = all numeric columns).
421    #[serde(default)]
422    pub neural_columns: Vec<String>,
423    /// v4.4.0+ Optional path to a pre-trained score-network checkpoint
424    /// (`.safetensors`). When set, the orchestrator loads the
425    /// checkpoint instead of training from the first batch — useful
426    /// for long-running production deployments where training cost
427    /// dominates per-run cost. When empty, the orchestrator trains
428    /// on the first generated JE amounts.
429    #[serde(default, skip_serializing_if = "Option::is_none")]
430    pub checkpoint_path: Option<String>,
431}
432
/// Serde fallback for `NeuralDiffusionSchemaConfig::hidden_dims`.
fn default_neural_hidden_dims() -> Vec<usize> {
    const LAYERS: [usize; 3] = [256, 256, 128];
    LAYERS.to_vec()
}
436
/// Serde fallback for `NeuralDiffusionSchemaConfig::timestep_embed_dim`.
fn default_neural_timestep_embed_dim() -> usize {
    const EMBED_DIM: usize = 64;
    EMBED_DIM
}
440
/// Serde fallback for `NeuralDiffusionSchemaConfig::learning_rate`.
fn default_neural_learning_rate() -> f64 {
    const LEARNING_RATE: f64 = 0.001;
    LEARNING_RATE
}
444
/// Serde fallback for `NeuralDiffusionSchemaConfig::training_epochs`.
fn default_neural_training_epochs() -> usize {
    const EPOCHS: usize = 100;
    EPOCHS
}
448
/// Serde fallback for `NeuralDiffusionSchemaConfig::batch_size`.
fn default_neural_batch_size() -> usize {
    const BATCH: usize = 64;
    BATCH
}
452
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_weight`.
fn default_neural_hybrid_weight() -> f64 {
    const BLEND: f64 = 0.5;
    BLEND
}
456
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_strategy`:
/// `"weighted_average"`.
fn default_neural_hybrid_strategy() -> String {
    String::from("weighted_average")
}
460
461impl Default for NeuralDiffusionSchemaConfig {
462    fn default() -> Self {
463        Self {
464            hidden_dims: default_neural_hidden_dims(),
465            timestep_embed_dim: default_neural_timestep_embed_dim(),
466            learning_rate: default_neural_learning_rate(),
467            training_epochs: default_neural_training_epochs(),
468            batch_size: default_neural_batch_size(),
469            hybrid_weight: default_neural_hybrid_weight(),
470            hybrid_strategy: default_neural_hybrid_strategy(),
471            neural_columns: Vec::new(),
472            checkpoint_path: None,
473        }
474    }
475}
476
477/// Causal generation configuration.
478///
479/// Controls structural causal model (SCM) based data generation that respects
480/// causal relationships between variables, supports do-calculus interventions,
481/// and enables counterfactual scenarios.
482#[derive(Debug, Clone, Serialize, Deserialize)]
483pub struct CausalSchemaConfig {
484    /// Whether causal generation is enabled.
485    #[serde(default)]
486    pub enabled: bool,
487    /// Built-in template to use: "fraud_detection", "revenue_cycle", or "custom".
488    #[serde(default = "default_causal_template")]
489    pub template: String,
490    /// Number of causal samples to generate.
491    #[serde(default = "default_causal_sample_size")]
492    pub sample_size: usize,
493    /// Whether to run causal validation on the output.
494    #[serde(default = "default_true")]
495    pub validate: bool,
496}
497
/// Serde fallback for `CausalSchemaConfig::template`: `"fraud_detection"`.
fn default_causal_template() -> String {
    String::from("fraud_detection")
}
501
/// Serde fallback for `CausalSchemaConfig::sample_size`.
fn default_causal_sample_size() -> usize {
    const SAMPLES: usize = 500;
    SAMPLES
}
505
506impl Default for CausalSchemaConfig {
507    fn default() -> Self {
508        Self {
509            enabled: false,
510            template: default_causal_template(),
511            sample_size: default_causal_sample_size(),
512            validate: true,
513        }
514    }
515}
516
517/// Graph export configuration for accounting network and ML training exports.
518///
519/// This section enables exporting generated data as graphs for:
520/// - Network reconstruction algorithms
521/// - Graph neural network training
522/// - Neo4j graph database import
523#[derive(Debug, Clone, Serialize, Deserialize)]
524pub struct GraphExportConfig {
525    /// Enable graph export.
526    #[serde(default)]
527    pub enabled: bool,
528
529    /// Graph types to generate.
530    #[serde(default = "default_graph_types")]
531    pub graph_types: Vec<GraphTypeConfig>,
532
533    /// Export formats to generate.
534    #[serde(default = "default_graph_formats")]
535    pub formats: Vec<GraphExportFormat>,
536
537    /// Train split ratio for ML datasets.
538    #[serde(default = "default_train_ratio")]
539    pub train_ratio: f64,
540
541    /// Validation split ratio for ML datasets.
542    #[serde(default = "default_val_ratio")]
543    pub validation_ratio: f64,
544
545    /// Random seed for train/val/test splits.
546    #[serde(default)]
547    pub split_seed: Option<u64>,
548
549    /// Output subdirectory for graph exports (relative to output directory).
550    #[serde(default = "default_graph_subdir")]
551    pub output_subdirectory: String,
552
553    /// Multi-layer hypergraph export settings for RustGraph integration.
554    #[serde(default)]
555    pub hypergraph: HypergraphExportSettings,
556
557    /// DGL-specific export settings.
558    #[serde(default)]
559    pub dgl: DglExportConfig,
560
561    /// `graphs/je_network.csv` flat edge-list export settings (v5.8.0+).
562    #[serde(default)]
563    pub je_network: JeNetworkConfig,
564}
565
/// Method used to construct edges from journal entries when writing
/// `graphs/je_network.csv` (v5.8.0+).
///
/// Variants are (de)serialized in snake_case, i.e. `"cartesian"` and `"a"`.
///
/// Reference: Ivertowski (2024), *Hardware Accelerated Method for
/// Accounting Network Generation*, Methods A through E.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JeNetworkMethod {
    /// Method B (full Cartesian product) for every JE — bijective on
    /// 2-line entries (Method A) and `n × m` Cartesian for multi-line
    /// entries with proportional amount allocation.  This is the default,
    /// kept for backward compatibility with v5.8.0 datasets that already
    /// consumed the Cartesian-product output, but it produces O(n × m)
    /// edges per JE — a 50-debit / 50-credit period-close
    /// consolidation alone yields 2 500 edges, and a typical
    /// HF-scale 1 M-line config can blow up to 200 M+ edges.
    #[default]
    Cartesian,
    /// Method A only — emit a single edge per 2-line journal entry
    /// (1 debit + 1 credit) and skip multi-line entries entirely.
    /// Edge count = number of 2-line JEs (≈ 60 % of entries per the
    /// 2024 paper); per-edge confidence is exactly `1.0`.  Recommended
    /// for published reference datasets where size and exactness
    /// matter more than recall on multi-line consolidations.
    A,
}
592
/// Configuration for the `graphs/je_network.csv` flat edge-list
/// export (v5.8.0+).
///
/// `deny_unknown_fields` makes deserialization fail on any key other
/// than `method`, so a misspelled option is reported instead of being
/// silently ignored.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct JeNetworkConfig {
    /// Edge-construction method (see [`JeNetworkMethod`]). When omitted,
    /// the enum's `#[default]` variant is used.
    #[serde(default)]
    pub method: JeNetworkMethod,
}
602
603fn default_graph_types() -> Vec<GraphTypeConfig> {
604    vec![GraphTypeConfig::default()]
605}
606
607fn default_graph_formats() -> Vec<GraphExportFormat> {
608    vec![GraphExportFormat::PytorchGeometric]
609}
610
/// Serde fallback for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const TRAIN: f64 = 0.7;
    TRAIN
}
614
/// Serde fallback for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const VALIDATION: f64 = 0.15;
    VALIDATION
}
618
/// Serde fallback for `GraphExportConfig::output_subdirectory`: `"graphs"`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
622
623impl Default for GraphExportConfig {
624    fn default() -> Self {
625        Self {
626            enabled: false,
627            graph_types: default_graph_types(),
628            formats: default_graph_formats(),
629            train_ratio: 0.7,
630            validation_ratio: 0.15,
631            split_seed: None,
632            output_subdirectory: "graphs".to_string(),
633            hypergraph: HypergraphExportSettings::default(),
634            dgl: DglExportConfig::default(),
635            je_network: JeNetworkConfig::default(),
636        }
637    }
638}
639
/// DGL-specific export settings.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DglExportConfig {
    /// Export as a heterogeneous graph (distinct node/edge types).
    ///
    /// When `true` the DGL exporter produces a `HeteroData` object with typed
    /// node and edge stores rather than a single homogeneous graph.
    /// Enable it in YAML with `graph_export.dgl.heterogeneous: true`.
    /// Defaults to `false`.
    #[serde(default)]
    pub heterogeneous: bool,
}

// `Default` is derived, so `heterogeneous` starts as `false` (the `bool` default).
653
654/// Settings for the multi-layer hypergraph export (RustGraph integration).
655///
656/// Produces a 3-layer hypergraph:
657/// - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
658/// - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
659/// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
660#[derive(Debug, Clone, Serialize, Deserialize)]
661pub struct HypergraphExportSettings {
662    /// Enable hypergraph export.
663    #[serde(default)]
664    pub enabled: bool,
665
666    /// Maximum total nodes across all layers (default 50000).
667    #[serde(default = "default_hypergraph_max_nodes")]
668    pub max_nodes: usize,
669
670    /// Aggregation strategy when node budget is exceeded.
671    #[serde(default = "default_aggregation_strategy")]
672    pub aggregation_strategy: String,
673
674    /// Layer 1 (Governance & Controls) settings.
675    #[serde(default)]
676    pub governance_layer: GovernanceLayerSettings,
677
678    /// Layer 2 (Process Events) settings.
679    #[serde(default)]
680    pub process_layer: ProcessLayerSettings,
681
682    /// Layer 3 (Accounting Network) settings.
683    #[serde(default)]
684    pub accounting_layer: AccountingLayerSettings,
685
686    /// Cross-layer edge generation settings.
687    #[serde(default)]
688    pub cross_layer: CrossLayerSettings,
689
690    /// Output subdirectory for hypergraph files (relative to graph output directory).
691    #[serde(default = "default_hypergraph_subdir")]
692    pub output_subdirectory: String,
693
694    /// Output format: "native" (default) for internal field names, "unified" for RustGraph format.
695    #[serde(default = "default_hypergraph_format")]
696    pub output_format: String,
697
698    /// Optional URL for streaming unified JSONL to a RustGraph ingest endpoint.
699    #[serde(default)]
700    pub stream_target: Option<String>,
701
702    /// Batch size for streaming (number of JSONL lines per HTTP POST). Default: 1000.
703    #[serde(default = "default_stream_batch_size")]
704    pub stream_batch_size: usize,
705}
706
/// Serde fallback for `HypergraphExportSettings::max_nodes`.
fn default_hypergraph_max_nodes() -> usize {
    const NODE_BUDGET: usize = 50_000;
    NODE_BUDGET
}
710
/// Serde fallback for `HypergraphExportSettings::aggregation_strategy`:
/// `"pool_by_counterparty"`.
fn default_aggregation_strategy() -> String {
    String::from("pool_by_counterparty")
}
714
/// Serde fallback for `HypergraphExportSettings::output_subdirectory`:
/// `"hypergraph"`.
fn default_hypergraph_subdir() -> String {
    String::from("hypergraph")
}
718
/// Serde fallback for `HypergraphExportSettings::output_format`: `"native"`.
fn default_hypergraph_format() -> String {
    String::from("native")
}
722
/// Serde fallback for `HypergraphExportSettings::stream_batch_size`.
fn default_stream_batch_size() -> usize {
    const LINES_PER_POST: usize = 1000;
    LINES_PER_POST
}
726
727impl Default for HypergraphExportSettings {
728    fn default() -> Self {
729        Self {
730            enabled: false,
731            max_nodes: 50_000,
732            aggregation_strategy: "pool_by_counterparty".to_string(),
733            governance_layer: GovernanceLayerSettings::default(),
734            process_layer: ProcessLayerSettings::default(),
735            accounting_layer: AccountingLayerSettings::default(),
736            cross_layer: CrossLayerSettings::default(),
737            output_subdirectory: "hypergraph".to_string(),
738            output_format: "native".to_string(),
739            stream_target: None,
740            stream_batch_size: 1000,
741        }
742    }
743}
744
745/// Layer 1: Governance & Controls layer settings.
746#[derive(Debug, Clone, Serialize, Deserialize)]
747pub struct GovernanceLayerSettings {
748    /// Include COSO framework nodes (5 components + 17 principles).
749    #[serde(default = "default_true")]
750    pub include_coso: bool,
751    /// Include internal control nodes.
752    #[serde(default = "default_true")]
753    pub include_controls: bool,
754    /// Include SOX assertion nodes.
755    #[serde(default = "default_true")]
756    pub include_sox: bool,
757    /// Include vendor master data nodes.
758    #[serde(default = "default_true")]
759    pub include_vendors: bool,
760    /// Include customer master data nodes.
761    #[serde(default = "default_true")]
762    pub include_customers: bool,
763    /// Include employee/organizational nodes.
764    #[serde(default = "default_true")]
765    pub include_employees: bool,
766}
767
768impl Default for GovernanceLayerSettings {
769    fn default() -> Self {
770        Self {
771            include_coso: true,
772            include_controls: true,
773            include_sox: true,
774            include_vendors: true,
775            include_customers: true,
776            include_employees: true,
777        }
778    }
779}
780
781/// Layer 2: Process Events layer settings.
782#[derive(Debug, Clone, Serialize, Deserialize)]
783pub struct ProcessLayerSettings {
784    /// Include P2P (Procure-to-Pay) document flow nodes.
785    #[serde(default = "default_true")]
786    pub include_p2p: bool,
787    /// Include O2C (Order-to-Cash) document flow nodes.
788    #[serde(default = "default_true")]
789    pub include_o2c: bool,
790    /// Include S2C (Source-to-Contract) document flow nodes.
791    #[serde(default = "default_true")]
792    pub include_s2c: bool,
793    /// Include H2R (Hire-to-Retire) document flow nodes.
794    #[serde(default = "default_true")]
795    pub include_h2r: bool,
796    /// Include MFG (Manufacturing) document flow nodes.
797    #[serde(default = "default_true")]
798    pub include_mfg: bool,
799    /// Include BANK (Banking) document flow nodes.
800    #[serde(default = "default_true")]
801    pub include_bank: bool,
802    /// Include AUDIT document flow nodes.
803    #[serde(default = "default_true")]
804    pub include_audit: bool,
805    /// Include R2R (Record-to-Report) document flow nodes (bank recon + period close).
806    #[serde(default = "default_true")]
807    pub include_r2r: bool,
808    /// Export OCPM events as hyperedges.
809    #[serde(default = "default_true")]
810    pub events_as_hyperedges: bool,
811    /// Threshold: if a counterparty has more documents than this, aggregate into pool nodes.
812    #[serde(default = "default_docs_per_counterparty_threshold")]
813    pub docs_per_counterparty_threshold: usize,
814}
815
/// Serde default for `ProcessLayerSettings::docs_per_counterparty_threshold`.
fn default_docs_per_counterparty_threshold() -> usize {
    const DEFAULT_THRESHOLD: usize = 20;
    DEFAULT_THRESHOLD
}
819
820impl Default for ProcessLayerSettings {
821    fn default() -> Self {
822        Self {
823            include_p2p: true,
824            include_o2c: true,
825            include_s2c: true,
826            include_h2r: true,
827            include_mfg: true,
828            include_bank: true,
829            include_audit: true,
830            include_r2r: true,
831            events_as_hyperedges: true,
832            docs_per_counterparty_threshold: 20,
833        }
834    }
835}
836
837/// Layer 3: Accounting Network layer settings.
838#[derive(Debug, Clone, Serialize, Deserialize)]
839pub struct AccountingLayerSettings {
840    /// Include GL account nodes.
841    #[serde(default = "default_true")]
842    pub include_accounts: bool,
843    /// Export journal entries as hyperedges (debit+credit accounts as participants).
844    #[serde(default = "default_true")]
845    pub je_as_hyperedges: bool,
846}
847
848impl Default for AccountingLayerSettings {
849    fn default() -> Self {
850        Self {
851            include_accounts: true,
852            je_as_hyperedges: true,
853        }
854    }
855}
856
857/// Cross-layer edge generation settings.
858#[derive(Debug, Clone, Serialize, Deserialize)]
859pub struct CrossLayerSettings {
860    /// Generate cross-layer edges (Control→Account, Vendor→PO, etc.).
861    #[serde(default = "default_true")]
862    pub enabled: bool,
863}
864
865impl Default for CrossLayerSettings {
866    fn default() -> Self {
867        Self { enabled: true }
868    }
869}
870
871/// Configuration for a specific graph type to export.
872#[derive(Debug, Clone, Serialize, Deserialize)]
873pub struct GraphTypeConfig {
874    /// Name identifier for this graph configuration.
875    #[serde(default = "default_graph_name")]
876    pub name: String,
877
878    /// Whether to aggregate parallel edges between the same nodes.
879    #[serde(default)]
880    pub aggregate_edges: bool,
881
882    /// Minimum edge weight to include (filters out small transactions).
883    #[serde(default)]
884    pub min_edge_weight: f64,
885
886    /// Whether to include document nodes (creates hub-and-spoke structure).
887    #[serde(default)]
888    pub include_document_nodes: bool,
889}
890
/// Serde default for `GraphTypeConfig::name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
894
895impl Default for GraphTypeConfig {
896    fn default() -> Self {
897        Self {
898            name: "accounting_network".to_string(),
899            aggregate_edges: false,
900            min_edge_weight: 0.0,
901            include_document_nodes: false,
902        }
903    }
904}
905
906/// Export format for graph data.
907#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
908#[serde(rename_all = "snake_case")]
909pub enum GraphExportFormat {
910    /// PyTorch Geometric format (.npy files + metadata.json).
911    PytorchGeometric,
912    /// Neo4j format (CSV files + Cypher import scripts).
913    Neo4j,
914    /// Deep Graph Library format.
915    Dgl,
916    /// RustGraph/RustAssureTwin JSON format.
917    RustGraph,
918    /// RustGraph multi-layer hypergraph format (nodes.jsonl + edges.jsonl + hyperedges.jsonl).
919    RustGraphHypergraph,
920}
921
922/// Scenario configuration for metadata, tagging, and ML training setup.
923///
924/// This section enables tracking the purpose and characteristics of a generation run.
925#[derive(Debug, Clone, Default, Serialize, Deserialize)]
926pub struct ScenarioConfig {
927    /// Tags for categorizing and filtering datasets.
928    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
929    #[serde(default)]
930    pub tags: Vec<String>,
931
932    /// Data quality profile preset.
933    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
934    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
935    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
936    #[serde(default)]
937    pub profile: Option<String>,
938
939    /// Human-readable description of the scenario purpose.
940    #[serde(default)]
941    pub description: Option<String>,
942
943    /// Whether this run is for ML training (enables balanced labeling).
944    #[serde(default)]
945    pub ml_training: bool,
946
947    /// Target anomaly class balance for ML training.
948    /// If set, anomalies will be injected to achieve this ratio.
949    #[serde(default)]
950    pub target_anomaly_ratio: Option<f64>,
951
952    /// Custom metadata key-value pairs.
953    #[serde(default)]
954    pub metadata: std::collections::HashMap<String, String>,
955}
956
957/// Temporal drift configuration for simulating distribution changes over time.
958///
959/// This enables generation of data that shows realistic temporal evolution,
960/// useful for training drift detection models and testing temporal robustness.
961#[derive(Debug, Clone, Serialize, Deserialize)]
962pub struct TemporalDriftConfig {
963    /// Enable temporal drift simulation.
964    #[serde(default)]
965    pub enabled: bool,
966
967    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
968    /// Simulates gradual inflation or business growth.
969    #[serde(default = "default_amount_drift")]
970    pub amount_mean_drift: f64,
971
972    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
973    /// Simulates increasing volatility over time.
974    #[serde(default)]
975    pub amount_variance_drift: f64,
976
977    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
978    /// Simulates increasing fraud attempts or degrading controls.
979    #[serde(default)]
980    pub anomaly_rate_drift: f64,
981
982    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
983    /// Higher values cause more rapid distribution shifts.
984    #[serde(default = "default_concept_drift")]
985    pub concept_drift_rate: f64,
986
987    /// Sudden drift events - probability of a sudden distribution shift in any period.
988    #[serde(default)]
989    pub sudden_drift_probability: f64,
990
991    /// Magnitude of sudden drift events when they occur (multiplier).
992    #[serde(default = "default_sudden_drift_magnitude")]
993    pub sudden_drift_magnitude: f64,
994
995    /// Seasonal drift - enable cyclic patterns that repeat annually.
996    #[serde(default)]
997    pub seasonal_drift: bool,
998
999    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
1000    #[serde(default)]
1001    pub drift_start_period: u32,
1002
1003    /// Drift type: "gradual", "sudden", "recurring", "mixed"
1004    #[serde(default = "default_drift_type")]
1005    pub drift_type: DriftType,
1006}
1007
/// Serde default for `TemporalDriftConfig::amount_mean_drift`.
fn default_amount_drift() -> f64 {
    const DEFAULT_AMOUNT_MEAN_DRIFT: f64 = 0.02;
    DEFAULT_AMOUNT_MEAN_DRIFT
}
1011
/// Serde default for `TemporalDriftConfig::concept_drift_rate`.
fn default_concept_drift() -> f64 {
    const DEFAULT_CONCEPT_DRIFT_RATE: f64 = 0.01;
    DEFAULT_CONCEPT_DRIFT_RATE
}
1015
/// Serde default for `TemporalDriftConfig::sudden_drift_magnitude`.
fn default_sudden_drift_magnitude() -> f64 {
    const DEFAULT_SUDDEN_DRIFT_MAGNITUDE: f64 = 2.0;
    DEFAULT_SUDDEN_DRIFT_MAGNITUDE
}
1019
1020fn default_drift_type() -> DriftType {
1021    DriftType::Gradual
1022}
1023
1024impl Default for TemporalDriftConfig {
1025    fn default() -> Self {
1026        Self {
1027            enabled: false,
1028            amount_mean_drift: 0.02,
1029            amount_variance_drift: 0.0,
1030            anomaly_rate_drift: 0.0,
1031            concept_drift_rate: 0.01,
1032            sudden_drift_probability: 0.0,
1033            sudden_drift_magnitude: 2.0,
1034            seasonal_drift: false,
1035            drift_start_period: 0,
1036            drift_type: DriftType::Gradual,
1037        }
1038    }
1039}
1040
1041impl TemporalDriftConfig {
1042    /// Convert to core DriftConfig for use in generators.
1043    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
1044        datasynth_core::distributions::DriftConfig {
1045            enabled: self.enabled,
1046            amount_mean_drift: self.amount_mean_drift,
1047            amount_variance_drift: self.amount_variance_drift,
1048            anomaly_rate_drift: self.anomaly_rate_drift,
1049            concept_drift_rate: self.concept_drift_rate,
1050            sudden_drift_probability: self.sudden_drift_probability,
1051            sudden_drift_magnitude: self.sudden_drift_magnitude,
1052            seasonal_drift: self.seasonal_drift,
1053            drift_start_period: self.drift_start_period,
1054            drift_type: match self.drift_type {
1055                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
1056                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
1057                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
1058                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
1059            },
1060            regime_changes: Vec::new(),
1061            economic_cycle: Default::default(),
1062            parameter_drifts: Vec::new(),
1063        }
1064    }
1065}
1066
1067/// Types of temporal drift patterns.
1068#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1069#[serde(rename_all = "snake_case")]
1070pub enum DriftType {
1071    /// Gradual, continuous drift over time (like inflation).
1072    #[default]
1073    Gradual,
1074    /// Sudden, point-in-time shifts (like policy changes).
1075    Sudden,
1076    /// Recurring patterns that cycle (like seasonal variations).
1077    Recurring,
1078    /// Combination of gradual background drift with occasional sudden shifts.
1079    Mixed,
1080}
1081
1082// ============================================================================
1083// Streaming Output API Configuration (Phase 2)
1084// ============================================================================
1085
1086/// Configuration for streaming output API.
1087#[derive(Debug, Clone, Serialize, Deserialize)]
1088pub struct StreamingSchemaConfig {
1089    /// Enable streaming output.
1090    #[serde(default)]
1091    pub enabled: bool,
1092    /// Target events per second (0 = unlimited, default 0).
1093    #[serde(default)]
1094    pub events_per_second: f64,
1095    /// Token bucket burst size (default 100).
1096    #[serde(default = "default_burst_size")]
1097    pub burst_size: u32,
1098    /// Buffer size for streaming (number of items).
1099    #[serde(default = "default_buffer_size")]
1100    pub buffer_size: usize,
1101    /// Enable progress reporting.
1102    #[serde(default = "default_true")]
1103    pub enable_progress: bool,
1104    /// Progress reporting interval (number of items).
1105    #[serde(default = "default_progress_interval")]
1106    pub progress_interval: u64,
1107    /// Backpressure strategy.
1108    #[serde(default)]
1109    pub backpressure: BackpressureSchemaStrategy,
1110}
1111
/// Serde default for `StreamingSchemaConfig::buffer_size` (number of items).
fn default_buffer_size() -> usize {
    const DEFAULT_BUFFER_ITEMS: usize = 1_000;
    DEFAULT_BUFFER_ITEMS
}
1115
/// Serde default for `StreamingSchemaConfig::progress_interval` (number of items).
fn default_progress_interval() -> u64 {
    const DEFAULT_PROGRESS_ITEMS: u64 = 100;
    DEFAULT_PROGRESS_ITEMS
}
1119
1120impl Default for StreamingSchemaConfig {
1121    fn default() -> Self {
1122        Self {
1123            enabled: false,
1124            events_per_second: 0.0,
1125            burst_size: 100,
1126            buffer_size: 1000,
1127            enable_progress: true,
1128            progress_interval: 100,
1129            backpressure: BackpressureSchemaStrategy::Block,
1130        }
1131    }
1132}
1133
1134/// Backpressure strategy for streaming output.
1135#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1136#[serde(rename_all = "snake_case")]
1137pub enum BackpressureSchemaStrategy {
1138    /// Block until space is available in the buffer.
1139    #[default]
1140    Block,
1141    /// Drop oldest items when buffer is full.
1142    DropOldest,
1143    /// Drop newest items when buffer is full.
1144    DropNewest,
1145    /// Buffer overflow items up to a limit, then block.
1146    Buffer,
1147}
1148
1149// ============================================================================
1150// Rate Limiting Configuration (Phase 5)
1151// ============================================================================
1152
1153/// Configuration for rate limiting.
1154#[derive(Debug, Clone, Serialize, Deserialize)]
1155pub struct RateLimitSchemaConfig {
1156    /// Enable rate limiting.
1157    #[serde(default)]
1158    pub enabled: bool,
1159    /// Entities per second limit.
1160    #[serde(default = "default_entities_per_second")]
1161    pub entities_per_second: f64,
1162    /// Burst size (number of tokens in bucket).
1163    #[serde(default = "default_burst_size")]
1164    pub burst_size: u32,
1165    /// Backpressure strategy for rate limiting.
1166    #[serde(default)]
1167    pub backpressure: RateLimitBackpressureSchema,
1168}
1169
/// Serde default for `RateLimitSchemaConfig::entities_per_second`.
fn default_entities_per_second() -> f64 {
    const DEFAULT_ENTITIES_PER_SECOND: f64 = 1000.0;
    DEFAULT_ENTITIES_PER_SECOND
}
1173
/// Serde default for the token-bucket `burst_size` fields of the streaming
/// and rate-limit configs.
fn default_burst_size() -> u32 {
    const DEFAULT_BURST_TOKENS: u32 = 100;
    DEFAULT_BURST_TOKENS
}
1177
1178impl Default for RateLimitSchemaConfig {
1179    fn default() -> Self {
1180        Self {
1181            enabled: false,
1182            entities_per_second: 1000.0,
1183            burst_size: 100,
1184            backpressure: RateLimitBackpressureSchema::Block,
1185        }
1186    }
1187}
1188
1189/// Backpressure strategy for rate limiting.
1190#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1191#[serde(rename_all = "snake_case")]
1192pub enum RateLimitBackpressureSchema {
1193    /// Block until rate allows.
1194    #[default]
1195    Block,
1196    /// Drop items that exceed rate.
1197    Drop,
1198    /// Buffer items and process when rate allows.
1199    Buffer,
1200}
1201
1202// ============================================================================
1203// Temporal Attribute Generation Configuration (Phase 3)
1204// ============================================================================
1205
1206/// Configuration for temporal attribute generation.
1207#[derive(Debug, Clone, Serialize, Deserialize)]
1208pub struct TemporalAttributeSchemaConfig {
1209    /// Enable temporal attribute generation.
1210    #[serde(default)]
1211    pub enabled: bool,
1212    /// Valid time configuration.
1213    #[serde(default)]
1214    pub valid_time: ValidTimeSchemaConfig,
1215    /// Transaction time configuration.
1216    #[serde(default)]
1217    pub transaction_time: TransactionTimeSchemaConfig,
1218    /// Generate version chains for entities.
1219    #[serde(default)]
1220    pub generate_version_chains: bool,
1221    /// Average number of versions per entity.
1222    #[serde(default = "default_avg_versions")]
1223    pub avg_versions_per_entity: f64,
1224}
1225
/// Serde default for `TemporalAttributeSchemaConfig::avg_versions_per_entity`.
fn default_avg_versions() -> f64 {
    const DEFAULT_AVG_VERSIONS: f64 = 1.5;
    DEFAULT_AVG_VERSIONS
}
1229
1230impl Default for TemporalAttributeSchemaConfig {
1231    fn default() -> Self {
1232        Self {
1233            enabled: false,
1234            valid_time: ValidTimeSchemaConfig::default(),
1235            transaction_time: TransactionTimeSchemaConfig::default(),
1236            generate_version_chains: false,
1237            avg_versions_per_entity: 1.5,
1238        }
1239    }
1240}
1241
1242/// Configuration for valid time (business time) generation.
1243#[derive(Debug, Clone, Serialize, Deserialize)]
1244pub struct ValidTimeSchemaConfig {
1245    /// Probability that valid_to is set (entity has ended validity).
1246    #[serde(default = "default_closed_probability")]
1247    pub closed_probability: f64,
1248    /// Average validity duration in days.
1249    #[serde(default = "default_avg_validity_days")]
1250    pub avg_validity_days: u32,
1251    /// Standard deviation of validity duration in days.
1252    #[serde(default = "default_validity_stddev")]
1253    pub validity_stddev_days: u32,
1254}
1255
/// Serde default for `ValidTimeSchemaConfig::closed_probability`.
fn default_closed_probability() -> f64 {
    const DEFAULT_CLOSED_PROBABILITY: f64 = 0.1;
    DEFAULT_CLOSED_PROBABILITY
}
1259
/// Serde default for `ValidTimeSchemaConfig::avg_validity_days`.
fn default_avg_validity_days() -> u32 {
    const DEFAULT_AVG_VALIDITY_DAYS: u32 = 365;
    DEFAULT_AVG_VALIDITY_DAYS
}
1263
/// Serde default for `ValidTimeSchemaConfig::validity_stddev_days`.
fn default_validity_stddev() -> u32 {
    const DEFAULT_VALIDITY_STDDEV_DAYS: u32 = 90;
    DEFAULT_VALIDITY_STDDEV_DAYS
}
1267
1268impl Default for ValidTimeSchemaConfig {
1269    fn default() -> Self {
1270        Self {
1271            closed_probability: 0.1,
1272            avg_validity_days: 365,
1273            validity_stddev_days: 90,
1274        }
1275    }
1276}
1277
1278/// Configuration for transaction time (system time) generation.
1279#[derive(Debug, Clone, Serialize, Deserialize)]
1280pub struct TransactionTimeSchemaConfig {
1281    /// Average recording delay in seconds (0 = immediate).
1282    #[serde(default)]
1283    pub avg_recording_delay_seconds: u32,
1284    /// Allow backdating (recording time before valid time).
1285    #[serde(default)]
1286    pub allow_backdating: bool,
1287    /// Probability of backdating if allowed.
1288    #[serde(default = "default_backdating_probability")]
1289    pub backdating_probability: f64,
1290    /// Maximum backdate days.
1291    #[serde(default = "default_max_backdate_days")]
1292    pub max_backdate_days: u32,
1293}
1294
/// Serde default for `TransactionTimeSchemaConfig::backdating_probability`.
fn default_backdating_probability() -> f64 {
    const DEFAULT_BACKDATING_PROBABILITY: f64 = 0.01;
    DEFAULT_BACKDATING_PROBABILITY
}
1298
/// Serde default for `TransactionTimeSchemaConfig::max_backdate_days`.
fn default_max_backdate_days() -> u32 {
    const DEFAULT_MAX_BACKDATE_DAYS: u32 = 30;
    DEFAULT_MAX_BACKDATE_DAYS
}
1302
1303impl Default for TransactionTimeSchemaConfig {
1304    fn default() -> Self {
1305        Self {
1306            avg_recording_delay_seconds: 0,
1307            allow_backdating: false,
1308            backdating_probability: 0.01,
1309            max_backdate_days: 30,
1310        }
1311    }
1312}
1313
1314// ============================================================================
1315// Relationship Generation Configuration (Phase 4)
1316// ============================================================================
1317
1318/// Configuration for relationship generation.
1319#[derive(Debug, Clone, Serialize, Deserialize)]
1320pub struct RelationshipSchemaConfig {
1321    /// Relationship type definitions.
1322    #[serde(default)]
1323    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
1324    /// Allow orphan entities (entities with no relationships).
1325    #[serde(default = "default_true")]
1326    pub allow_orphans: bool,
1327    /// Probability of creating an orphan entity.
1328    #[serde(default = "default_orphan_probability")]
1329    pub orphan_probability: f64,
1330    /// Allow circular relationships.
1331    #[serde(default)]
1332    pub allow_circular: bool,
1333    /// Maximum depth for circular relationship detection.
1334    #[serde(default = "default_max_circular_depth")]
1335    pub max_circular_depth: u32,
1336}
1337
/// Serde default for `RelationshipSchemaConfig::orphan_probability`.
fn default_orphan_probability() -> f64 {
    const DEFAULT_ORPHAN_PROBABILITY: f64 = 0.01;
    DEFAULT_ORPHAN_PROBABILITY
}
1341
/// Serde default for `RelationshipSchemaConfig::max_circular_depth`.
fn default_max_circular_depth() -> u32 {
    const DEFAULT_MAX_CIRCULAR_DEPTH: u32 = 3;
    DEFAULT_MAX_CIRCULAR_DEPTH
}
1345
1346impl Default for RelationshipSchemaConfig {
1347    fn default() -> Self {
1348        Self {
1349            relationship_types: Vec::new(),
1350            allow_orphans: true,
1351            orphan_probability: 0.01,
1352            allow_circular: false,
1353            max_circular_depth: 3,
1354        }
1355    }
1356}
1357
1358/// Configuration for a specific relationship type.
1359#[derive(Debug, Clone, Serialize, Deserialize)]
1360pub struct RelationshipTypeSchemaConfig {
1361    /// Name of the relationship type (e.g., "debits", "credits", "created").
1362    pub name: String,
1363    /// Source entity type (e.g., "journal_entry").
1364    pub source_type: String,
1365    /// Target entity type (e.g., "account").
1366    pub target_type: String,
1367    /// Cardinality rule for this relationship.
1368    #[serde(default)]
1369    pub cardinality: CardinalitySchemaRule,
1370    /// Weight for this relationship in random selection.
1371    #[serde(default = "default_relationship_weight")]
1372    pub weight: f64,
1373    /// Whether this relationship is required.
1374    #[serde(default)]
1375    pub required: bool,
1376    /// Whether this relationship is directed.
1377    #[serde(default = "default_true")]
1378    pub directed: bool,
1379}
1380
/// Serde default for `RelationshipTypeSchemaConfig::weight`.
fn default_relationship_weight() -> f64 {
    const DEFAULT_RELATIONSHIP_WEIGHT: f64 = 1.0;
    DEFAULT_RELATIONSHIP_WEIGHT
}
1384
1385impl Default for RelationshipTypeSchemaConfig {
1386    fn default() -> Self {
1387        Self {
1388            name: String::new(),
1389            source_type: String::new(),
1390            target_type: String::new(),
1391            cardinality: CardinalitySchemaRule::default(),
1392            weight: 1.0,
1393            required: false,
1394            directed: true,
1395        }
1396    }
1397}
1398
1399/// Cardinality rule for relationships in schema config.
1400#[derive(Debug, Clone, Serialize, Deserialize)]
1401#[serde(rename_all = "snake_case")]
1402pub enum CardinalitySchemaRule {
1403    /// One source to one target.
1404    OneToOne,
1405    /// One source to many targets.
1406    OneToMany {
1407        /// Minimum number of targets.
1408        min: u32,
1409        /// Maximum number of targets.
1410        max: u32,
1411    },
1412    /// Many sources to one target.
1413    ManyToOne {
1414        /// Minimum number of sources.
1415        min: u32,
1416        /// Maximum number of sources.
1417        max: u32,
1418    },
1419    /// Many sources to many targets.
1420    ManyToMany {
1421        /// Minimum targets per source.
1422        min_per_source: u32,
1423        /// Maximum targets per source.
1424        max_per_source: u32,
1425    },
1426}
1427
1428impl Default for CardinalitySchemaRule {
1429    fn default() -> Self {
1430        Self::OneToMany { min: 1, max: 5 }
1431    }
1432}
1433
1434/// Global configuration settings.
1435#[derive(Debug, Clone, Serialize, Deserialize)]
1436pub struct GlobalConfig {
1437    /// Random seed for reproducibility
1438    pub seed: Option<u64>,
1439    /// Industry sector
1440    pub industry: IndustrySector,
1441    /// Simulation start date (YYYY-MM-DD)
1442    #[serde(alias = "startDate")]
1443    pub start_date: String,
1444    /// Simulation period in months
1445    #[serde(alias = "periodMonths")]
1446    pub period_months: u32,
1447    /// Base currency for group reporting
1448    #[serde(default = "default_currency", alias = "groupCurrency")]
1449    pub group_currency: String,
1450    /// Presentation currency for consolidated financial statements (ISO 4217).
1451    /// If not set, defaults to `group_currency`.
1452    #[serde(default, alias = "presentationCurrency")]
1453    pub presentation_currency: Option<String>,
1454    /// Enable parallel generation
1455    #[serde(default = "default_true")]
1456    pub parallel: bool,
1457    /// Number of worker threads (0 = auto-detect)
1458    #[serde(default, alias = "workerThreads")]
1459    pub worker_threads: usize,
1460    /// Memory limit in MB (0 = unlimited)
1461    #[serde(default, alias = "memoryLimitMb")]
1462    pub memory_limit_mb: usize,
1463    /// Fiscal year length in months (defaults to 12 if not set).
1464    /// Used by session-based generation to split the total period into fiscal years.
1465    #[serde(default, alias = "fiscalYearMonths")]
1466    pub fiscal_year_months: Option<u32>,
1467}
1468
/// Serde default for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}
/// Shared serde default for boolean flags that are on unless explicitly disabled.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1475
1476/// Configuration for generation session behavior.
1477///
1478/// When enabled, the generation pipeline splits the total period into fiscal years
1479/// and generates data period-by-period, carrying forward balance state.
1480#[derive(Debug, Clone, Serialize, Deserialize)]
1481pub struct SessionSchemaConfig {
1482    /// Whether session-based (period-by-period) generation is enabled.
1483    #[serde(default)]
1484    pub enabled: bool,
1485    /// Optional path for saving/loading session checkpoint files.
1486    #[serde(default)]
1487    pub checkpoint_path: Option<String>,
1488    /// Whether to write output files per fiscal period (e.g., `period_01/`).
1489    #[serde(default = "default_true")]
1490    pub per_period_output: bool,
1491    /// Whether to also produce a single consolidated output across all periods.
1492    #[serde(default = "default_true")]
1493    pub consolidated_output: bool,
1494}
1495
1496impl Default for SessionSchemaConfig {
1497    fn default() -> Self {
1498        Self {
1499            enabled: false,
1500            checkpoint_path: None,
1501            per_period_output: true,
1502            consolidated_output: true,
1503        }
1504    }
1505}
1506
1507/// Company code configuration.
1508#[derive(Debug, Clone, Serialize, Deserialize)]
1509pub struct CompanyConfig {
1510    /// Company code identifier
1511    pub code: String,
1512    /// Company name
1513    pub name: String,
1514    /// Local currency (ISO 4217)
1515    pub currency: String,
1516    /// Functional currency for IAS 21 translation (ISO 4217).
1517    /// If not set, defaults to the `currency` field (i.e. local == functional).
1518    #[serde(default, alias = "functionalCurrency")]
1519    pub functional_currency: Option<String>,
1520    /// Country code (ISO 3166-1 alpha-2)
1521    pub country: String,
1522    /// Fiscal year variant
1523    #[serde(default = "default_fiscal_variant", alias = "fiscalYearVariant")]
1524    pub fiscal_year_variant: String,
1525    /// Transaction volume per year
1526    #[serde(alias = "annualTransactionVolume")]
1527    pub annual_transaction_volume: TransactionVolume,
1528    /// Company-specific transaction weight
1529    #[serde(default = "default_weight", alias = "volumeWeight")]
1530    pub volume_weight: f64,
1531}
1532
/// Serde default for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}
/// Serde default for `CompanyConfig::volume_weight`.
fn default_weight() -> f64 {
    const DEFAULT_VOLUME_WEIGHT: f64 = 1.0;
    DEFAULT_VOLUME_WEIGHT
}
1539
1540/// Transaction volume presets.
1541#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1542#[serde(rename_all = "snake_case")]
1543pub enum TransactionVolume {
1544    /// 10,000 transactions per year
1545    TenK,
1546    /// 50,000 transactions per year
1547    FiftyK,
1548    /// 100,000 transactions per year
1549    HundredK,
1550    /// 1,000,000 transactions per year
1551    OneM,
1552    /// 10,000,000 transactions per year
1553    TenM,
1554    /// 100,000,000 transactions per year
1555    HundredM,
1556    /// Custom count
1557    Custom(u64),
1558}
1559
1560impl TransactionVolume {
1561    /// Get the transaction count.
1562    pub fn count(&self) -> u64 {
1563        match self {
1564            Self::TenK => 10_000,
1565            Self::FiftyK => 50_000,
1566            Self::HundredK => 100_000,
1567            Self::OneM => 1_000_000,
1568            Self::TenM => 10_000_000,
1569            Self::HundredM => 100_000_000,
1570            Self::Custom(n) => *n,
1571        }
1572    }
1573}
1574
1575/// Chart of Accounts configuration.
1576#[derive(Debug, Clone, Serialize, Deserialize)]
1577pub struct ChartOfAccountsConfig {
1578    /// CoA complexity level
1579    pub complexity: CoAComplexity,
1580    /// Use industry-specific accounts
1581    #[serde(default = "default_true")]
1582    pub industry_specific: bool,
1583    /// Custom account definitions file
1584    pub custom_accounts: Option<PathBuf>,
1585    /// Minimum hierarchy depth
1586    #[serde(default = "default_min_depth")]
1587    pub min_hierarchy_depth: u8,
1588    /// Maximum hierarchy depth
1589    #[serde(default = "default_max_depth")]
1590    pub max_hierarchy_depth: u8,
1591    /// **v5.7.0** — expand canonical accounts into industry-specific
1592    /// 6-digit sub-accounts using the embedded
1593    /// [`datasynth_core::industry_packs`] (manufacturing, retail,
1594    /// financial_services, healthcare, technology). When `true`:
1595    ///
1596    /// - Each canonical 4-digit account that has an expansion in the
1597    ///   pack becomes a non-postable control account (`is_postable =
1598    ///   false`).
1599    /// - 2–6 6-digit sub-accounts are added per parent, with
1600    ///   suffix-driven names (`"Product Revenue — Steel Products"`),
1601    ///   industry-realistic gaps, and inherited ISO 21378 codes.
1602    /// - Generators that currently target canonical accounts via
1603    ///   constants will pick a sub-account deterministically per
1604    ///   `document_id` (preserving seed-based reproducibility).
1605    ///
1606    /// Default: `false` (preserves v5.6.0 behaviour exactly — same
1607    /// account count, same numbering, same goldens).
1608    #[serde(default, alias = "expandIndustrySubaccounts")]
1609    pub expand_industry_subaccounts: bool,
1610}
1611
/// Serde default for `ChartOfAccountsConfig::min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    const DEFAULT_MIN_DEPTH: u8 = 2;
    DEFAULT_MIN_DEPTH
}
/// Serde default for `ChartOfAccountsConfig::max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    const DEFAULT_MAX_DEPTH: u8 = 5;
    DEFAULT_MAX_DEPTH
}
1618
1619impl Default for ChartOfAccountsConfig {
1620    fn default() -> Self {
1621        Self {
1622            complexity: CoAComplexity::Small,
1623            industry_specific: true,
1624            custom_accounts: None,
1625            min_hierarchy_depth: default_min_depth(),
1626            max_hierarchy_depth: default_max_depth(),
1627            expand_industry_subaccounts: false,
1628        }
1629    }
1630}
1631
1632/// Transaction generation configuration.
1633#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1634pub struct TransactionConfig {
1635    /// Line item distribution
1636    #[serde(default)]
1637    pub line_item_distribution: LineItemDistributionConfig,
1638    /// Debit/credit balance distribution
1639    #[serde(default)]
1640    pub debit_credit_distribution: DebitCreditDistributionConfig,
1641    /// Even/odd line count distribution
1642    #[serde(default)]
1643    pub even_odd_distribution: EvenOddDistributionConfig,
1644    /// Transaction source distribution
1645    #[serde(default)]
1646    pub source_distribution: SourceDistribution,
1647    /// Seasonality configuration
1648    #[serde(default)]
1649    pub seasonality: SeasonalityConfig,
1650    /// Amount distribution
1651    #[serde(default)]
1652    pub amounts: AmountDistributionConfig,
1653    /// Benford's Law compliance configuration
1654    #[serde(default)]
1655    pub benford: BenfordConfig,
1656}
1657
1658/// Benford's Law compliance configuration.
1659#[derive(Debug, Clone, Serialize, Deserialize)]
1660pub struct BenfordConfig {
1661    /// Enable Benford's Law compliance for amount generation
1662    #[serde(default = "default_true")]
1663    pub enabled: bool,
1664    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
1665    #[serde(default = "default_benford_tolerance")]
1666    pub tolerance: f64,
1667    /// Transaction sources exempt from Benford's Law (fixed amounts)
1668    #[serde(default)]
1669    pub exempt_sources: Vec<BenfordExemption>,
1670}
1671
/// Serde default for `BenfordConfig::tolerance`.
fn default_benford_tolerance() -> f64 {
    const DEFAULT_BENFORD_TOLERANCE: f64 = 0.05;
    DEFAULT_BENFORD_TOLERANCE
}
1675
1676impl Default for BenfordConfig {
1677    fn default() -> Self {
1678        Self {
1679            enabled: true,
1680            tolerance: default_benford_tolerance(),
1681            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
1682        }
1683    }
1684}
1685
1686/// Types of transactions exempt from Benford's Law.
1687#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1688#[serde(rename_all = "snake_case")]
1689pub enum BenfordExemption {
1690    /// Recurring fixed amounts (rent, subscriptions)
1691    Recurring,
1692    /// Payroll (standardized salaries)
1693    Payroll,
1694    /// Fixed fees and charges
1695    FixedFees,
1696    /// Round number purchases (often legitimate)
1697    RoundAmounts,
1698}
1699
1700/// Distribution of transaction sources.
1701#[derive(Debug, Clone, Serialize, Deserialize)]
1702pub struct SourceDistribution {
1703    /// Manual entries percentage
1704    pub manual: f64,
1705    /// Automated system entries
1706    pub automated: f64,
1707    /// Recurring entries
1708    pub recurring: f64,
1709    /// Adjustment entries
1710    pub adjustment: f64,
1711}
1712
1713impl Default for SourceDistribution {
1714    fn default() -> Self {
1715        Self {
1716            manual: 0.20,
1717            automated: 0.70,
1718            recurring: 0.07,
1719            adjustment: 0.03,
1720        }
1721    }
1722}
1723
1724/// Output configuration.
1725#[derive(Debug, Clone, Serialize, Deserialize)]
1726pub struct OutputConfig {
1727    /// Output mode
1728    #[serde(default)]
1729    pub mode: OutputMode,
1730    /// Output directory
1731    #[serde(alias = "outputDirectory")]
1732    pub output_directory: PathBuf,
1733    /// File formats to generate. Accepts both `formats: [json, csv]`
1734    /// (canonical YAML) and `exportFormat: "json"` / `exportFormats:
1735    /// ["json", "csv"]` (SDK-style camelCase). The single-string
1736    /// `exportFormat` form is deserialised via `one_or_many_formats`
1737    /// so SDK clients submitting `exportFormat: "json"` hit the right
1738    /// code path instead of silently falling through to the Parquet
1739    /// default — the bug the SDK team flagged in v4.4.0.
1740    #[serde(
1741        default = "default_formats",
1742        alias = "exportFormats",
1743        alias = "exportFormat",
1744        deserialize_with = "one_or_many_formats"
1745    )]
1746    pub formats: Vec<FileFormat>,
1747    /// Compression settings
1748    #[serde(default)]
1749    pub compression: CompressionConfig,
1750    /// Batch size for writes
1751    #[serde(default = "default_batch_size", alias = "batchSize")]
1752    pub batch_size: usize,
1753    /// Include ACDOCA format
1754    #[serde(default = "default_true", alias = "includeAcdoca")]
1755    pub include_acdoca: bool,
1756    /// Include BSEG format
1757    #[serde(default, alias = "includeBseg")]
1758    pub include_bseg: bool,
1759    /// Partition by fiscal period
1760    #[serde(default = "default_true", alias = "partitionByPeriod")]
1761    pub partition_by_period: bool,
1762    /// Partition by company code
1763    #[serde(default, alias = "partitionByCompany")]
1764    pub partition_by_company: bool,
1765    /// Numeric serialization mode for JSON output.
1766    /// "string" (default): decimals as `"1729237.30"` — lossless precision.
1767    /// "native": decimals as `1729237.30` — friendlier for pandas/analytics.
1768    #[serde(default, alias = "numericMode")]
1769    pub numeric_mode: NumericMode,
1770    /// JSON export layout for journal entries and document flows.
1771    /// "nested" (default): `{"header": {...}, "lines": [...]}` — natural ERP structure.
1772    /// "flat": header fields repeated on every line — friendlier for analytics/ML.
1773    ///
1774    /// Accepts both `export_layout` (canonical / YAML) and `exportLayout`
1775    /// (camelCase / SDK JSON) so SDKs that follow camelCase conventions
1776    /// hit the flat path rather than silently getting the Nested default.
1777    /// Before v3.1.1 the missing camelCase alias meant SDK requests with
1778    /// `exportLayout: "flat"` were silently ignored, which SDK operators
1779    /// reported as "flat hangs generation" (the job completed with Nested
1780    /// layout, but manifests didn't match the expected flat shape).
1781    #[serde(default, alias = "exportLayout")]
1782    pub export_layout: ExportLayout,
1783    /// SAP / HANA export settings (only read when the CLI
1784    /// `--export-format sap` flag is passed). Empty by default so
1785    /// existing configs don't change behaviour; dialect defaults to
1786    /// `classic` for backward compatibility.
1787    #[serde(default, alias = "sapExport")]
1788    pub sap: SapExportSettings,
1789    /// SAF-T (Standard Audit File for Tax) export settings. Read when
1790    /// the CLI `--export-format saft` flag is passed. Defaults to
1791    /// Portugal (`pt`) because the PT variant is the most mature and
1792    /// cross-jurisdiction compatible. Override with
1793    /// `jurisdiction: pl|ro|no|lu` for the other supported countries.
1794    #[serde(default, alias = "saftExport")]
1795    pub saft: SaftExportSettings,
1796}
1797
1798/// Configuration for the SAP export writers (BKPF / BSEG / ACDOCA and
1799/// master-data tables).
1800///
1801/// Mirror of `datasynth_output::SapExportConfig` in YAML form — the CLI
1802/// translates this into the runtime struct before invoking the exporter,
1803/// replacing the v3.x hardcoded `SapExportConfig::default()`.
1804#[derive(Debug, Clone, Serialize, Deserialize)]
1805pub struct SapExportSettings {
1806    /// SAP client / MANDT column value on every table.
1807    #[serde(default = "default_sap_client")]
1808    pub client: String,
1809    /// Leading ledger for ACDOCA rows (0L for S/4HANA default).
1810    #[serde(default = "default_sap_ledger")]
1811    pub ledger: String,
1812    /// Source system identifier — written to ACDOCA.AWSYS so downstream
1813    /// consumers can distinguish synthetic rows from production ones.
1814    #[serde(default = "default_sap_source_system")]
1815    pub source_system: String,
1816    /// Local currency (WAERS / RWCUR).
1817    #[serde(default = "default_sap_currency")]
1818    pub local_currency: String,
1819    /// Optional group / consolidation currency (triggers the HSL / RHCUR columns).
1820    #[serde(default, skip_serializing_if = "Option::is_none")]
1821    pub group_currency: Option<String>,
1822    /// Which SAP tables to export. Empty = default set (bkpf, bseg, acdoca).
1823    #[serde(default)]
1824    pub tables: Vec<String>,
1825    /// Include ZSIM_* extension columns on ACDOCA rows.
1826    #[serde(default = "default_true")]
1827    pub include_extension_fields: bool,
1828    /// Export dialect — `classic` (R/3 / BODS) or `hana` (S/4HANA CDS).
1829    #[serde(default)]
1830    pub dialect: SapDialectSetting,
1831    /// Legacy flag, retained for backward compatibility. Has no effect
1832    /// when `dialect = hana`.
1833    #[serde(default = "default_true")]
1834    pub use_sap_date_format: bool,
1835}
1836
1837impl Default for SapExportSettings {
1838    fn default() -> Self {
1839        Self {
1840            client: default_sap_client(),
1841            ledger: default_sap_ledger(),
1842            source_system: default_sap_source_system(),
1843            local_currency: default_sap_currency(),
1844            group_currency: None,
1845            tables: Vec::new(),
1846            include_extension_fields: true,
1847            dialect: SapDialectSetting::default(),
1848            use_sap_date_format: true,
1849        }
1850    }
1851}
1852
/// Default SAP client value: `"100"`.
fn default_sap_client() -> String {
    String::from("100")
}

/// Default ledger: `"0L"`.
fn default_sap_ledger() -> String {
    String::from("0L")
}

/// Default source-system identifier: `"SYNTH"`.
fn default_sap_source_system() -> String {
    String::from("SYNTH")
}

/// Default local currency: `"USD"`.
fn default_sap_currency() -> String {
    String::from("USD")
}
1865
1866/// SAP export dialect (wire form — `datasynth_output::SapDialect` is the
1867/// runtime form).
1868#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
1869#[serde(rename_all = "snake_case")]
1870pub enum SapDialectSetting {
1871    /// Legacy R/3 / BODS-compatible CSV (default).
1872    #[default]
1873    Classic,
1874    /// S/4HANA CDS dialect (semicolon + UTF-8 BOM + decimal comma + ISO dates).
1875    Hana,
1876}
1877
1878/// SAF-T export settings (v4.3.1).
1879#[derive(Debug, Clone, Serialize, Deserialize)]
1880pub struct SaftExportSettings {
1881    /// ISO-ish two-letter code: `pt` / `pl` / `ro` / `no` / `lu`.
1882    /// Defaults to `pt` (Portugal, most mature variant).
1883    #[serde(default = "default_saft_jurisdiction")]
1884    pub jurisdiction: String,
1885    /// Company tax registration number / VAT ID / TIN used in the
1886    /// `Header.TaxRegistrationNumber` element. Falls back to
1887    /// `"Desconhecido"` (Portuguese for "unknown") when empty.
1888    #[serde(default)]
1889    pub company_tax_id: String,
1890    /// Optional override for the company name used in the Header.
1891    /// When empty, the first configured company's `name` is used.
1892    #[serde(default)]
1893    pub company_name: String,
1894}
1895
1896impl Default for SaftExportSettings {
1897    fn default() -> Self {
1898        Self {
1899            jurisdiction: default_saft_jurisdiction(),
1900            company_tax_id: String::new(),
1901            company_name: String::new(),
1902        }
1903    }
1904}
1905
/// Default SAF-T jurisdiction code: `"pt"`.
fn default_saft_jurisdiction() -> String {
    String::from("pt")
}
1909
1910fn default_formats() -> Vec<FileFormat> {
1911    vec![FileFormat::Parquet]
1912}
/// Default number of records per write batch.
fn default_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 100_000;
    DEFAULT_BATCH_SIZE
}
1916
1917/// Custom deserializer for `formats` that accepts either a single
1918/// `FileFormat` (e.g. `"json"` for SDK `exportFormat: "json"`) or a
1919/// vector (e.g. `["json", "csv"]`). Without this shim an SDK config
1920/// with `exportFormat: "json"` would fail to parse (serde expects a
1921/// sequence for a `Vec` field) and silently fall through to defaults.
1922fn one_or_many_formats<'de, D>(deserializer: D) -> Result<Vec<FileFormat>, D::Error>
1923where
1924    D: serde::Deserializer<'de>,
1925{
1926    #[derive(Deserialize)]
1927    #[serde(untagged)]
1928    enum OneOrMany {
1929        One(FileFormat),
1930        Many(Vec<FileFormat>),
1931    }
1932    match OneOrMany::deserialize(deserializer)? {
1933        OneOrMany::One(f) => Ok(vec![f]),
1934        OneOrMany::Many(v) => Ok(v),
1935    }
1936}
1937
1938impl Default for OutputConfig {
1939    fn default() -> Self {
1940        Self {
1941            mode: OutputMode::FlatFile,
1942            output_directory: PathBuf::from("./output"),
1943            formats: default_formats(),
1944            compression: CompressionConfig::default(),
1945            batch_size: default_batch_size(),
1946            include_acdoca: true,
1947            include_bseg: false,
1948            partition_by_period: true,
1949            partition_by_company: false,
1950            numeric_mode: NumericMode::default(),
1951            export_layout: ExportLayout::default(),
1952            sap: SapExportSettings::default(),
1953            saft: SaftExportSettings::default(),
1954        }
1955    }
1956}
1957
1958/// Numeric serialization mode for JSON decimal fields.
1959#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
1960#[serde(rename_all = "snake_case")]
1961pub enum NumericMode {
1962    /// Decimals as JSON strings (e.g. `"1729237.30"`). Preserves full precision.
1963    #[default]
1964    String,
1965    /// Decimals as JSON numbers (e.g. `1729237.30`). Friendlier for pandas/analytics.
1966    Native,
1967}
1968
1969/// JSON export layout for nested structures (journal entries, document flows).
1970#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
1971#[serde(rename_all = "snake_case")]
1972pub enum ExportLayout {
1973    /// Nested structure: `{"header": {...}, "lines": [...]}`. Natural ERP format.
1974    #[default]
1975    Nested,
1976    /// Flat structure: header fields repeated on every line. Analytics-friendly.
1977    Flat,
1978}
1979
1980/// Output mode.
1981#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
1982#[serde(rename_all = "snake_case")]
1983pub enum OutputMode {
1984    /// Stream records as generated
1985    Streaming,
1986    /// Write to flat files
1987    #[default]
1988    FlatFile,
1989    /// Both streaming and flat file
1990    Both,
1991}
1992
1993/// Supported file formats.
1994#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1995#[serde(rename_all = "snake_case")]
1996pub enum FileFormat {
1997    Csv,
1998    Parquet,
1999    Json,
2000    JsonLines,
2001}
2002
2003/// Compression configuration.
2004#[derive(Debug, Clone, Serialize, Deserialize)]
2005pub struct CompressionConfig {
2006    /// Enable compression
2007    #[serde(default = "default_true")]
2008    pub enabled: bool,
2009    /// Compression algorithm
2010    #[serde(default)]
2011    pub algorithm: CompressionAlgorithm,
2012    /// Compression level (1-9)
2013    #[serde(default = "default_compression_level")]
2014    pub level: u8,
2015}
2016
/// Default compression level.
fn default_compression_level() -> u8 {
    const DEFAULT_LEVEL: u8 = 3;
    DEFAULT_LEVEL
}
2020
2021impl Default for CompressionConfig {
2022    fn default() -> Self {
2023        Self {
2024            enabled: true,
2025            algorithm: CompressionAlgorithm::default(),
2026            level: default_compression_level(),
2027        }
2028    }
2029}
2030
2031/// Compression algorithms.
2032#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2033#[serde(rename_all = "snake_case")]
2034pub enum CompressionAlgorithm {
2035    Gzip,
2036    #[default]
2037    Zstd,
2038    Lz4,
2039    Snappy,
2040}
2041
2042/// Fraud simulation configuration.
2043///
2044/// ## Document-level vs. line-level fraud
2045///
2046/// `fraud_rate` applies to individual journal-entry lines (line-level).
2047/// `document_fraud_rate` (optional) applies to source documents
2048/// (purchase orders, vendor invoices, customer invoices, payments), and when
2049/// `propagate_to_lines` is true, every JE derived from a fraudulent document
2050/// also gets `is_fraud = true`. This lets users express either:
2051///
2052///  * pure line-level fraud (`document_fraud_rate = None`): legacy behaviour;
2053///  * pure document-level fraud (`fraud_rate ≈ 0` and `document_fraud_rate` set):
2054///    fraud rings expressed at document granularity — realistic for PO/invoice
2055///    fraud schemes where one fraudulent document spawns multiple derived JEs;
2056///  * hybrid (both set): document-level scheme fraud plus unrelated line-level
2057///    slip-ups.
2058///
2059/// `propagate_to_document` does the inverse: when a JE is tagged as fraud by
2060/// the anomaly injector, its source document is also marked fraudulent.
2061#[derive(Debug, Clone, Serialize, Deserialize)]
2062pub struct FraudConfig {
2063    /// Enable fraud scenario generation
2064    #[serde(default)]
2065    pub enabled: bool,
2066    /// Line-level fraud rate: fraction of individual JE lines flagged as fraud (0.0 to 1.0).
2067    ///
2068    /// # Effective line-level prevalence
2069    ///
2070    /// If `document_fraud_rate = Some(d)` and `propagate_to_lines = true`,
2071    /// the observed line-level fraud prevalence is roughly:
2072    ///
2073    /// > `P(line is_fraud) ≈ fraud_rate + d × avg_lines_per_fraud_doc / total_lines`
2074    ///
2075    /// For a typical retail job (avg 3 lines per document, ~30 % of lines
2076    /// come from doc-flow-derived JEs) the combined rate lands near:
2077    ///
2078    /// > `fraud_rate + 0.3 × d`
2079    ///
2080    /// so setting `fraud_rate=0.02, document_fraud_rate=0.05, propagate_to_lines=true`
2081    /// produces ~3.5 % line-level fraud, not 2 %. To target a specific
2082    /// line-level prevalence X, choose `fraud_rate = X - 0.3 × d`.
2083    #[serde(default = "default_fraud_rate", alias = "fraudRate")]
2084    pub fraud_rate: f64,
2085    /// Document-level fraud rate: fraction of source documents (PO, vendor
2086    /// invoice, customer invoice, payment) flagged as fraud. `None` disables
2087    /// document-level injection; `Some(r)` marks ~r × document-count as fraud
2088    /// independently of the line-level rate.
2089    ///
2090    /// v4.4.2+ default: `Some(0.01)` — the SDK team reported
2091    /// `is_fraud_propagated: 0/72` regressed from `12/33` in 3.1.1 because
2092    /// the default had silently become None. A 1% document-fraud default
2093    /// restores the propagation signal (~0.3% of JE headers carry
2094    /// `is_fraud_propagated = true`) without meaningfully changing the
2095    /// line-level fraud prevalence. Set to `Some(0.0)` or `null` in your
2096    /// YAML to explicitly disable document-level injection.
2097    #[serde(default = "default_document_fraud_rate", alias = "documentFraudRate")]
2098    pub document_fraud_rate: Option<f64>,
2099    /// When true, flagging a document as fraudulent cascades `is_fraud = true`
2100    /// and `fraud_type` to every journal entry derived from that document,
2101    /// and records `fraud_source_document_id` on the JE header.
2102    /// Default: `true`.
2103    #[serde(default = "default_true", alias = "propagateToLines")]
2104    pub propagate_to_lines: bool,
2105    /// When true, tagging a JE as fraud via line-level anomaly injection also
2106    /// marks the JE's source document as fraudulent (if it can be resolved).
2107    /// Default: `true`.
2108    #[serde(default = "default_true", alias = "propagateToDocument")]
2109    pub propagate_to_document: bool,
2110    /// Fraud type distribution
2111    #[serde(default)]
2112    pub fraud_type_distribution: FraudTypeDistribution,
2113    /// Enable fraud clustering
2114    #[serde(default)]
2115    pub clustering_enabled: bool,
2116    /// Clustering factor
2117    #[serde(default = "default_clustering_factor")]
2118    pub clustering_factor: f64,
2119    /// Approval thresholds for threshold-adjacent fraud pattern
2120    #[serde(default = "default_approval_thresholds")]
2121    pub approval_thresholds: Vec<f64>,
2122}
2123
/// Default approval thresholds used by the threshold-adjacent fraud
/// pattern, in ascending order.
fn default_approval_thresholds() -> Vec<f64> {
    [1_000.0, 5_000.0, 10_000.0, 25_000.0, 50_000.0, 100_000.0].to_vec()
}
2127
/// Default line-level fraud rate.
fn default_fraud_rate() -> f64 {
    const DEFAULT_FRAUD_RATE: f64 = 0.005;
    DEFAULT_FRAUD_RATE
}
/// Default document-level fraud rate: `Some(0.05)`.
fn default_document_fraud_rate() -> Option<f64> {
    // v5.0.1 raised this from 0.01 to 0.05 so that scheme-level fraud
    // propagation is meaningful at typical line-level rates. The 1 %
    // value (introduced in v4.4.2 to restore `is_fraud_propagated > 0`)
    // turned out too conservative: at `fraud_rate = 0.08` it gave ~3.6 %
    // observed propagation against a 26.7 % target. With 5 % and the
    // additive formula `P(line is_fraud) ≈ fraud_rate + 0.3 × d`, the
    // combined rate at fraud_rate=0.08 is ~9.5 %, closer to the spec
    // target. Users can set `Some(0.0)` or `null` in YAML to disable it,
    // or pick a higher value (e.g. 0.20) for scheme-heavy fraud workloads.
    const DEFAULT_DOCUMENT_FRAUD_RATE: f64 = 0.05;
    Some(DEFAULT_DOCUMENT_FRAUD_RATE)
}
/// Default fraud clustering factor.
fn default_clustering_factor() -> f64 {
    const DEFAULT_CLUSTERING_FACTOR: f64 = 3.0;
    DEFAULT_CLUSTERING_FACTOR
}
2146
2147impl Default for FraudConfig {
2148    fn default() -> Self {
2149        Self {
2150            enabled: false,
2151            fraud_rate: default_fraud_rate(),
2152            document_fraud_rate: default_document_fraud_rate(),
2153            propagate_to_lines: true,
2154            propagate_to_document: true,
2155            fraud_type_distribution: FraudTypeDistribution::default(),
2156            clustering_enabled: false,
2157            clustering_factor: default_clustering_factor(),
2158            approval_thresholds: default_approval_thresholds(),
2159        }
2160    }
2161}
2162
2163/// Distribution of fraud types.
2164///
2165/// All fields default to `0.0` if absent from the YAML, so partial
2166/// distributions are accepted; the validator (`validate_sum_to_one`)
2167/// then enforces that the populated weights sum to `1.0 ± 0.01`.
2168#[derive(Debug, Clone, Serialize, Deserialize)]
2169#[serde(deny_unknown_fields)]
2170pub struct FraudTypeDistribution {
2171    #[serde(default)]
2172    pub suspense_account_abuse: f64,
2173    #[serde(default)]
2174    pub fictitious_transaction: f64,
2175    #[serde(default)]
2176    pub revenue_manipulation: f64,
2177    #[serde(default)]
2178    pub expense_capitalization: f64,
2179    #[serde(default)]
2180    pub split_transaction: f64,
2181    #[serde(default)]
2182    pub timing_anomaly: f64,
2183    #[serde(default)]
2184    pub unauthorized_access: f64,
2185    #[serde(default)]
2186    pub duplicate_payment: f64,
2187    /// Vendor kickback scheme.
2188    #[serde(default)]
2189    pub kickback_scheme: f64,
2190    /// Round-tripping funds through multiple entities or accounts.
2191    #[serde(default)]
2192    pub round_tripping: f64,
2193    /// Unauthorized customer/vendor discounts (sweethearting, side deals).
2194    #[serde(default)]
2195    pub unauthorized_discount: f64,
2196}
2197
2198impl Default for FraudTypeDistribution {
2199    fn default() -> Self {
2200        // Preserves the pre-extension default sum=1.0 over the original
2201        // eight fields.  The three additional fields (kickback_scheme,
2202        // round_tripping, unauthorized_discount) default to 0.0 so that
2203        // existing fraud packs / templates that explicitly enumerate the
2204        // original eight fields continue to merge to a 1.0 sum without
2205        // modification.  Users who want those fraud types must set them
2206        // explicitly (and rebalance the others).
2207        Self {
2208            suspense_account_abuse: 0.25,
2209            fictitious_transaction: 0.15,
2210            revenue_manipulation: 0.10,
2211            expense_capitalization: 0.10,
2212            split_transaction: 0.15,
2213            timing_anomaly: 0.10,
2214            unauthorized_access: 0.10,
2215            duplicate_payment: 0.05,
2216            kickback_scheme: 0.0,
2217            round_tripping: 0.0,
2218            unauthorized_discount: 0.0,
2219        }
2220    }
2221}
2222
2223/// Internal Controls System (ICS) configuration.
2224#[derive(Debug, Clone, Serialize, Deserialize)]
2225pub struct InternalControlsConfig {
2226    /// Enable internal controls system
2227    #[serde(default)]
2228    pub enabled: bool,
2229    /// Rate at which controls result in exceptions (0.0 - 1.0)
2230    #[serde(default = "default_exception_rate")]
2231    pub exception_rate: f64,
2232    /// Rate at which SoD violations occur (0.0 - 1.0)
2233    #[serde(default = "default_sod_violation_rate")]
2234    pub sod_violation_rate: f64,
2235    /// Export control master data to separate files
2236    #[serde(default = "default_true")]
2237    pub export_control_master_data: bool,
2238    /// SOX materiality threshold for marking transactions as SOX-relevant
2239    #[serde(default = "default_sox_materiality_threshold")]
2240    pub sox_materiality_threshold: f64,
2241    /// Enable COSO 2013 framework integration
2242    #[serde(default = "default_true")]
2243    pub coso_enabled: bool,
2244    /// Include entity-level controls in generation
2245    #[serde(default)]
2246    pub include_entity_level_controls: bool,
2247    /// Target maturity level for controls
2248    /// Valid values: "ad_hoc", "repeatable", "defined", "managed", "optimized", "mixed"
2249    #[serde(default = "default_target_maturity_level")]
2250    pub target_maturity_level: String,
2251}
2252
/// Default control exception rate.
fn default_exception_rate() -> f64 {
    const DEFAULT_EXCEPTION_RATE: f64 = 0.02;
    DEFAULT_EXCEPTION_RATE
}

/// Default SoD violation rate.
fn default_sod_violation_rate() -> f64 {
    const DEFAULT_SOD_VIOLATION_RATE: f64 = 0.01;
    DEFAULT_SOD_VIOLATION_RATE
}

/// Default SOX materiality threshold.
fn default_sox_materiality_threshold() -> f64 {
    10_000.0
}

/// Default target maturity level: `"mixed"`.
fn default_target_maturity_level() -> String {
    String::from("mixed")
}
2268
2269impl Default for InternalControlsConfig {
2270    fn default() -> Self {
2271        Self {
2272            enabled: false,
2273            exception_rate: default_exception_rate(),
2274            sod_violation_rate: default_sod_violation_rate(),
2275            export_control_master_data: true,
2276            sox_materiality_threshold: default_sox_materiality_threshold(),
2277            coso_enabled: true,
2278            include_entity_level_controls: false,
2279            target_maturity_level: default_target_maturity_level(),
2280        }
2281    }
2282}
2283
2284/// Business process configuration.
2285#[derive(Debug, Clone, Serialize, Deserialize)]
2286pub struct BusinessProcessConfig {
2287    /// Order-to-Cash weight
2288    #[serde(default = "default_o2c")]
2289    pub o2c_weight: f64,
2290    /// Procure-to-Pay weight
2291    #[serde(default = "default_p2p")]
2292    pub p2p_weight: f64,
2293    /// Record-to-Report weight
2294    #[serde(default = "default_r2r")]
2295    pub r2r_weight: f64,
2296    /// Hire-to-Retire weight
2297    #[serde(default = "default_h2r")]
2298    pub h2r_weight: f64,
2299    /// Acquire-to-Retire weight
2300    #[serde(default = "default_a2r")]
2301    pub a2r_weight: f64,
2302}
2303
/// Default Order-to-Cash weight.
fn default_o2c() -> f64 {
    const O2C: f64 = 0.35;
    O2C
}

/// Default Procure-to-Pay weight.
fn default_p2p() -> f64 {
    const P2P: f64 = 0.30;
    P2P
}

/// Default Record-to-Report weight.
fn default_r2r() -> f64 {
    const R2R: f64 = 0.20;
    R2R
}

/// Default Hire-to-Retire weight.
fn default_h2r() -> f64 {
    const H2R: f64 = 0.10;
    H2R
}

/// Default Acquire-to-Retire weight.
fn default_a2r() -> f64 {
    const A2R: f64 = 0.05;
    A2R
}
2319
2320impl Default for BusinessProcessConfig {
2321    fn default() -> Self {
2322        Self {
2323            o2c_weight: default_o2c(),
2324            p2p_weight: default_p2p(),
2325            r2r_weight: default_r2r(),
2326            h2r_weight: default_h2r(),
2327            a2r_weight: default_a2r(),
2328        }
2329    }
2330}
2331
2332/// User persona configuration.
2333#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2334pub struct UserPersonaConfig {
2335    /// Distribution of user personas
2336    #[serde(default)]
2337    pub persona_distribution: PersonaDistribution,
2338    /// Users per persona type
2339    #[serde(default)]
2340    pub users_per_persona: UsersPerPersona,
2341}
2342
2343/// Distribution of user personas for transaction generation.
2344#[derive(Debug, Clone, Serialize, Deserialize)]
2345pub struct PersonaDistribution {
2346    pub junior_accountant: f64,
2347    pub senior_accountant: f64,
2348    pub controller: f64,
2349    pub manager: f64,
2350    pub automated_system: f64,
2351}
2352
2353impl Default for PersonaDistribution {
2354    fn default() -> Self {
2355        Self {
2356            junior_accountant: 0.15,
2357            senior_accountant: 0.15,
2358            controller: 0.05,
2359            manager: 0.05,
2360            automated_system: 0.60,
2361        }
2362    }
2363}
2364
2365/// Number of users per persona type.
2366#[derive(Debug, Clone, Serialize, Deserialize)]
2367pub struct UsersPerPersona {
2368    pub junior_accountant: usize,
2369    pub senior_accountant: usize,
2370    pub controller: usize,
2371    pub manager: usize,
2372    pub automated_system: usize,
2373}
2374
2375impl Default for UsersPerPersona {
2376    fn default() -> Self {
2377        Self {
2378            junior_accountant: 10,
2379            senior_accountant: 5,
2380            controller: 2,
2381            manager: 3,
2382            automated_system: 20,
2383        }
2384    }
2385}
2386
2387/// Template configuration for realistic data generation.
2388///
2389/// # User-supplied template packs (v3.2.0+)
2390///
2391/// Set `path` to a directory (or single YAML/JSON file) to override or
2392/// extend the embedded default pools for vendor names, customer names,
2393/// material/asset descriptions, audit findings, bank names, and
2394/// department names. When `path` is `None` (the default), generators
2395/// use the compiled-in pools and output is byte-identical to v3.1.2.
2396///
2397/// See `crates/datasynth-core/src/templates/loader.rs::TemplateData`
2398/// for the full YAML schema. Use `datasynth-data templates export` to
2399/// dump the defaults as a starter pack.
2400#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2401pub struct TemplateConfig {
2402    /// Name generation settings
2403    #[serde(default)]
2404    pub names: NameTemplateConfig,
2405    /// Description generation settings
2406    #[serde(default)]
2407    pub descriptions: DescriptionTemplateConfig,
2408    /// Reference number settings
2409    #[serde(default)]
2410    pub references: ReferenceTemplateConfig,
2411    /// Optional path to a user-supplied template file or directory.
2412    /// When set, entries from the file(s) augment or replace the
2413    /// embedded defaults according to `merge_strategy`.
2414    ///
2415    /// `None` (default) = use embedded pools only (byte-identical to v3.1.2).
2416    #[serde(default, alias = "templatesPath")]
2417    pub path: Option<std::path::PathBuf>,
2418    /// How file-based entries combine with embedded defaults.
2419    ///
2420    /// - `extend` (default): append file entries to embedded pools,
2421    ///   de-duplicating. Safe for incremental overlays.
2422    /// - `replace`: discard embedded pools entirely and use only file
2423    ///   entries. Requires a fully-populated template file.
2424    /// - `merge_prefer_file`: replace individual categories when present
2425    ///   in the file; keep embedded for absent categories.
2426    #[serde(default, alias = "mergeStrategy")]
2427    pub merge_strategy: TemplateMergeStrategy,
2428}
2429
2430/// Strategy for combining user-supplied template files with embedded defaults.
2431#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2432#[serde(rename_all = "snake_case")]
2433pub enum TemplateMergeStrategy {
2434    /// Append file entries to embedded pools (default).
2435    #[default]
2436    Extend,
2437    /// Replace embedded pools entirely with file entries.
2438    Replace,
2439    /// Replace individual categories when present in file; keep embedded for absent ones.
2440    MergePreferFile,
2441}
2442
2443/// Name template configuration.
2444#[derive(Debug, Clone, Serialize, Deserialize)]
2445pub struct NameTemplateConfig {
2446    /// Distribution of name cultures
2447    #[serde(default)]
2448    pub culture_distribution: CultureDistribution,
2449    /// Email domain for generated users
2450    #[serde(default = "default_email_domain")]
2451    pub email_domain: String,
2452    /// Generate realistic display names
2453    #[serde(default = "default_true")]
2454    pub generate_realistic_names: bool,
2455}
2456
/// Default email domain for generated users: `"company.com"`.
fn default_email_domain() -> String {
    String::from("company.com")
}
2460
2461impl Default for NameTemplateConfig {
2462    fn default() -> Self {
2463        Self {
2464            culture_distribution: CultureDistribution::default(),
2465            email_domain: default_email_domain(),
2466            generate_realistic_names: true,
2467        }
2468    }
2469}
2470
2471/// Distribution of name cultures for generation.
2472#[derive(Debug, Clone, Serialize, Deserialize)]
2473pub struct CultureDistribution {
2474    pub western_us: f64,
2475    pub hispanic: f64,
2476    pub german: f64,
2477    pub french: f64,
2478    pub chinese: f64,
2479    pub japanese: f64,
2480    pub indian: f64,
2481}
2482
2483impl Default for CultureDistribution {
2484    fn default() -> Self {
2485        Self {
2486            western_us: 0.40,
2487            hispanic: 0.20,
2488            german: 0.10,
2489            french: 0.05,
2490            chinese: 0.10,
2491            japanese: 0.05,
2492            indian: 0.10,
2493        }
2494    }
2495}
2496
2497/// Description template configuration.
2498#[derive(Debug, Clone, Serialize, Deserialize)]
2499pub struct DescriptionTemplateConfig {
2500    /// Generate header text for journal entries
2501    #[serde(default = "default_true")]
2502    pub generate_header_text: bool,
2503    /// Generate line text for journal entry lines
2504    #[serde(default = "default_true")]
2505    pub generate_line_text: bool,
2506}
2507
2508impl Default for DescriptionTemplateConfig {
2509    fn default() -> Self {
2510        Self {
2511            generate_header_text: true,
2512            generate_line_text: true,
2513        }
2514    }
2515}
2516
/// Reference number template configuration.
///
/// Every field has a serde default, so any subset of keys may be supplied;
/// omitted prefixes fall back to `"INV"`, `"PO"` and `"SO"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceTemplateConfig {
    /// Generate reference numbers
    #[serde(default = "default_true")]
    pub generate_references: bool,
    /// Invoice prefix
    #[serde(default = "default_invoice_prefix")]
    pub invoice_prefix: String,
    /// Purchase order prefix
    #[serde(default = "default_po_prefix")]
    pub po_prefix: String,
    /// Sales order prefix
    #[serde(default = "default_so_prefix")]
    pub so_prefix: String,
}
2533
/// Serde default for `ReferenceTemplateConfig::invoice_prefix`.
fn default_invoice_prefix() -> String {
    String::from("INV")
}

/// Serde default for `ReferenceTemplateConfig::po_prefix`.
fn default_po_prefix() -> String {
    String::from("PO")
}

/// Serde default for `ReferenceTemplateConfig::so_prefix`.
fn default_so_prefix() -> String {
    String::from("SO")
}
2543
2544impl Default for ReferenceTemplateConfig {
2545    fn default() -> Self {
2546        Self {
2547            generate_references: true,
2548            invoice_prefix: default_invoice_prefix(),
2549            po_prefix: default_po_prefix(),
2550            so_prefix: default_so_prefix(),
2551        }
2552    }
2553}
2554
/// Approval workflow configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified. The workflow is disabled unless `enabled` is set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalConfig {
    /// Enable approval workflow generation
    #[serde(default)]
    pub enabled: bool,
    /// Threshold below which transactions are auto-approved
    #[serde(default = "default_auto_approve_threshold")]
    pub auto_approve_threshold: f64,
    /// Rate at which approvals are rejected (0.0 to 1.0)
    #[serde(default = "default_rejection_rate")]
    pub rejection_rate: f64,
    /// Rate at which approvals require revision (0.0 to 1.0)
    #[serde(default = "default_revision_rate")]
    pub revision_rate: f64,
    /// Average delay in hours for approval processing
    #[serde(default = "default_approval_delay_hours")]
    pub average_approval_delay_hours: f64,
    /// Approval chain thresholds
    // NOTE(review): a bare `#[serde(default)]` yields an empty list when the
    // key is omitted, whereas `ApprovalConfig::default()` supplies a
    // four-level chain. Confirm this divergence is intended.
    #[serde(default)]
    pub thresholds: Vec<ApprovalThresholdConfig>,
}
2577
/// Serde default for `ApprovalConfig::auto_approve_threshold`.
fn default_auto_approve_threshold() -> f64 {
    1_000.0
}

/// Serde default for `ApprovalConfig::rejection_rate`.
fn default_rejection_rate() -> f64 {
    0.02
}

/// Serde default for `ApprovalConfig::revision_rate`.
fn default_revision_rate() -> f64 {
    0.05
}

/// Serde default for `ApprovalConfig::average_approval_delay_hours`.
fn default_approval_delay_hours() -> f64 {
    4.0
}
2590
2591impl Default for ApprovalConfig {
2592    fn default() -> Self {
2593        Self {
2594            enabled: false,
2595            auto_approve_threshold: default_auto_approve_threshold(),
2596            rejection_rate: default_rejection_rate(),
2597            revision_rate: default_revision_rate(),
2598            average_approval_delay_hours: default_approval_delay_hours(),
2599            thresholds: vec![
2600                ApprovalThresholdConfig {
2601                    amount: 1000.0,
2602                    level: 1,
2603                    roles: vec!["senior_accountant".to_string()],
2604                },
2605                ApprovalThresholdConfig {
2606                    amount: 10000.0,
2607                    level: 2,
2608                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
2609                },
2610                ApprovalThresholdConfig {
2611                    amount: 100000.0,
2612                    level: 3,
2613                    roles: vec![
2614                        "senior_accountant".to_string(),
2615                        "controller".to_string(),
2616                        "manager".to_string(),
2617                    ],
2618                },
2619                ApprovalThresholdConfig {
2620                    amount: 500000.0,
2621                    level: 4,
2622                    roles: vec![
2623                        "senior_accountant".to_string(),
2624                        "controller".to_string(),
2625                        "manager".to_string(),
2626                        "executive".to_string(),
2627                    ],
2628                },
2629            ],
2630        }
2631    }
2632}
2633
/// Configuration for a single approval threshold.
///
/// No field has a serde default, so every entry must give `amount`,
/// `level` and `roles` explicitly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalThresholdConfig {
    /// Amount threshold
    pub amount: f64,
    /// Approval level required
    pub level: u8,
    /// Roles that can approve at this level
    pub roles: Vec<String>,
}
2644
/// Department configuration.
///
/// All fields are optional in the input: assignment is off by default, the
/// headcount multiplier falls back to `1.0`, and the custom list is empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepartmentConfig {
    /// Enable department assignment
    #[serde(default)]
    pub enabled: bool,
    /// Multiplier for department headcounts
    #[serde(default = "default_headcount_multiplier")]
    pub headcount_multiplier: f64,
    /// Custom department definitions (optional)
    #[serde(default)]
    pub custom_departments: Vec<CustomDepartmentConfig>,
}
2658
/// Serde default for `DepartmentConfig::headcount_multiplier`.
fn default_headcount_multiplier() -> f64 {
    1.0
}
2662
2663impl Default for DepartmentConfig {
2664    fn default() -> Self {
2665        Self {
2666            enabled: false,
2667            headcount_multiplier: default_headcount_multiplier(),
2668            custom_departments: Vec::new(),
2669        }
2670    }
2671}
2672
/// Custom department definition.
///
/// `code` and `name` are required; the remaining fields default to `None`
/// or an empty list when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomDepartmentConfig {
    /// Department code
    pub code: String,
    /// Department name
    pub name: String,
    /// Associated cost center
    #[serde(default)]
    pub cost_center: Option<String>,
    /// Primary business processes
    #[serde(default)]
    pub primary_processes: Vec<String>,
    /// Parent department code
    #[serde(default)]
    pub parent_code: Option<String>,
}
2690
2691// ============================================================================
2692// Master Data Configuration
2693// ============================================================================
2694
/// Master data generation configuration.
///
/// Groups the per-entity master data sections. Each section is optional in
/// the input and falls back to that section type's `Default` impl; the
/// derived `Default` for this struct does the same for all six.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MasterDataConfig {
    /// Vendor master data settings
    #[serde(default)]
    pub vendors: VendorMasterConfig,
    /// Customer master data settings
    #[serde(default)]
    pub customers: CustomerMasterConfig,
    /// Material master data settings
    #[serde(default)]
    pub materials: MaterialMasterConfig,
    /// Fixed asset master data settings
    #[serde(default)]
    pub fixed_assets: FixedAssetMasterConfig,
    /// Employee master data settings
    #[serde(default)]
    pub employees: EmployeeMasterConfig,
    /// Cost center master data settings
    #[serde(default)]
    pub cost_centers: CostCenterMasterConfig,
}
2717
/// Vendor master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified. `intercompany_percent` shares its default helper with
/// `CustomerMasterConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VendorMasterConfig {
    /// Number of vendors to generate
    #[serde(default = "default_vendor_count")]
    pub count: usize,
    /// Percentage of vendors that are intercompany (0.0 to 1.0)
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Payment terms distribution
    #[serde(default)]
    pub payment_terms_distribution: PaymentTermsDistribution,
    /// Vendor behavior distribution
    #[serde(default)]
    pub behavior_distribution: VendorBehaviorDistribution,
    /// Generate bank account details
    #[serde(default = "default_true")]
    pub generate_bank_accounts: bool,
    /// Generate tax IDs
    #[serde(default = "default_true")]
    pub generate_tax_ids: bool,
}
2740
/// Serde default for `VendorMasterConfig::count`.
fn default_vendor_count() -> usize {
    500
}

/// Serde default for `intercompany_percent`, shared by `VendorMasterConfig`
/// and `CustomerMasterConfig`.
fn default_intercompany_percent() -> f64 {
    0.05
}
2748
2749impl Default for VendorMasterConfig {
2750    fn default() -> Self {
2751        Self {
2752            count: default_vendor_count(),
2753            intercompany_percent: default_intercompany_percent(),
2754            payment_terms_distribution: PaymentTermsDistribution::default(),
2755            behavior_distribution: VendorBehaviorDistribution::default(),
2756            generate_bank_accounts: true,
2757            generate_tax_ids: true,
2758        }
2759    }
2760}
2761
/// Payment terms distribution for vendors.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all six weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDistribution {
    /// Net 30 days
    pub net_30: f64,
    /// Net 60 days
    pub net_60: f64,
    /// Net 90 days
    pub net_90: f64,
    /// 2% 10 Net 30 (early payment discount)
    pub two_ten_net_30: f64,
    /// Due on receipt
    pub due_on_receipt: f64,
    /// End of month
    pub end_of_month: f64,
}
2778
impl Default for PaymentTermsDistribution {
    /// Returns the built-in payment terms mix. The six weights add up to
    /// 1.00, with Net 30 the largest share at 0.40.
    fn default() -> Self {
        Self {
            net_30: 0.40,
            net_60: 0.20,
            net_90: 0.10,
            two_ten_net_30: 0.15,
            due_on_receipt: 0.05,
            end_of_month: 0.10,
        }
    }
}
2791
/// Vendor behavior distribution.
///
/// All fields default to `0.0` if absent from the YAML, so partial
/// distributions are accepted; the validator (`validate_sum_to_one`)
/// then enforces that the populated weights sum to `1.0 ± 0.01`.
///
/// Unknown keys are rejected (`deny_unknown_fields`), so a misspelled
/// category name fails deserialization instead of being dropped.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct VendorBehaviorDistribution {
    /// Reliable vendors (consistent delivery, quality)
    #[serde(default)]
    pub reliable: f64,
    /// Sometimes late vendors
    #[serde(default)]
    pub sometimes_late: f64,
    /// Inconsistent quality vendors
    #[serde(default)]
    pub inconsistent_quality: f64,
    /// Premium vendors (high quality, premium pricing)
    #[serde(default)]
    pub premium: f64,
    /// Budget vendors (lower quality, lower pricing)
    #[serde(default)]
    pub budget: f64,
    /// Erratic vendors (variable behavior, unpredictable performance)
    #[serde(default)]
    pub erratic: f64,
    /// Problematic vendors (frequent issues, high risk for fraud scenarios)
    #[serde(default)]
    pub problematic: f64,
}
2822
impl Default for VendorBehaviorDistribution {
    /// Returns the built-in behavior mix: weight is spread over the first
    /// five categories (sum 1.00) and the two newer ones are left at zero.
    fn default() -> Self {
        // Preserves the pre-extension default sum=1.0 over the original
        // five fields.  `erratic` and `problematic` default to 0.0 so
        // that existing configs/packs continue to merge to a 1.0 sum
        // without modification.
        Self {
            reliable: 0.50,
            sometimes_late: 0.20,
            inconsistent_quality: 0.10,
            premium: 0.10,
            budget: 0.10,
            erratic: 0.0,
            problematic: 0.0,
        }
    }
}
2840
/// Customer master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified. `intercompany_percent` shares its default helper with
/// `VendorMasterConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerMasterConfig {
    /// Number of customers to generate
    #[serde(default = "default_customer_count")]
    pub count: usize,
    /// Percentage of customers that are intercompany (0.0 to 1.0)
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Credit rating distribution
    #[serde(default)]
    pub credit_rating_distribution: CreditRatingDistribution,
    /// Payment behavior distribution
    #[serde(default)]
    pub payment_behavior_distribution: PaymentBehaviorDistribution,
    /// Generate credit limits based on rating
    #[serde(default = "default_true")]
    pub generate_credit_limits: bool,
}
2860
/// Serde default for `CustomerMasterConfig::count`.
fn default_customer_count() -> usize {
    2_000
}
2864
2865impl Default for CustomerMasterConfig {
2866    fn default() -> Self {
2867        Self {
2868            count: default_customer_count(),
2869            intercompany_percent: default_intercompany_percent(),
2870            credit_rating_distribution: CreditRatingDistribution::default(),
2871            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
2872            generate_credit_limits: true,
2873        }
2874    }
2875}
2876
/// Credit rating distribution for customers.
///
/// Two parallel vocabularies are accepted:
///   * Bond-grade tiers: `aaa`, `aa`, `a`, `bbb`, `bb`, `b`, `below_b`
///   * Plain-English tiers: `excellent`, `good`, `fair`, `poor`
///
/// All fields default to `0.0` if absent; mix and match as needed.
/// The validator enforces that the populated weights sum to `1.0`.
///
/// Unknown keys are rejected (`deny_unknown_fields`). The built-in
/// `Default` populates only the bond-grade tiers.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CreditRatingDistribution {
    /// AAA rating
    #[serde(default)]
    pub aaa: f64,
    /// AA rating
    #[serde(default)]
    pub aa: f64,
    /// A rating
    #[serde(default)]
    pub a: f64,
    /// BBB rating
    #[serde(default)]
    pub bbb: f64,
    /// BB rating
    #[serde(default)]
    pub bb: f64,
    /// B rating
    #[serde(default)]
    pub b: f64,
    /// Below B rating
    #[serde(default)]
    pub below_b: f64,
    /// Plain-English: excellent credit (≈ AAA/AA tier)
    #[serde(default)]
    pub excellent: f64,
    /// Plain-English: good credit (≈ A tier)
    #[serde(default)]
    pub good: f64,
    /// Plain-English: fair credit (≈ BBB/BB tier)
    #[serde(default)]
    pub fair: f64,
    /// Plain-English: poor credit (≈ B/below tier)
    #[serde(default)]
    pub poor: f64,
}
2922
impl Default for CreditRatingDistribution {
    /// Returns the built-in rating mix. Only the seven bond-grade tiers carry
    /// weight (sum 1.00, peaking at `bbb`); the plain-English tiers are zero.
    fn default() -> Self {
        Self {
            aaa: 0.05,
            aa: 0.10,
            a: 0.20,
            bbb: 0.30,
            bb: 0.20,
            b: 0.10,
            below_b: 0.05,
            excellent: 0.0,
            good: 0.0,
            fair: 0.0,
            poor: 0.0,
        }
    }
}
2940
/// Payment behavior distribution for customers.
///
/// All fields default to `0.0` if absent from the YAML.  Validator
/// enforces that populated weights sum to `1.0 ± 0.01`.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PaymentBehaviorDistribution {
    /// Always pays early
    #[serde(default)]
    pub early_payer: f64,
    /// Pays on time
    #[serde(default)]
    pub on_time: f64,
    /// Occasionally late
    #[serde(default)]
    pub occasional_late: f64,
    /// Frequently late
    #[serde(default)]
    pub frequent_late: f64,
    /// Takes early payment discounts
    #[serde(default)]
    pub discount_taker: f64,
}
2964
impl Default for PaymentBehaviorDistribution {
    /// Returns the built-in payment behavior mix. The five weights add up to
    /// 1.00, with on-time payers the largest share at 0.50.
    fn default() -> Self {
        Self {
            early_payer: 0.10,
            on_time: 0.50,
            occasional_late: 0.25,
            frequent_late: 0.10,
            discount_taker: 0.05,
        }
    }
}
2976
/// Material master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialMasterConfig {
    /// Number of materials to generate
    #[serde(default = "default_material_count")]
    pub count: usize,
    /// Material type distribution
    #[serde(default)]
    pub type_distribution: MaterialTypeDistribution,
    /// Valuation method distribution
    #[serde(default)]
    pub valuation_distribution: ValuationMethodDistribution,
    /// Percentage of materials with BOM (bill of materials)
    #[serde(default = "default_bom_percent")]
    pub bom_percent: f64,
    /// Maximum BOM depth
    #[serde(default = "default_max_bom_depth")]
    pub max_bom_depth: u8,
}
2996
/// Serde default for `MaterialMasterConfig::count`.
fn default_material_count() -> usize {
    5_000
}

/// Serde default for `MaterialMasterConfig::bom_percent`.
fn default_bom_percent() -> f64 {
    0.20
}

/// Serde default for `MaterialMasterConfig::max_bom_depth`.
fn default_max_bom_depth() -> u8 {
    3
}
3008
3009impl Default for MaterialMasterConfig {
3010    fn default() -> Self {
3011        Self {
3012            count: default_material_count(),
3013            type_distribution: MaterialTypeDistribution::default(),
3014            valuation_distribution: ValuationMethodDistribution::default(),
3015            bom_percent: default_bom_percent(),
3016            max_bom_depth: default_max_bom_depth(),
3017        }
3018    }
3019}
3020
/// Material type distribution.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all six weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialTypeDistribution {
    /// Raw materials
    pub raw_material: f64,
    /// Semi-finished goods
    pub semi_finished: f64,
    /// Finished goods
    pub finished_good: f64,
    /// Trading goods (purchased for resale)
    pub trading_good: f64,
    /// Operating supplies
    pub operating_supply: f64,
    /// Services
    pub service: f64,
}
3037
impl Default for MaterialTypeDistribution {
    /// Returns the built-in material type mix. The six weights add up to
    /// 1.00, with raw materials the largest share at 0.30.
    fn default() -> Self {
        Self {
            raw_material: 0.30,
            semi_finished: 0.15,
            finished_good: 0.25,
            trading_good: 0.15,
            operating_supply: 0.10,
            service: 0.05,
        }
    }
}
3050
/// Valuation method distribution for materials.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all four weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuationMethodDistribution {
    /// Standard cost
    pub standard_cost: f64,
    /// Moving average
    pub moving_average: f64,
    /// FIFO (First In, First Out)
    pub fifo: f64,
    /// LIFO (Last In, First Out)
    pub lifo: f64,
}
3063
impl Default for ValuationMethodDistribution {
    /// Returns the built-in valuation mix. The four weights add up to 1.00,
    /// with standard cost the largest share at 0.50.
    fn default() -> Self {
        Self {
            standard_cost: 0.50,
            moving_average: 0.30,
            fifo: 0.15,
            lifo: 0.05,
        }
    }
}
3074
/// Fixed asset master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixedAssetMasterConfig {
    /// Number of fixed assets to generate
    #[serde(default = "default_asset_count")]
    pub count: usize,
    /// Asset class distribution
    #[serde(default)]
    pub class_distribution: AssetClassDistribution,
    /// Depreciation method distribution
    #[serde(default)]
    pub depreciation_distribution: DepreciationMethodDistribution,
    /// Percentage of assets that are fully depreciated
    #[serde(default = "default_fully_depreciated_percent")]
    pub fully_depreciated_percent: f64,
    /// Generate acquisition history
    #[serde(default = "default_true")]
    pub generate_acquisition_history: bool,
}
3094
/// Serde default for `FixedAssetMasterConfig::count`.
fn default_asset_count() -> usize {
    800
}

/// Serde default for `FixedAssetMasterConfig::fully_depreciated_percent`.
fn default_fully_depreciated_percent() -> f64 {
    0.15
}
3102
3103impl Default for FixedAssetMasterConfig {
3104    fn default() -> Self {
3105        Self {
3106            count: default_asset_count(),
3107            class_distribution: AssetClassDistribution::default(),
3108            depreciation_distribution: DepreciationMethodDistribution::default(),
3109            fully_depreciated_percent: default_fully_depreciated_percent(),
3110            generate_acquisition_history: true,
3111        }
3112    }
3113}
3114
/// Asset class distribution.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all seven weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetClassDistribution {
    /// Buildings and structures
    pub buildings: f64,
    /// Machinery and equipment
    pub machinery: f64,
    /// Vehicles
    pub vehicles: f64,
    /// IT equipment
    pub it_equipment: f64,
    /// Furniture and fixtures
    pub furniture: f64,
    /// Land (non-depreciable)
    pub land: f64,
    /// Leasehold improvements
    pub leasehold: f64,
}
3133
impl Default for AssetClassDistribution {
    /// Returns the built-in asset class mix. The seven weights add up to
    /// 1.00, with machinery the largest share at 0.30.
    fn default() -> Self {
        Self {
            buildings: 0.15,
            machinery: 0.30,
            vehicles: 0.15,
            it_equipment: 0.20,
            furniture: 0.10,
            land: 0.05,
            leasehold: 0.05,
        }
    }
}
3147
/// Depreciation method distribution.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all five weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepreciationMethodDistribution {
    /// Straight line
    pub straight_line: f64,
    /// Declining balance
    pub declining_balance: f64,
    /// Double declining balance
    pub double_declining: f64,
    /// Sum of years' digits
    pub sum_of_years: f64,
    /// Units of production
    pub units_of_production: f64,
}
3162
impl Default for DepreciationMethodDistribution {
    /// Returns the built-in depreciation mix. The five weights add up to
    /// 1.00, with straight line the largest share at 0.60.
    fn default() -> Self {
        Self {
            straight_line: 0.60,
            declining_balance: 0.20,
            double_declining: 0.10,
            sum_of_years: 0.05,
            units_of_production: 0.05,
        }
    }
}
3174
/// Employee master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeMasterConfig {
    /// Number of employees to generate
    #[serde(default = "default_employee_count")]
    pub count: usize,
    /// Generate organizational hierarchy
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth
    #[serde(default = "default_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
    /// Average span of control (direct reports per manager)
    #[serde(default = "default_span_of_control")]
    pub average_span_of_control: f64,
    /// Approval limit distribution by job level
    #[serde(default)]
    pub approval_limits: ApprovalLimitDistribution,
    /// Department distribution
    #[serde(default)]
    pub department_distribution: EmployeeDepartmentDistribution,
}
3197
/// Serde default for `EmployeeMasterConfig::count`.
fn default_employee_count() -> usize {
    1_500
}

/// Serde default for `EmployeeMasterConfig::max_hierarchy_depth`.
fn default_hierarchy_depth() -> u8 {
    6
}

/// Serde default for `EmployeeMasterConfig::average_span_of_control`.
fn default_span_of_control() -> f64 {
    5.0
}
3209
3210impl Default for EmployeeMasterConfig {
3211    fn default() -> Self {
3212        Self {
3213            count: default_employee_count(),
3214            generate_hierarchy: true,
3215            max_hierarchy_depth: default_hierarchy_depth(),
3216            average_span_of_control: default_span_of_control(),
3217            approval_limits: ApprovalLimitDistribution::default(),
3218            department_distribution: EmployeeDepartmentDistribution::default(),
3219        }
3220    }
3221}
3222
/// Approval limit distribution by job level.
///
/// Each field is a monetary approval limit rather than a weight. Every
/// field has its own serde default, so any subset of levels may be given.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalLimitDistribution {
    /// Staff level approval limit
    #[serde(default = "default_staff_limit")]
    pub staff: f64,
    /// Senior staff approval limit
    #[serde(default = "default_senior_limit")]
    pub senior: f64,
    /// Manager approval limit
    #[serde(default = "default_manager_limit")]
    pub manager: f64,
    /// Director approval limit
    #[serde(default = "default_director_limit")]
    pub director: f64,
    /// VP approval limit
    #[serde(default = "default_vp_limit")]
    pub vp: f64,
    /// Executive approval limit
    // NOTE(review): the default is `f64::INFINITY`. serde_json is understood
    // to write non-finite floats as `null`, which would not read back into an
    // `f64` — confirm whether JSON round-trips of a default config matter.
    #[serde(default = "default_executive_limit")]
    pub executive: f64,
}
3245
/// Serde default for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    1_000.0
}

/// Serde default for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    5_000.0
}

/// Serde default for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    25_000.0
}

/// Serde default for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    100_000.0
}

/// Serde default for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    500_000.0
}

/// Serde default for `ApprovalLimitDistribution::executive`: no upper bound.
fn default_executive_limit() -> f64 {
    f64::INFINITY
}
3264
3265impl Default for ApprovalLimitDistribution {
3266    fn default() -> Self {
3267        Self {
3268            staff: default_staff_limit(),
3269            senior: default_senior_limit(),
3270            manager: default_manager_limit(),
3271            director: default_director_limit(),
3272            vp: default_vp_limit(),
3273            executive: default_executive_limit(),
3274        }
3275    }
3276}
3277
/// Employee distribution across departments.
///
/// No field carries a serde default, so a config that supplies this section
/// must list all eight weights. The built-in `Default` mix adds up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeDepartmentDistribution {
    /// Finance and Accounting
    pub finance: f64,
    /// Procurement
    pub procurement: f64,
    /// Sales
    pub sales: f64,
    /// Warehouse and Logistics
    pub warehouse: f64,
    /// IT
    pub it: f64,
    /// Human Resources
    pub hr: f64,
    /// Operations
    pub operations: f64,
    /// Executive
    pub executive: f64,
}
3298
impl Default for EmployeeDepartmentDistribution {
    /// Returns the built-in department mix. The eight weights add up to
    /// 1.00, with sales the largest share at 0.25.
    fn default() -> Self {
        Self {
            finance: 0.12,
            procurement: 0.10,
            sales: 0.25,
            warehouse: 0.15,
            it: 0.10,
            hr: 0.05,
            operations: 0.20,
            executive: 0.03,
        }
    }
}
3313
/// Cost center master data configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostCenterMasterConfig {
    /// Number of cost centers to generate
    #[serde(default = "default_cost_center_count")]
    pub count: usize,
    /// Generate cost center hierarchy
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth
    #[serde(default = "default_cc_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
}
3327
/// Serde default for `CostCenterMasterConfig::count`.
fn default_cost_center_count() -> usize {
    50
}

/// Serde default for `CostCenterMasterConfig::max_hierarchy_depth`.
fn default_cc_hierarchy_depth() -> u8 {
    3
}
3335
3336impl Default for CostCenterMasterConfig {
3337    fn default() -> Self {
3338        Self {
3339            count: default_cost_center_count(),
3340            generate_hierarchy: true,
3341            max_hierarchy_depth: default_cc_hierarchy_depth(),
3342        }
3343    }
3344}
3345
3346// ============================================================================
3347// Document Flow Configuration
3348// ============================================================================
3349
/// Document flow generation configuration.
///
/// Every field has a serde default: the P2P and O2C sections fall back to
/// their own `Default` impls, document references are generated unless
/// turned off, and the flow graph export is off unless turned on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentFlowConfig {
    /// P2P (Procure-to-Pay) flow configuration
    #[serde(default)]
    pub p2p: P2PFlowConfig,
    /// O2C (Order-to-Cash) flow configuration
    #[serde(default)]
    pub o2c: O2CFlowConfig,
    /// Generate document reference chains
    #[serde(default = "default_true")]
    pub generate_document_references: bool,
    /// Export document flow graph
    #[serde(default)]
    pub export_flow_graph: bool,
}
3366
3367impl Default for DocumentFlowConfig {
3368    fn default() -> Self {
3369        Self {
3370            p2p: P2PFlowConfig::default(),
3371            o2c: O2CFlowConfig::default(),
3372            generate_document_references: true,
3373            export_flow_graph: false,
3374        }
3375    }
3376}
3377
/// P2P (Procure-to-Pay) flow configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified. The two `Option` rates stay `None` unless the input sets them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PFlowConfig {
    /// Enable P2P document flow generation
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Three-way match success rate (PO-GR-Invoice)
    #[serde(default = "default_three_way_match_rate")]
    pub three_way_match_rate: f64,
    /// Rate of partial deliveries
    #[serde(default = "default_partial_delivery_rate")]
    pub partial_delivery_rate: f64,
    /// Rate of price variances between PO and Invoice
    #[serde(default = "default_price_variance_rate")]
    pub price_variance_rate: f64,
    /// Maximum price variance percentage
    #[serde(default = "default_max_price_variance")]
    pub max_price_variance_percent: f64,
    /// Rate of quantity variances between PO/GR and Invoice
    #[serde(default = "default_quantity_variance_rate")]
    pub quantity_variance_rate: f64,
    /// Average days from PO to goods receipt
    #[serde(default = "default_po_to_gr_days")]
    pub average_po_to_gr_days: u32,
    /// Average days from GR to invoice
    #[serde(default = "default_gr_to_invoice_days")]
    pub average_gr_to_invoice_days: u32,
    /// Average days from invoice to payment
    #[serde(default = "default_invoice_to_payment_days")]
    pub average_invoice_to_payment_days: u32,
    /// PO line count distribution
    #[serde(default)]
    pub line_count_distribution: DocumentLineCountDistribution,
    /// Payment behavior configuration
    #[serde(default)]
    pub payment_behavior: P2PPaymentBehaviorConfig,
    /// Rate of over-deliveries (quantity received exceeds PO quantity)
    #[serde(default)]
    pub over_delivery_rate: Option<f64>,
    /// Rate of early payment discounts being taken
    #[serde(default)]
    pub early_payment_discount_rate: Option<f64>,
}
3421
/// Serde default for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    0.95
}

/// Serde default for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    0.15
}

/// Serde default for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    0.08
}

/// Serde default for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    0.05
}

/// Serde default for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    0.05
}

/// Serde default for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    14
}

/// Serde default for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    5
}

/// Serde default for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    30
}
3453
3454impl Default for P2PFlowConfig {
3455    fn default() -> Self {
3456        Self {
3457            enabled: true,
3458            three_way_match_rate: default_three_way_match_rate(),
3459            partial_delivery_rate: default_partial_delivery_rate(),
3460            price_variance_rate: default_price_variance_rate(),
3461            max_price_variance_percent: default_max_price_variance(),
3462            quantity_variance_rate: default_quantity_variance_rate(),
3463            average_po_to_gr_days: default_po_to_gr_days(),
3464            average_gr_to_invoice_days: default_gr_to_invoice_days(),
3465            average_invoice_to_payment_days: default_invoice_to_payment_days(),
3466            line_count_distribution: DocumentLineCountDistribution::default(),
3467            payment_behavior: P2PPaymentBehaviorConfig::default(),
3468            over_delivery_rate: None,
3469            early_payment_discount_rate: None,
3470        }
3471    }
3472}
3473
3474// ============================================================================
3475// P2P Payment Behavior Configuration
3476// ============================================================================
3477
/// P2P payment behavior configuration.
///
/// Every field has a serde default, so the section may be partially
/// specified.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PPaymentBehaviorConfig {
    /// Rate of late payments (beyond due date)
    #[serde(default = "default_p2p_late_payment_rate")]
    pub late_payment_rate: f64,
    /// Distribution of late payment days
    #[serde(default)]
    pub late_payment_days_distribution: LatePaymentDaysDistribution,
    /// Rate of partial payments
    #[serde(default = "default_p2p_partial_payment_rate")]
    pub partial_payment_rate: f64,
    /// Rate of payment corrections (NSF, chargebacks, reversals)
    #[serde(default = "default_p2p_payment_correction_rate")]
    pub payment_correction_rate: f64,
    /// Average days until partial payment remainder is paid
    #[serde(default = "default_p2p_avg_days_until_remainder")]
    pub avg_days_until_remainder: u32,
}
3497
/// Fallback for `P2PPaymentBehaviorConfig::late_payment_rate` (0.15).
fn default_p2p_late_payment_rate() -> f64 {
    0.15_f64
}

/// Fallback for `P2PPaymentBehaviorConfig::partial_payment_rate` (0.05).
fn default_p2p_partial_payment_rate() -> f64 {
    0.05_f64
}

/// Fallback for `P2PPaymentBehaviorConfig::payment_correction_rate` (0.02).
fn default_p2p_payment_correction_rate() -> f64 {
    0.02_f64
}

/// Fallback for `P2PPaymentBehaviorConfig::avg_days_until_remainder` (30).
fn default_p2p_avg_days_until_remainder() -> u32 {
    30_u32
}
3513
3514impl Default for P2PPaymentBehaviorConfig {
3515    fn default() -> Self {
3516        Self {
3517            late_payment_rate: default_p2p_late_payment_rate(),
3518            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
3519            partial_payment_rate: default_p2p_partial_payment_rate(),
3520            payment_correction_rate: default_p2p_payment_correction_rate(),
3521            avg_days_until_remainder: default_p2p_avg_days_until_remainder(),
3522        }
3523    }
3524}
3525
3526/// Distribution of late payment days for P2P.
3527#[derive(Debug, Clone, Serialize, Deserialize)]
3528pub struct LatePaymentDaysDistribution {
3529    /// 1-7 days late (slightly late)
3530    #[serde(default = "default_slightly_late")]
3531    pub slightly_late_1_to_7: f64,
3532    /// 8-14 days late
3533    #[serde(default = "default_late_8_14")]
3534    pub late_8_to_14: f64,
3535    /// 15-30 days late (very late)
3536    #[serde(default = "default_very_late")]
3537    pub very_late_15_to_30: f64,
3538    /// 31-60 days late (severely late)
3539    #[serde(default = "default_severely_late")]
3540    pub severely_late_31_to_60: f64,
3541    /// Over 60 days late (extremely late)
3542    #[serde(default = "default_extremely_late")]
3543    pub extremely_late_over_60: f64,
3544}
3545
/// Fallback weight of the 1-7 days bucket (0.50).
fn default_slightly_late() -> f64 {
    0.50_f64
}

/// Fallback weight of the 8-14 days bucket (0.25).
fn default_late_8_14() -> f64 {
    0.25_f64
}

/// Fallback weight of the 15-30 days bucket (0.15).
fn default_very_late() -> f64 {
    0.15_f64
}

/// Fallback weight of the 31-60 days bucket (0.07).
fn default_severely_late() -> f64 {
    0.07_f64
}

/// Fallback weight of the over-60 days bucket (0.03).
fn default_extremely_late() -> f64 {
    0.03_f64
}
3565
3566impl Default for LatePaymentDaysDistribution {
3567    fn default() -> Self {
3568        Self {
3569            slightly_late_1_to_7: default_slightly_late(),
3570            late_8_to_14: default_late_8_14(),
3571            very_late_15_to_30: default_very_late(),
3572            severely_late_31_to_60: default_severely_late(),
3573            extremely_late_over_60: default_extremely_late(),
3574        }
3575    }
3576}
3577
3578/// O2C (Order-to-Cash) flow configuration.
3579#[derive(Debug, Clone, Serialize, Deserialize)]
3580pub struct O2CFlowConfig {
3581    /// Enable O2C document flow generation
3582    #[serde(default = "default_true")]
3583    pub enabled: bool,
3584    /// Credit check failure rate
3585    #[serde(default = "default_credit_check_failure_rate")]
3586    pub credit_check_failure_rate: f64,
3587    /// Rate of partial shipments
3588    #[serde(default = "default_partial_shipment_rate")]
3589    pub partial_shipment_rate: f64,
3590    /// Rate of returns
3591    #[serde(default = "default_return_rate")]
3592    pub return_rate: f64,
3593    /// Bad debt write-off rate
3594    #[serde(default = "default_bad_debt_rate")]
3595    pub bad_debt_rate: f64,
3596    /// Average days from SO to delivery
3597    #[serde(default = "default_so_to_delivery_days")]
3598    pub average_so_to_delivery_days: u32,
3599    /// Average days from delivery to invoice
3600    #[serde(default = "default_delivery_to_invoice_days")]
3601    pub average_delivery_to_invoice_days: u32,
3602    /// Average days from invoice to receipt
3603    #[serde(default = "default_invoice_to_receipt_days")]
3604    pub average_invoice_to_receipt_days: u32,
3605    /// SO line count distribution
3606    #[serde(default)]
3607    pub line_count_distribution: DocumentLineCountDistribution,
3608    /// Cash discount configuration
3609    #[serde(default)]
3610    pub cash_discount: CashDiscountConfig,
3611    /// Payment behavior configuration
3612    #[serde(default)]
3613    pub payment_behavior: O2CPaymentBehaviorConfig,
3614    /// Rate of late payments
3615    #[serde(default)]
3616    pub late_payment_rate: Option<f64>,
3617}
3618
/// Fallback for `O2CFlowConfig::credit_check_failure_rate` (0.02).
fn default_credit_check_failure_rate() -> f64 {
    0.02_f64
}

/// Fallback for `O2CFlowConfig::partial_shipment_rate` (0.10).
fn default_partial_shipment_rate() -> f64 {
    0.10_f64
}

/// Fallback for `O2CFlowConfig::return_rate` (0.03).
fn default_return_rate() -> f64 {
    0.03_f64
}

/// Fallback for `O2CFlowConfig::bad_debt_rate` (0.01).
fn default_bad_debt_rate() -> f64 {
    0.01_f64
}

/// Fallback for `O2CFlowConfig::average_so_to_delivery_days` (7).
fn default_so_to_delivery_days() -> u32 {
    7_u32
}

/// Fallback for `O2CFlowConfig::average_delivery_to_invoice_days` (1).
fn default_delivery_to_invoice_days() -> u32 {
    1_u32
}

/// Fallback for `O2CFlowConfig::average_invoice_to_receipt_days` (45).
fn default_invoice_to_receipt_days() -> u32 {
    45_u32
}
3646
3647impl Default for O2CFlowConfig {
3648    fn default() -> Self {
3649        Self {
3650            enabled: true,
3651            credit_check_failure_rate: default_credit_check_failure_rate(),
3652            partial_shipment_rate: default_partial_shipment_rate(),
3653            return_rate: default_return_rate(),
3654            bad_debt_rate: default_bad_debt_rate(),
3655            average_so_to_delivery_days: default_so_to_delivery_days(),
3656            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
3657            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
3658            line_count_distribution: DocumentLineCountDistribution::default(),
3659            cash_discount: CashDiscountConfig::default(),
3660            payment_behavior: O2CPaymentBehaviorConfig::default(),
3661            late_payment_rate: None,
3662        }
3663    }
3664}
3665
3666// ============================================================================
3667// O2C Payment Behavior Configuration
3668// ============================================================================
3669
3670/// O2C payment behavior configuration.
3671#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3672pub struct O2CPaymentBehaviorConfig {
3673    /// Dunning (Mahnung) configuration
3674    #[serde(default)]
3675    pub dunning: DunningConfig,
3676    /// Partial payment configuration
3677    #[serde(default)]
3678    pub partial_payments: PartialPaymentConfig,
3679    /// Short payment configuration (unauthorized deductions)
3680    #[serde(default)]
3681    pub short_payments: ShortPaymentConfig,
3682    /// On-account payment configuration (unapplied payments)
3683    #[serde(default)]
3684    pub on_account_payments: OnAccountPaymentConfig,
3685    /// Payment correction configuration (NSF, chargebacks)
3686    #[serde(default)]
3687    pub payment_corrections: PaymentCorrectionConfig,
3688}
3689
3690/// Dunning (Mahnungen) configuration for AR collections.
3691#[derive(Debug, Clone, Serialize, Deserialize)]
3692pub struct DunningConfig {
3693    /// Enable dunning process
3694    #[serde(default)]
3695    pub enabled: bool,
3696    /// Days overdue for level 1 dunning (1st reminder)
3697    #[serde(default = "default_dunning_level_1_days")]
3698    pub level_1_days_overdue: u32,
3699    /// Days overdue for level 2 dunning (2nd reminder)
3700    #[serde(default = "default_dunning_level_2_days")]
3701    pub level_2_days_overdue: u32,
3702    /// Days overdue for level 3 dunning (final notice)
3703    #[serde(default = "default_dunning_level_3_days")]
3704    pub level_3_days_overdue: u32,
3705    /// Days overdue for collection handover
3706    #[serde(default = "default_collection_days")]
3707    pub collection_days_overdue: u32,
3708    /// Payment rates after each dunning level
3709    #[serde(default)]
3710    pub payment_after_dunning_rates: DunningPaymentRates,
3711    /// Rate of invoices blocked from dunning (disputes)
3712    #[serde(default = "default_dunning_block_rate")]
3713    pub dunning_block_rate: f64,
3714    /// Interest rate per year for overdue amounts
3715    #[serde(default = "default_dunning_interest_rate")]
3716    pub interest_rate_per_year: f64,
3717    /// Fixed dunning charge per letter
3718    #[serde(default = "default_dunning_charge")]
3719    pub dunning_charge: f64,
3720}
3721
/// Fallback for `DunningConfig::level_1_days_overdue` (14).
fn default_dunning_level_1_days() -> u32 {
    14_u32
}

/// Fallback for `DunningConfig::level_2_days_overdue` (28).
fn default_dunning_level_2_days() -> u32 {
    28_u32
}

/// Fallback for `DunningConfig::level_3_days_overdue` (42).
fn default_dunning_level_3_days() -> u32 {
    42_u32
}

/// Fallback for `DunningConfig::collection_days_overdue` (60).
fn default_collection_days() -> u32 {
    60_u32
}

/// Fallback for `DunningConfig::dunning_block_rate` (0.05).
fn default_dunning_block_rate() -> f64 {
    0.05_f64
}

/// Fallback for `DunningConfig::interest_rate_per_year` (0.09).
fn default_dunning_interest_rate() -> f64 {
    0.09_f64
}

/// Fallback for `DunningConfig::dunning_charge` (25.0).
fn default_dunning_charge() -> f64 {
    25.0_f64
}
3749
3750impl Default for DunningConfig {
3751    fn default() -> Self {
3752        Self {
3753            enabled: false,
3754            level_1_days_overdue: default_dunning_level_1_days(),
3755            level_2_days_overdue: default_dunning_level_2_days(),
3756            level_3_days_overdue: default_dunning_level_3_days(),
3757            collection_days_overdue: default_collection_days(),
3758            payment_after_dunning_rates: DunningPaymentRates::default(),
3759            dunning_block_rate: default_dunning_block_rate(),
3760            interest_rate_per_year: default_dunning_interest_rate(),
3761            dunning_charge: default_dunning_charge(),
3762        }
3763    }
3764}
3765
3766/// Payment rates after each dunning level.
3767#[derive(Debug, Clone, Serialize, Deserialize)]
3768pub struct DunningPaymentRates {
3769    /// Rate that pays after level 1 reminder
3770    #[serde(default = "default_after_level_1")]
3771    pub after_level_1: f64,
3772    /// Rate that pays after level 2 reminder
3773    #[serde(default = "default_after_level_2")]
3774    pub after_level_2: f64,
3775    /// Rate that pays after level 3 final notice
3776    #[serde(default = "default_after_level_3")]
3777    pub after_level_3: f64,
3778    /// Rate that pays during collection
3779    #[serde(default = "default_during_collection")]
3780    pub during_collection: f64,
3781    /// Rate that never pays (becomes bad debt)
3782    #[serde(default = "default_never_pay")]
3783    pub never_pay: f64,
3784}
3785
/// Fallback for `DunningPaymentRates::after_level_1` (0.40).
fn default_after_level_1() -> f64 {
    0.40_f64
}

/// Fallback for `DunningPaymentRates::after_level_2` (0.30).
fn default_after_level_2() -> f64 {
    0.30_f64
}

/// Fallback for `DunningPaymentRates::after_level_3` (0.15).
fn default_after_level_3() -> f64 {
    0.15_f64
}

/// Fallback for `DunningPaymentRates::during_collection` (0.05).
fn default_during_collection() -> f64 {
    0.05_f64
}

/// Fallback for `DunningPaymentRates::never_pay` (0.10).
fn default_never_pay() -> f64 {
    0.10_f64
}
3805
3806impl Default for DunningPaymentRates {
3807    fn default() -> Self {
3808        Self {
3809            after_level_1: default_after_level_1(),
3810            after_level_2: default_after_level_2(),
3811            after_level_3: default_after_level_3(),
3812            during_collection: default_during_collection(),
3813            never_pay: default_never_pay(),
3814        }
3815    }
3816}
3817
3818/// Partial payment configuration.
3819#[derive(Debug, Clone, Serialize, Deserialize)]
3820pub struct PartialPaymentConfig {
3821    /// Rate of invoices paid partially
3822    #[serde(default = "default_partial_payment_rate")]
3823    pub rate: f64,
3824    /// Distribution of partial payment percentages
3825    #[serde(default)]
3826    pub percentage_distribution: PartialPaymentPercentageDistribution,
3827    /// Average days until remainder is paid
3828    #[serde(default = "default_avg_days_until_remainder")]
3829    pub avg_days_until_remainder: u32,
3830}
3831
/// Fallback for `PartialPaymentConfig::rate` (0.08).
fn default_partial_payment_rate() -> f64 {
    0.08_f64
}

/// Fallback for `PartialPaymentConfig::avg_days_until_remainder` (30).
fn default_avg_days_until_remainder() -> u32 {
    30_u32
}
3839
3840impl Default for PartialPaymentConfig {
3841    fn default() -> Self {
3842        Self {
3843            rate: default_partial_payment_rate(),
3844            percentage_distribution: PartialPaymentPercentageDistribution::default(),
3845            avg_days_until_remainder: default_avg_days_until_remainder(),
3846        }
3847    }
3848}
3849
3850/// Distribution of partial payment percentages.
3851#[derive(Debug, Clone, Serialize, Deserialize)]
3852pub struct PartialPaymentPercentageDistribution {
3853    /// Pay 25% of invoice
3854    #[serde(default = "default_partial_25")]
3855    pub pay_25_percent: f64,
3856    /// Pay 50% of invoice
3857    #[serde(default = "default_partial_50")]
3858    pub pay_50_percent: f64,
3859    /// Pay 75% of invoice
3860    #[serde(default = "default_partial_75")]
3861    pub pay_75_percent: f64,
3862    /// Pay random percentage
3863    #[serde(default = "default_partial_random")]
3864    pub pay_random_percent: f64,
3865}
3866
/// Fallback for `PartialPaymentPercentageDistribution::pay_25_percent` (0.15).
fn default_partial_25() -> f64 {
    0.15_f64
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_50_percent` (0.50).
fn default_partial_50() -> f64 {
    0.50_f64
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_75_percent` (0.25).
fn default_partial_75() -> f64 {
    0.25_f64
}

/// Fallback for `PartialPaymentPercentageDistribution::pay_random_percent`
/// (0.10).
fn default_partial_random() -> f64 {
    0.10_f64
}
3882
3883impl Default for PartialPaymentPercentageDistribution {
3884    fn default() -> Self {
3885        Self {
3886            pay_25_percent: default_partial_25(),
3887            pay_50_percent: default_partial_50(),
3888            pay_75_percent: default_partial_75(),
3889            pay_random_percent: default_partial_random(),
3890        }
3891    }
3892}
3893
3894/// Short payment configuration (unauthorized deductions).
3895#[derive(Debug, Clone, Serialize, Deserialize)]
3896pub struct ShortPaymentConfig {
3897    /// Rate of payments that are short
3898    #[serde(default = "default_short_payment_rate")]
3899    pub rate: f64,
3900    /// Distribution of short payment reasons
3901    #[serde(default)]
3902    pub reason_distribution: ShortPaymentReasonDistribution,
3903    /// Maximum percentage that can be short
3904    #[serde(default = "default_max_short_percent")]
3905    pub max_short_percent: f64,
3906}
3907
/// Fallback for `ShortPaymentConfig::rate` (0.03).
fn default_short_payment_rate() -> f64 {
    0.03_f64
}

/// Fallback for `ShortPaymentConfig::max_short_percent` (0.10).
fn default_max_short_percent() -> f64 {
    0.10_f64
}
3915
3916impl Default for ShortPaymentConfig {
3917    fn default() -> Self {
3918        Self {
3919            rate: default_short_payment_rate(),
3920            reason_distribution: ShortPaymentReasonDistribution::default(),
3921            max_short_percent: default_max_short_percent(),
3922        }
3923    }
3924}
3925
3926/// Distribution of short payment reasons.
3927#[derive(Debug, Clone, Serialize, Deserialize)]
3928pub struct ShortPaymentReasonDistribution {
3929    /// Pricing dispute
3930    #[serde(default = "default_pricing_dispute")]
3931    pub pricing_dispute: f64,
3932    /// Quality issue
3933    #[serde(default = "default_quality_issue")]
3934    pub quality_issue: f64,
3935    /// Quantity discrepancy
3936    #[serde(default = "default_quantity_discrepancy")]
3937    pub quantity_discrepancy: f64,
3938    /// Unauthorized deduction
3939    #[serde(default = "default_unauthorized_deduction")]
3940    pub unauthorized_deduction: f64,
3941    /// Early payment discount taken incorrectly
3942    #[serde(default = "default_incorrect_discount")]
3943    pub incorrect_discount: f64,
3944}
3945
/// Fallback for `ShortPaymentReasonDistribution::pricing_dispute` (0.30).
fn default_pricing_dispute() -> f64 {
    0.30_f64
}

/// Fallback for `ShortPaymentReasonDistribution::quality_issue` (0.20).
fn default_quality_issue() -> f64 {
    0.20_f64
}

/// Fallback for `ShortPaymentReasonDistribution::quantity_discrepancy` (0.20).
fn default_quantity_discrepancy() -> f64 {
    0.20_f64
}

/// Fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`
/// (0.15).
fn default_unauthorized_deduction() -> f64 {
    0.15_f64
}

/// Fallback for `ShortPaymentReasonDistribution::incorrect_discount` (0.15).
fn default_incorrect_discount() -> f64 {
    0.15_f64
}
3965
3966impl Default for ShortPaymentReasonDistribution {
3967    fn default() -> Self {
3968        Self {
3969            pricing_dispute: default_pricing_dispute(),
3970            quality_issue: default_quality_issue(),
3971            quantity_discrepancy: default_quantity_discrepancy(),
3972            unauthorized_deduction: default_unauthorized_deduction(),
3973            incorrect_discount: default_incorrect_discount(),
3974        }
3975    }
3976}
3977
3978/// On-account payment configuration (unapplied payments).
3979#[derive(Debug, Clone, Serialize, Deserialize)]
3980pub struct OnAccountPaymentConfig {
3981    /// Rate of payments that are on-account (unapplied)
3982    #[serde(default = "default_on_account_rate")]
3983    pub rate: f64,
3984    /// Average days until on-account payments are applied
3985    #[serde(default = "default_avg_days_until_applied")]
3986    pub avg_days_until_applied: u32,
3987}
3988
/// Fallback for `OnAccountPaymentConfig::rate` (0.02).
fn default_on_account_rate() -> f64 {
    0.02_f64
}

/// Fallback for `OnAccountPaymentConfig::avg_days_until_applied` (14).
fn default_avg_days_until_applied() -> u32 {
    14_u32
}
3996
3997impl Default for OnAccountPaymentConfig {
3998    fn default() -> Self {
3999        Self {
4000            rate: default_on_account_rate(),
4001            avg_days_until_applied: default_avg_days_until_applied(),
4002        }
4003    }
4004}
4005
4006/// Payment correction configuration.
4007#[derive(Debug, Clone, Serialize, Deserialize)]
4008pub struct PaymentCorrectionConfig {
4009    /// Rate of payments requiring correction
4010    #[serde(default = "default_payment_correction_rate")]
4011    pub rate: f64,
4012    /// Distribution of correction types
4013    #[serde(default)]
4014    pub type_distribution: PaymentCorrectionTypeDistribution,
4015}
4016
/// Fallback for `PaymentCorrectionConfig::rate` (0.02).
fn default_payment_correction_rate() -> f64 {
    0.02_f64
}
4020
4021impl Default for PaymentCorrectionConfig {
4022    fn default() -> Self {
4023        Self {
4024            rate: default_payment_correction_rate(),
4025            type_distribution: PaymentCorrectionTypeDistribution::default(),
4026        }
4027    }
4028}
4029
4030/// Distribution of payment correction types.
4031#[derive(Debug, Clone, Serialize, Deserialize)]
4032pub struct PaymentCorrectionTypeDistribution {
4033    /// NSF (Non-sufficient funds) / bounced check
4034    #[serde(default = "default_nsf_rate")]
4035    pub nsf: f64,
4036    /// Chargeback
4037    #[serde(default = "default_chargeback_rate")]
4038    pub chargeback: f64,
4039    /// Wrong amount applied
4040    #[serde(default = "default_wrong_amount_rate")]
4041    pub wrong_amount: f64,
4042    /// Wrong customer applied
4043    #[serde(default = "default_wrong_customer_rate")]
4044    pub wrong_customer: f64,
4045    /// Duplicate payment
4046    #[serde(default = "default_duplicate_payment_rate")]
4047    pub duplicate_payment: f64,
4048}
4049
/// Fallback for `PaymentCorrectionTypeDistribution::nsf` (0.30).
fn default_nsf_rate() -> f64 {
    0.30_f64
}

/// Fallback for `PaymentCorrectionTypeDistribution::chargeback` (0.20).
fn default_chargeback_rate() -> f64 {
    0.20_f64
}

/// Fallback for `PaymentCorrectionTypeDistribution::wrong_amount` (0.20).
fn default_wrong_amount_rate() -> f64 {
    0.20_f64
}

/// Fallback for `PaymentCorrectionTypeDistribution::wrong_customer` (0.15).
fn default_wrong_customer_rate() -> f64 {
    0.15_f64
}

/// Fallback for `PaymentCorrectionTypeDistribution::duplicate_payment` (0.15).
fn default_duplicate_payment_rate() -> f64 {
    0.15_f64
}
4069
4070impl Default for PaymentCorrectionTypeDistribution {
4071    fn default() -> Self {
4072        Self {
4073            nsf: default_nsf_rate(),
4074            chargeback: default_chargeback_rate(),
4075            wrong_amount: default_wrong_amount_rate(),
4076            wrong_customer: default_wrong_customer_rate(),
4077            duplicate_payment: default_duplicate_payment_rate(),
4078        }
4079    }
4080}
4081
4082/// Document line count distribution.
4083#[derive(Debug, Clone, Serialize, Deserialize)]
4084pub struct DocumentLineCountDistribution {
4085    /// Minimum number of lines
4086    #[serde(default = "default_min_lines")]
4087    pub min_lines: u32,
4088    /// Maximum number of lines
4089    #[serde(default = "default_max_lines")]
4090    pub max_lines: u32,
4091    /// Most common line count (mode)
4092    #[serde(default = "default_mode_lines")]
4093    pub mode_lines: u32,
4094}
4095
/// Fallback for `DocumentLineCountDistribution::min_lines` (1).
fn default_min_lines() -> u32 {
    1_u32
}

/// Fallback for `DocumentLineCountDistribution::max_lines` (20).
fn default_max_lines() -> u32 {
    20_u32
}

/// Fallback for `DocumentLineCountDistribution::mode_lines` (3).
fn default_mode_lines() -> u32 {
    3_u32
}
4107
4108impl Default for DocumentLineCountDistribution {
4109    fn default() -> Self {
4110        Self {
4111            min_lines: default_min_lines(),
4112            max_lines: default_max_lines(),
4113            mode_lines: default_mode_lines(),
4114        }
4115    }
4116}
4117
4118/// Cash discount configuration.
4119#[derive(Debug, Clone, Serialize, Deserialize)]
4120pub struct CashDiscountConfig {
4121    /// Percentage of invoices eligible for cash discount
4122    #[serde(default = "default_discount_eligible_rate")]
4123    pub eligible_rate: f64,
4124    /// Rate at which customers take the discount
4125    #[serde(default = "default_discount_taken_rate")]
4126    pub taken_rate: f64,
4127    /// Standard discount percentage
4128    #[serde(default = "default_discount_percent")]
4129    pub discount_percent: f64,
4130    /// Days within which discount must be taken
4131    #[serde(default = "default_discount_days")]
4132    pub discount_days: u32,
4133}
4134
/// Fallback for `CashDiscountConfig::eligible_rate` (0.30).
fn default_discount_eligible_rate() -> f64 {
    0.30_f64
}

/// Fallback for `CashDiscountConfig::taken_rate` (0.60).
fn default_discount_taken_rate() -> f64 {
    0.60_f64
}

/// Fallback for `CashDiscountConfig::discount_percent` (0.02).
fn default_discount_percent() -> f64 {
    0.02_f64
}

/// Fallback for `CashDiscountConfig::discount_days` (10).
fn default_discount_days() -> u32 {
    10_u32
}
4150
4151impl Default for CashDiscountConfig {
4152    fn default() -> Self {
4153        Self {
4154            eligible_rate: default_discount_eligible_rate(),
4155            taken_rate: default_discount_taken_rate(),
4156            discount_percent: default_discount_percent(),
4157            discount_days: default_discount_days(),
4158        }
4159    }
4160}
4161
4162// ============================================================================
4163// Intercompany Configuration
4164// ============================================================================
4165
4166/// Intercompany transaction configuration.
4167#[derive(Debug, Clone, Serialize, Deserialize)]
4168pub struct IntercompanyConfig {
4169    /// Enable intercompany transaction generation
4170    #[serde(default)]
4171    pub enabled: bool,
4172    /// Rate of transactions that are intercompany
4173    #[serde(default = "default_ic_transaction_rate")]
4174    pub ic_transaction_rate: f64,
4175    /// Transfer pricing method
4176    #[serde(default)]
4177    pub transfer_pricing_method: TransferPricingMethod,
4178    /// Transfer pricing markup percentage (for cost-plus)
4179    #[serde(default = "default_markup_percent")]
4180    pub markup_percent: f64,
4181    /// Generate matched IC pairs (offsetting entries)
4182    #[serde(default = "default_true")]
4183    pub generate_matched_pairs: bool,
4184    /// IC transaction type distribution
4185    #[serde(default)]
4186    pub transaction_type_distribution: ICTransactionTypeDistribution,
4187    /// Generate elimination entries for consolidation
4188    #[serde(default)]
4189    pub generate_eliminations: bool,
4190}
4191
/// Fallback for `IntercompanyConfig::ic_transaction_rate` (0.15).
fn default_ic_transaction_rate() -> f64 {
    0.15_f64
}

/// Fallback for `IntercompanyConfig::markup_percent` (0.05).
fn default_markup_percent() -> f64 {
    0.05_f64
}
4199
4200impl Default for IntercompanyConfig {
4201    fn default() -> Self {
4202        Self {
4203            enabled: false,
4204            ic_transaction_rate: default_ic_transaction_rate(),
4205            transfer_pricing_method: TransferPricingMethod::default(),
4206            markup_percent: default_markup_percent(),
4207            generate_matched_pairs: true,
4208            transaction_type_distribution: ICTransactionTypeDistribution::default(),
4209            generate_eliminations: false,
4210        }
4211    }
4212}
4213
4214/// Transfer pricing method.
4215#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
4216#[serde(rename_all = "snake_case")]
4217pub enum TransferPricingMethod {
4218    /// Cost plus a markup
4219    #[default]
4220    CostPlus,
4221    /// Comparable uncontrolled price
4222    ComparableUncontrolled,
4223    /// Resale price method
4224    ResalePrice,
4225    /// Transactional net margin method
4226    TransactionalNetMargin,
4227    /// Profit split method
4228    ProfitSplit,
4229}
4230
4231/// IC transaction type distribution.
4232#[derive(Debug, Clone, Serialize, Deserialize)]
4233pub struct ICTransactionTypeDistribution {
4234    /// Goods sales between entities
4235    pub goods_sale: f64,
4236    /// Services provided
4237    pub service_provided: f64,
4238    /// Intercompany loans
4239    pub loan: f64,
4240    /// Dividends
4241    pub dividend: f64,
4242    /// Management fees
4243    pub management_fee: f64,
4244    /// Royalties
4245    pub royalty: f64,
4246    /// Cost sharing
4247    pub cost_sharing: f64,
4248}
4249
4250impl Default for ICTransactionTypeDistribution {
4251    fn default() -> Self {
4252        Self {
4253            goods_sale: 0.35,
4254            service_provided: 0.20,
4255            loan: 0.10,
4256            dividend: 0.05,
4257            management_fee: 0.15,
4258            royalty: 0.10,
4259            cost_sharing: 0.05,
4260        }
4261    }
4262}
4263
4264// ============================================================================
4265// Balance Configuration
4266// ============================================================================
4267
4268/// Balance and trial balance configuration.
4269#[derive(Debug, Clone, Serialize, Deserialize)]
4270pub struct BalanceConfig {
4271    /// Generate opening balances
4272    #[serde(default)]
4273    pub generate_opening_balances: bool,
4274    /// Generate trial balances
4275    #[serde(default = "default_true")]
4276    pub generate_trial_balances: bool,
4277    /// Target gross margin (for revenue/COGS coherence)
4278    #[serde(default = "default_gross_margin")]
4279    pub target_gross_margin: f64,
4280    /// Target DSO (Days Sales Outstanding)
4281    #[serde(default = "default_dso")]
4282    pub target_dso_days: u32,
4283    /// Target DPO (Days Payable Outstanding)
4284    #[serde(default = "default_dpo")]
4285    pub target_dpo_days: u32,
4286    /// Target current ratio
4287    #[serde(default = "default_current_ratio")]
4288    pub target_current_ratio: f64,
4289    /// Target debt-to-equity ratio
4290    #[serde(default = "default_debt_equity")]
4291    pub target_debt_to_equity: f64,
4292    /// Validate balance sheet equation (A = L + E)
4293    #[serde(default = "default_true")]
4294    pub validate_balance_equation: bool,
4295    /// Reconcile subledgers to GL control accounts
4296    #[serde(default = "default_true")]
4297    pub reconcile_subledgers: bool,
4298}
4299
/// Fallback for `BalanceConfig::target_gross_margin` (0.35).
fn default_gross_margin() -> f64 {
    0.35_f64
}

/// Fallback for `BalanceConfig::target_dso_days` (45).
fn default_dso() -> u32 {
    45_u32
}

/// Fallback for `BalanceConfig::target_dpo_days` (30).
fn default_dpo() -> u32 {
    30_u32
}

/// Fallback for `BalanceConfig::target_current_ratio` (1.5).
fn default_current_ratio() -> f64 {
    1.5_f64
}

/// Fallback for `BalanceConfig::target_debt_to_equity` (0.5).
fn default_debt_equity() -> f64 {
    0.5_f64
}
4319
4320impl Default for BalanceConfig {
4321    fn default() -> Self {
4322        Self {
4323            generate_opening_balances: false,
4324            generate_trial_balances: true,
4325            target_gross_margin: default_gross_margin(),
4326            target_dso_days: default_dso(),
4327            target_dpo_days: default_dpo(),
4328            target_current_ratio: default_current_ratio(),
4329            target_debt_to_equity: default_debt_equity(),
4330            validate_balance_equation: true,
4331            reconcile_subledgers: true,
4332        }
4333    }
4334}
4335
4336// ==========================================================================
4337// OCPM (Object-Centric Process Mining) Configuration
4338// ==========================================================================
4339
4340/// OCPM (Object-Centric Process Mining) configuration.
4341///
4342/// Controls generation of OCEL 2.0 compatible event logs with
4343/// many-to-many event-to-object relationships.
4344#[derive(Debug, Clone, Serialize, Deserialize)]
4345pub struct OcpmConfig {
4346    /// Enable OCPM event log generation
4347    #[serde(default)]
4348    pub enabled: bool,
4349
4350    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
4351    #[serde(default = "default_true")]
4352    pub generate_lifecycle_events: bool,
4353
4354    /// Include object-to-object relationships in output
4355    #[serde(default = "default_true")]
4356    pub include_object_relationships: bool,
4357
4358    /// Compute and export process variants
4359    #[serde(default = "default_true")]
4360    pub compute_variants: bool,
4361
4362    /// Maximum variants to track (0 = unlimited)
4363    #[serde(default)]
4364    pub max_variants: usize,
4365
4366    /// P2P process configuration
4367    #[serde(default)]
4368    pub p2p_process: OcpmProcessConfig,
4369
4370    /// O2C process configuration
4371    #[serde(default)]
4372    pub o2c_process: OcpmProcessConfig,
4373
4374    /// Output format configuration
4375    #[serde(default)]
4376    pub output: OcpmOutputConfig,
4377}
4378
4379impl Default for OcpmConfig {
4380    fn default() -> Self {
4381        Self {
4382            enabled: false,
4383            generate_lifecycle_events: true,
4384            include_object_relationships: true,
4385            compute_variants: true,
4386            max_variants: 0,
4387            p2p_process: OcpmProcessConfig::default(),
4388            o2c_process: OcpmProcessConfig::default(),
4389            output: OcpmOutputConfig::default(),
4390        }
4391    }
4392}
4393
4394/// Process-specific OCPM configuration.
4395#[derive(Debug, Clone, Serialize, Deserialize)]
4396pub struct OcpmProcessConfig {
4397    /// Rework probability (0.0-1.0)
4398    #[serde(default = "default_rework_probability")]
4399    pub rework_probability: f64,
4400
4401    /// Skip step probability (0.0-1.0)
4402    #[serde(default = "default_skip_probability")]
4403    pub skip_step_probability: f64,
4404
4405    /// Out-of-order step probability (0.0-1.0)
4406    #[serde(default = "default_out_of_order_probability")]
4407    pub out_of_order_probability: f64,
4408}
4409
// These defaults are chosen on purpose so that variant counts and
// Inductive-Miner fitness land in the range seen in real ERP data
// (dozens of variants, ~0.7–0.9 fitness). Setting all of them to 0
// yields a single-variant happy-path log.

/// Fallback for `OcpmProcessConfig::rework_probability` (0.15).
fn default_rework_probability() -> f64 {
    0.15_f64
}

/// Fallback for `OcpmProcessConfig::skip_step_probability` (0.10).
fn default_skip_probability() -> f64 {
    0.10_f64
}

/// Fallback for `OcpmProcessConfig::out_of_order_probability` (0.08).
fn default_out_of_order_probability() -> f64 {
    0.08_f64
}
4424
4425impl Default for OcpmProcessConfig {
4426    fn default() -> Self {
4427        Self {
4428            rework_probability: default_rework_probability(),
4429            skip_step_probability: default_skip_probability(),
4430            out_of_order_probability: default_out_of_order_probability(),
4431        }
4432    }
4433}
4434
4435/// OCPM output format configuration.
4436#[derive(Debug, Clone, Serialize, Deserialize)]
4437pub struct OcpmOutputConfig {
4438    /// Export OCEL 2.0 JSON format
4439    #[serde(default = "default_true")]
4440    pub ocel_json: bool,
4441
4442    /// Export OCEL 2.0 XML format
4443    #[serde(default)]
4444    pub ocel_xml: bool,
4445
4446    /// Export XES 2.0 XML format (IEEE standard for process mining tools)
4447    #[serde(default)]
4448    pub xes: bool,
4449
4450    /// Include lifecycle transitions in XES output (start/complete pairs)
4451    #[serde(default = "default_true")]
4452    pub xes_include_lifecycle: bool,
4453
4454    /// Include resource attributes in XES output
4455    #[serde(default = "default_true")]
4456    pub xes_include_resources: bool,
4457
4458    /// Export flattened CSV for each object type
4459    #[serde(default = "default_true")]
4460    pub flattened_csv: bool,
4461
4462    /// Export event-object relationship table
4463    #[serde(default = "default_true")]
4464    pub event_object_csv: bool,
4465
4466    /// Export object-object relationship table
4467    #[serde(default = "default_true")]
4468    pub object_relationship_csv: bool,
4469
4470    /// Export process variants summary
4471    #[serde(default = "default_true")]
4472    pub variants_csv: bool,
4473
4474    /// Export reference process models (canonical P2P, O2C, R2R)
4475    #[serde(default)]
4476    pub export_reference_models: bool,
4477}
4478
4479impl Default for OcpmOutputConfig {
4480    fn default() -> Self {
4481        Self {
4482            ocel_json: true,
4483            ocel_xml: false,
4484            xes: false,
4485            xes_include_lifecycle: true,
4486            xes_include_resources: true,
4487            flattened_csv: true,
4488            event_object_csv: true,
4489            object_relationship_csv: true,
4490            variants_csv: true,
4491            export_reference_models: false,
4492        }
4493    }
4494}
4495
4496/// Audit engagement and workpaper generation configuration.
4497#[derive(Debug, Clone, Serialize, Deserialize)]
4498pub struct AuditGenerationConfig {
4499    /// Enable audit engagement generation
4500    #[serde(default)]
4501    pub enabled: bool,
4502
4503    /// Gate for workpaper generation (v3.3.2+).
4504    /// When `false`, workpapers and dependent evidence are skipped
4505    /// while engagements / risk assessments / findings still generate.
4506    #[serde(default = "default_true")]
4507    pub generate_workpapers: bool,
4508
4509    /// Engagement type distribution (v3.3.2+). Drives per-engagement
4510    /// type draw via `AuditEngagementGenerator::draw_engagement_type`.
4511    #[serde(default)]
4512    pub engagement_types: AuditEngagementTypesConfig,
4513
4514    /// Workpaper configuration (v3.3.2+). `average_per_phase` maps onto
4515    /// `WorkpaperGenerator.workpapers_per_section` as a ±50% band
4516    /// around the average. Sampling / ISA / cross-reference flags are
4517    /// surfaced for downstream formatting overlays.
4518    #[serde(default)]
4519    pub workpapers: WorkpaperConfig,
4520
4521    /// Audit team configuration (v3.3.2+). `min_team_size` /
4522    /// `max_team_size` map directly onto
4523    /// `AuditEngagementGenerator.team_size_range`.
4524    /// `specialist_probability` is reserved for v3.4 (explicit
4525    /// specialist-role support).
4526    #[serde(default)]
4527    pub team: AuditTeamConfig,
4528
4529    /// Review workflow configuration (v3.3.2+).
4530    /// `average_review_delay_days` drives both
4531    /// `first_review_delay_range` and `second_review_delay_range` as
4532    /// a ±1-day band around the average. `rework_probability` and
4533    /// `require_partner_signoff` are reserved for v3.4 workflow
4534    /// modeling.
4535    #[serde(default)]
4536    pub review: ReviewWorkflowConfig,
4537
4538    /// FSM-driven audit generation configuration.
4539    #[serde(default)]
4540    pub fsm: Option<AuditFsmConfig>,
4541
4542    /// v3.3.0: IT general controls (access logs, change management
4543    /// records) emitted alongside audit engagements. Requires both
4544    /// `audit.enabled = true` and `audit.it_controls.enabled = true`
4545    /// to take effect — the latter defaults to `false` so current
4546    /// archives are byte-identical to v3.2.1.
4547    #[serde(default)]
4548    pub it_controls: ItControlsConfig,
4549}
4550
4551/// IT general controls config (v3.3.0+).
4552#[derive(Debug, Clone, Serialize, Deserialize)]
4553pub struct ItControlsConfig {
4554    /// Master switch — when `false`, no access logs or change records
4555    /// are generated.
4556    #[serde(default)]
4557    pub enabled: bool,
4558    /// Number of access-log entries per engagement (approximate — the
4559    /// generator may round or scale based on company size).
4560    #[serde(default = "default_access_log_count")]
4561    pub access_logs_per_engagement: usize,
4562    /// Number of change-management records per engagement.
4563    #[serde(default = "default_change_record_count")]
4564    pub change_records_per_engagement: usize,
4565}
4566
/// Serde default for `ItControlsConfig::access_logs_per_engagement`.
fn default_access_log_count() -> usize {
    const ACCESS_LOGS_PER_ENGAGEMENT: usize = 500;
    ACCESS_LOGS_PER_ENGAGEMENT
}
/// Serde default for `ItControlsConfig::change_records_per_engagement`.
fn default_change_record_count() -> usize {
    const CHANGE_RECORDS_PER_ENGAGEMENT: usize = 50;
    CHANGE_RECORDS_PER_ENGAGEMENT
}
4573
4574impl Default for ItControlsConfig {
4575    fn default() -> Self {
4576        Self {
4577            enabled: false,
4578            access_logs_per_engagement: default_access_log_count(),
4579            change_records_per_engagement: default_change_record_count(),
4580        }
4581    }
4582}
4583
4584impl Default for AuditGenerationConfig {
4585    fn default() -> Self {
4586        Self {
4587            enabled: false,
4588            generate_workpapers: true,
4589            engagement_types: AuditEngagementTypesConfig::default(),
4590            workpapers: WorkpaperConfig::default(),
4591            team: AuditTeamConfig::default(),
4592            review: ReviewWorkflowConfig::default(),
4593            fsm: None,
4594            it_controls: ItControlsConfig::default(),
4595        }
4596    }
4597}
4598
4599/// FSM-driven audit generation configuration.
4600#[derive(Debug, Clone, Serialize, Deserialize)]
4601pub struct AuditFsmConfig {
4602    /// Enable FSM-driven audit generation.
4603    #[serde(default)]
4604    pub enabled: bool,
4605
4606    /// Blueprint source: "builtin:fsa", "builtin:ia", or a file path.
4607    #[serde(default = "default_audit_fsm_blueprint")]
4608    pub blueprint: String,
4609
4610    /// Overlay source: "builtin:default", "builtin:thorough", "builtin:rushed", or a file path.
4611    #[serde(default = "default_audit_fsm_overlay")]
4612    pub overlay: String,
4613
4614    /// Depth level override.
4615    #[serde(default)]
4616    pub depth: Option<String>,
4617
4618    /// Discriminator filter.
4619    #[serde(default)]
4620    pub discriminators: std::collections::HashMap<String, Vec<String>>,
4621
4622    /// Event trail output config.
4623    #[serde(default)]
4624    pub event_trail: AuditEventTrailConfig,
4625
4626    /// RNG seed override.
4627    #[serde(default)]
4628    pub seed: Option<u64>,
4629}
4630
4631impl Default for AuditFsmConfig {
4632    fn default() -> Self {
4633        Self {
4634            enabled: false,
4635            blueprint: default_audit_fsm_blueprint(),
4636            overlay: default_audit_fsm_overlay(),
4637            depth: None,
4638            discriminators: std::collections::HashMap::new(),
4639            event_trail: AuditEventTrailConfig::default(),
4640            seed: None,
4641        }
4642    }
4643}
4644
/// Serde default for `AuditFsmConfig::blueprint`.
fn default_audit_fsm_blueprint() -> String {
    String::from("builtin:fsa")
}
4648
/// Serde default for `AuditFsmConfig::overlay`.
fn default_audit_fsm_overlay() -> String {
    String::from("builtin:default")
}
4652
4653/// Event trail output configuration for FSM-driven audit generation.
4654#[derive(Debug, Clone, Serialize, Deserialize)]
4655pub struct AuditEventTrailConfig {
4656    /// Emit a flat event log.
4657    #[serde(default = "default_true")]
4658    pub flat_log: bool,
4659    /// Project events to OCEL 2.0 format.
4660    #[serde(default)]
4661    pub ocel_projection: bool,
4662}
4663
4664impl Default for AuditEventTrailConfig {
4665    fn default() -> Self {
4666        Self {
4667            flat_log: true,
4668            ocel_projection: false,
4669        }
4670    }
4671}
4672
4673/// Engagement type distribution configuration.
4674#[derive(Debug, Clone, Serialize, Deserialize)]
4675pub struct AuditEngagementTypesConfig {
4676    /// Financial statement audit probability
4677    #[serde(default = "default_financial_audit_prob")]
4678    pub financial_statement: f64,
4679    /// SOX/ICFR audit probability
4680    #[serde(default = "default_sox_audit_prob")]
4681    pub sox_icfr: f64,
4682    /// Integrated audit probability
4683    #[serde(default = "default_integrated_audit_prob")]
4684    pub integrated: f64,
4685    /// Review engagement probability
4686    #[serde(default = "default_review_prob")]
4687    pub review: f64,
4688    /// Agreed-upon procedures probability
4689    #[serde(default = "default_aup_prob")]
4690    pub agreed_upon_procedures: f64,
4691}
4692
/// Serde default for `AuditEngagementTypesConfig::financial_statement`.
fn default_financial_audit_prob() -> f64 {
    const FINANCIAL_STATEMENT_PROB: f64 = 0.40;
    FINANCIAL_STATEMENT_PROB
}
/// Serde default for `AuditEngagementTypesConfig::sox_icfr`.
fn default_sox_audit_prob() -> f64 {
    const SOX_ICFR_PROB: f64 = 0.20;
    SOX_ICFR_PROB
}
/// Serde default for `AuditEngagementTypesConfig::integrated`.
fn default_integrated_audit_prob() -> f64 {
    const INTEGRATED_PROB: f64 = 0.25;
    INTEGRATED_PROB
}
/// Serde default for `AuditEngagementTypesConfig::review`.
fn default_review_prob() -> f64 {
    const REVIEW_PROB: f64 = 0.10;
    REVIEW_PROB
}
/// Serde default for `AuditEngagementTypesConfig::agreed_upon_procedures`.
fn default_aup_prob() -> f64 {
    const AGREED_UPON_PROCEDURES_PROB: f64 = 0.05;
    AGREED_UPON_PROCEDURES_PROB
}
4708
4709impl Default for AuditEngagementTypesConfig {
4710    fn default() -> Self {
4711        Self {
4712            financial_statement: default_financial_audit_prob(),
4713            sox_icfr: default_sox_audit_prob(),
4714            integrated: default_integrated_audit_prob(),
4715            review: default_review_prob(),
4716            agreed_upon_procedures: default_aup_prob(),
4717        }
4718    }
4719}
4720
4721/// Workpaper generation configuration.
4722#[derive(Debug, Clone, Serialize, Deserialize)]
4723pub struct WorkpaperConfig {
4724    /// Average workpapers per engagement phase
4725    #[serde(default = "default_workpapers_per_phase")]
4726    pub average_per_phase: usize,
4727
4728    /// Include ISA compliance references
4729    #[serde(default = "default_true")]
4730    pub include_isa_references: bool,
4731
4732    /// Generate sample details
4733    #[serde(default = "default_true")]
4734    pub include_sample_details: bool,
4735
4736    /// Include cross-references between workpapers
4737    #[serde(default = "default_true")]
4738    pub include_cross_references: bool,
4739
4740    /// Sampling configuration
4741    #[serde(default)]
4742    pub sampling: SamplingConfig,
4743}
4744
/// Serde default for `WorkpaperConfig::average_per_phase`.
fn default_workpapers_per_phase() -> usize {
    const WORKPAPERS_PER_PHASE: usize = 5;
    WORKPAPERS_PER_PHASE
}
4748
4749impl Default for WorkpaperConfig {
4750    fn default() -> Self {
4751        Self {
4752            average_per_phase: default_workpapers_per_phase(),
4753            include_isa_references: true,
4754            include_sample_details: true,
4755            include_cross_references: true,
4756            sampling: SamplingConfig::default(),
4757        }
4758    }
4759}
4760
4761/// Sampling method configuration.
4762#[derive(Debug, Clone, Serialize, Deserialize)]
4763pub struct SamplingConfig {
4764    /// Statistical sampling rate (0.0-1.0)
4765    #[serde(default = "default_statistical_rate")]
4766    pub statistical_rate: f64,
4767    /// Judgmental sampling rate (0.0-1.0)
4768    #[serde(default = "default_judgmental_rate")]
4769    pub judgmental_rate: f64,
4770    /// Haphazard sampling rate (0.0-1.0)
4771    #[serde(default = "default_haphazard_rate")]
4772    pub haphazard_rate: f64,
4773    /// 100% examination rate (0.0-1.0)
4774    #[serde(default = "default_complete_examination_rate")]
4775    pub complete_examination_rate: f64,
4776}
4777
/// Serde default for `SamplingConfig::statistical_rate`.
fn default_statistical_rate() -> f64 {
    const STATISTICAL_RATE: f64 = 0.40;
    STATISTICAL_RATE
}
/// Serde default for `SamplingConfig::judgmental_rate`.
fn default_judgmental_rate() -> f64 {
    const JUDGMENTAL_RATE: f64 = 0.30;
    JUDGMENTAL_RATE
}
/// Serde default for `SamplingConfig::haphazard_rate`.
fn default_haphazard_rate() -> f64 {
    const HAPHAZARD_RATE: f64 = 0.20;
    HAPHAZARD_RATE
}
/// Serde default for `SamplingConfig::complete_examination_rate`.
fn default_complete_examination_rate() -> f64 {
    const COMPLETE_EXAMINATION_RATE: f64 = 0.10;
    COMPLETE_EXAMINATION_RATE
}
4790
4791impl Default for SamplingConfig {
4792    fn default() -> Self {
4793        Self {
4794            statistical_rate: default_statistical_rate(),
4795            judgmental_rate: default_judgmental_rate(),
4796            haphazard_rate: default_haphazard_rate(),
4797            complete_examination_rate: default_complete_examination_rate(),
4798        }
4799    }
4800}
4801
4802/// Audit team configuration.
4803#[derive(Debug, Clone, Serialize, Deserialize)]
4804pub struct AuditTeamConfig {
4805    /// Minimum team size
4806    #[serde(default = "default_min_team_size")]
4807    pub min_team_size: usize,
4808    /// Maximum team size
4809    #[serde(default = "default_max_team_size")]
4810    pub max_team_size: usize,
4811    /// Probability of having a specialist on the team
4812    #[serde(default = "default_specialist_probability")]
4813    pub specialist_probability: f64,
4814}
4815
/// Serde default for `AuditTeamConfig::min_team_size`.
fn default_min_team_size() -> usize {
    const MIN_TEAM_SIZE: usize = 3;
    MIN_TEAM_SIZE
}
/// Serde default for `AuditTeamConfig::max_team_size`.
fn default_max_team_size() -> usize {
    const MAX_TEAM_SIZE: usize = 8;
    MAX_TEAM_SIZE
}
/// Serde default for `AuditTeamConfig::specialist_probability`.
fn default_specialist_probability() -> f64 {
    const SPECIALIST_PROBABILITY: f64 = 0.30;
    SPECIALIST_PROBABILITY
}
4825
4826impl Default for AuditTeamConfig {
4827    fn default() -> Self {
4828        Self {
4829            min_team_size: default_min_team_size(),
4830            max_team_size: default_max_team_size(),
4831            specialist_probability: default_specialist_probability(),
4832        }
4833    }
4834}
4835
4836/// Review workflow configuration.
4837#[derive(Debug, Clone, Serialize, Deserialize)]
4838pub struct ReviewWorkflowConfig {
4839    /// Average days between preparer completion and first review
4840    #[serde(default = "default_review_delay_days")]
4841    pub average_review_delay_days: u32,
4842    /// Probability of review notes requiring rework
4843    #[serde(default = "default_rework_probability_review")]
4844    pub rework_probability: f64,
4845    /// Require partner sign-off for all workpapers
4846    #[serde(default = "default_true")]
4847    pub require_partner_signoff: bool,
4848}
4849
/// Serde default for `ReviewWorkflowConfig::average_review_delay_days`.
fn default_review_delay_days() -> u32 {
    const REVIEW_DELAY_DAYS: u32 = 2;
    REVIEW_DELAY_DAYS
}
/// Serde default for `ReviewWorkflowConfig::rework_probability`.
fn default_rework_probability_review() -> f64 {
    const REVIEW_REWORK_PROBABILITY: f64 = 0.15;
    REVIEW_REWORK_PROBABILITY
}
4856
4857impl Default for ReviewWorkflowConfig {
4858    fn default() -> Self {
4859        Self {
4860            average_review_delay_days: default_review_delay_days(),
4861            rework_probability: default_rework_probability_review(),
4862            require_partner_signoff: true,
4863        }
4864    }
4865}
4866
4867// =============================================================================
4868// Data Quality Configuration
4869// =============================================================================
4870
4871/// Data quality variation settings for realistic flakiness injection.
4872#[derive(Debug, Clone, Serialize, Deserialize)]
4873pub struct DataQualitySchemaConfig {
4874    /// Enable data quality variations
4875    #[serde(default)]
4876    pub enabled: bool,
4877    /// Preset to use (overrides individual settings if set)
4878    #[serde(default)]
4879    pub preset: DataQualityPreset,
4880    /// Missing value injection settings
4881    #[serde(default)]
4882    pub missing_values: MissingValuesSchemaConfig,
4883    /// Typo injection settings
4884    #[serde(default)]
4885    pub typos: TypoSchemaConfig,
4886    /// Format variation settings
4887    #[serde(default)]
4888    pub format_variations: FormatVariationSchemaConfig,
4889    /// Duplicate injection settings
4890    #[serde(default)]
4891    pub duplicates: DuplicateSchemaConfig,
4892    /// Encoding issue settings
4893    #[serde(default)]
4894    pub encoding_issues: EncodingIssueSchemaConfig,
4895    /// Generate quality issue labels for ML training
4896    #[serde(default)]
4897    pub generate_labels: bool,
4898    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
4899    #[serde(default)]
4900    pub sink_profiles: SinkQualityProfiles,
4901}
4902
4903impl Default for DataQualitySchemaConfig {
4904    fn default() -> Self {
4905        Self {
4906            enabled: false,
4907            preset: DataQualityPreset::None,
4908            missing_values: MissingValuesSchemaConfig::default(),
4909            typos: TypoSchemaConfig::default(),
4910            format_variations: FormatVariationSchemaConfig::default(),
4911            duplicates: DuplicateSchemaConfig::default(),
4912            encoding_issues: EncodingIssueSchemaConfig::default(),
4913            generate_labels: true,
4914            sink_profiles: SinkQualityProfiles::default(),
4915        }
4916    }
4917}
4918
4919impl DataQualitySchemaConfig {
4920    /// Creates a config for a specific preset profile.
4921    pub fn with_preset(preset: DataQualityPreset) -> Self {
4922        let mut config = Self {
4923            preset,
4924            ..Default::default()
4925        };
4926        config.apply_preset();
4927        config
4928    }
4929
4930    /// Applies the preset settings to the individual configuration fields.
4931    /// Call this after deserializing if preset is not Custom or None.
4932    pub fn apply_preset(&mut self) {
4933        if !self.preset.overrides_settings() {
4934            return;
4935        }
4936
4937        self.enabled = true;
4938
4939        // Missing values
4940        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
4941        self.missing_values.rate = self.preset.missing_rate();
4942
4943        // Typos
4944        self.typos.enabled = self.preset.typo_rate() > 0.0;
4945        self.typos.char_error_rate = self.preset.typo_rate();
4946
4947        // Duplicates
4948        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
4949        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
4950        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
4951        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
4952
4953        // Format variations
4954        self.format_variations.enabled = self.preset.format_variations_enabled();
4955
4956        // Encoding issues
4957        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
4958        self.encoding_issues.rate = self.preset.encoding_issue_rate();
4959
4960        // OCR errors for typos in legacy preset
4961        if self.preset.ocr_errors_enabled() {
4962            self.typos.type_weights.ocr_errors = 0.3;
4963        }
4964    }
4965
4966    /// Returns the effective missing value rate (considering preset).
4967    pub fn effective_missing_rate(&self) -> f64 {
4968        if self.preset.overrides_settings() {
4969            self.preset.missing_rate()
4970        } else {
4971            self.missing_values.rate
4972        }
4973    }
4974
4975    /// Returns the effective typo rate (considering preset).
4976    pub fn effective_typo_rate(&self) -> f64 {
4977        if self.preset.overrides_settings() {
4978            self.preset.typo_rate()
4979        } else {
4980            self.typos.char_error_rate
4981        }
4982    }
4983
4984    /// Returns the effective duplicate rate (considering preset).
4985    pub fn effective_duplicate_rate(&self) -> f64 {
4986        if self.preset.overrides_settings() {
4987            self.preset.duplicate_rate()
4988        } else {
4989            self.duplicates.exact_duplicate_ratio
4990                + self.duplicates.near_duplicate_ratio
4991                + self.duplicates.fuzzy_duplicate_ratio
4992        }
4993    }
4994
4995    /// Creates a clean profile config.
4996    pub fn clean() -> Self {
4997        Self::with_preset(DataQualityPreset::Clean)
4998    }
4999
5000    /// Creates a noisy profile config.
5001    pub fn noisy() -> Self {
5002        Self::with_preset(DataQualityPreset::Noisy)
5003    }
5004
5005    /// Creates a legacy profile config.
5006    pub fn legacy() -> Self {
5007        Self::with_preset(DataQualityPreset::Legacy)
5008    }
5009}
5010
/// Preset configurations for common data quality scenarios.
///
/// Serialized in `snake_case` (`none`, `minimal`, …). `None` is the default.
/// `None` and `Custom` do not override the individual settings; every other
/// preset does (see `overrides_settings`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DataQualityPreset {
    /// No data quality variations (clean data)
    #[default]
    None,
    /// Minimal variations (very clean data with rare issues)
    ///
    /// Missing: 0.5%, Typos: 0.05%, Duplicates: 0.1%
    Minimal,
    /// Normal variations (realistic enterprise data quality)
    ///
    /// Missing: 2%, Typos: 0.2%, Duplicates: 0.5%
    Normal,
    /// High variations (messy data for stress testing)
    ///
    /// Missing: 8%, Typos: 1%, Duplicates: 2%, plus OCR and encoding issues
    High,
    /// Custom (use individual settings)
    Custom,

    // ========================================
    // ML-Oriented Profiles (Phase 2.1)
    // ========================================
    /// Clean profile for ML training - minimal data quality issues
    ///
    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
    Clean,
    /// Noisy profile simulating typical production data issues
    ///
    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
    Noisy,
    /// Legacy profile simulating migrated/OCR'd historical data
    ///
    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
    Legacy,
}
5040
5041impl DataQualityPreset {
5042    /// Returns the missing value rate for this preset.
5043    pub fn missing_rate(&self) -> f64 {
5044        match self {
5045            DataQualityPreset::None => 0.0,
5046            DataQualityPreset::Minimal => 0.005,
5047            DataQualityPreset::Normal => 0.02,
5048            DataQualityPreset::High => 0.08,
5049            DataQualityPreset::Custom => 0.01, // Use config value
5050            DataQualityPreset::Clean => 0.001,
5051            DataQualityPreset::Noisy => 0.05,
5052            DataQualityPreset::Legacy => 0.10,
5053        }
5054    }
5055
5056    /// Returns the typo rate for this preset.
5057    pub fn typo_rate(&self) -> f64 {
5058        match self {
5059            DataQualityPreset::None => 0.0,
5060            DataQualityPreset::Minimal => 0.0005,
5061            DataQualityPreset::Normal => 0.002,
5062            DataQualityPreset::High => 0.01,
5063            DataQualityPreset::Custom => 0.001, // Use config value
5064            DataQualityPreset::Clean => 0.0005,
5065            DataQualityPreset::Noisy => 0.02,
5066            DataQualityPreset::Legacy => 0.05,
5067        }
5068    }
5069
5070    /// Returns the duplicate rate for this preset.
5071    pub fn duplicate_rate(&self) -> f64 {
5072        match self {
5073            DataQualityPreset::None => 0.0,
5074            DataQualityPreset::Minimal => 0.001,
5075            DataQualityPreset::Normal => 0.005,
5076            DataQualityPreset::High => 0.02,
5077            DataQualityPreset::Custom => 0.0, // Use config value
5078            DataQualityPreset::Clean => 0.0,
5079            DataQualityPreset::Noisy => 0.01,
5080            DataQualityPreset::Legacy => 0.03,
5081        }
5082    }
5083
5084    /// Returns whether format variations are enabled for this preset.
5085    pub fn format_variations_enabled(&self) -> bool {
5086        match self {
5087            DataQualityPreset::None | DataQualityPreset::Clean => false,
5088            DataQualityPreset::Minimal => true,
5089            DataQualityPreset::Normal => true,
5090            DataQualityPreset::High => true,
5091            DataQualityPreset::Custom => true,
5092            DataQualityPreset::Noisy => true,
5093            DataQualityPreset::Legacy => true,
5094        }
5095    }
5096
5097    /// Returns whether OCR-style errors are enabled for this preset.
5098    pub fn ocr_errors_enabled(&self) -> bool {
5099        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
5100    }
5101
5102    /// Returns whether encoding issues are enabled for this preset.
5103    pub fn encoding_issues_enabled(&self) -> bool {
5104        matches!(
5105            self,
5106            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
5107        )
5108    }
5109
5110    /// Returns the encoding issue rate for this preset.
5111    pub fn encoding_issue_rate(&self) -> f64 {
5112        match self {
5113            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
5114            DataQualityPreset::Normal => 0.002,
5115            DataQualityPreset::High => 0.01,
5116            DataQualityPreset::Custom => 0.0,
5117            DataQualityPreset::Noisy => 0.005,
5118            DataQualityPreset::Legacy => 0.02,
5119        }
5120    }
5121
5122    /// Returns true if this preset overrides individual settings.
5123    pub fn overrides_settings(&self) -> bool {
5124        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
5125    }
5126
5127    /// Returns a human-readable description of this preset.
5128    pub fn description(&self) -> &'static str {
5129        match self {
5130            DataQualityPreset::None => "No data quality issues (pristine data)",
5131            DataQualityPreset::Minimal => "Very rare data quality issues",
5132            DataQualityPreset::Normal => "Realistic enterprise data quality",
5133            DataQualityPreset::High => "Messy data for stress testing",
5134            DataQualityPreset::Custom => "Custom settings from configuration",
5135            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
5136            DataQualityPreset::Noisy => "Typical production data with moderate issues",
5137            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
5138        }
5139    }
5140}
5141
5142/// Missing value injection configuration.
5143#[derive(Debug, Clone, Serialize, Deserialize)]
5144pub struct MissingValuesSchemaConfig {
5145    /// Enable missing value injection
5146    #[serde(default)]
5147    pub enabled: bool,
5148    /// Global missing rate (0.0 to 1.0)
5149    #[serde(default = "default_missing_rate")]
5150    pub rate: f64,
5151    /// Missing value strategy
5152    #[serde(default)]
5153    pub strategy: MissingValueStrategy,
5154    /// Field-specific rates (field name -> rate)
5155    #[serde(default)]
5156    pub field_rates: std::collections::HashMap<String, f64>,
5157    /// Fields that should never have missing values
5158    #[serde(default)]
5159    pub protected_fields: Vec<String>,
5160}
5161
/// Serde default for `MissingValuesSchemaConfig::rate`.
fn default_missing_rate() -> f64 {
    const MISSING_RATE: f64 = 0.01;
    MISSING_RATE
}
5165
5166impl Default for MissingValuesSchemaConfig {
5167    fn default() -> Self {
5168        Self {
5169            enabled: false,
5170            rate: default_missing_rate(),
5171            strategy: MissingValueStrategy::Mcar,
5172            field_rates: std::collections::HashMap::new(),
5173            protected_fields: vec![
5174                "document_id".to_string(),
5175                "company_code".to_string(),
5176                "posting_date".to_string(),
5177            ],
5178        }
5179    }
5180}
5181
/// Missing value strategy types.
///
/// Serialized in `snake_case` (`mcar`, `mar`, `mnar`, `systematic`).
/// `Mcar` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MissingValueStrategy {
    /// Missing Completely At Random - equal probability for all values
    #[default]
    Mcar,
    /// Missing At Random - depends on other observed values
    Mar,
    /// Missing Not At Random - depends on the value itself
    Mnar,
    /// Systematic - entire field groups missing together
    Systematic,
}
5196
5197/// Typo injection configuration.
5198#[derive(Debug, Clone, Serialize, Deserialize)]
5199pub struct TypoSchemaConfig {
5200    /// Enable typo injection
5201    #[serde(default)]
5202    pub enabled: bool,
5203    /// Character error rate (per character, not per field)
5204    #[serde(default = "default_typo_rate")]
5205    pub char_error_rate: f64,
5206    /// Typo type weights
5207    #[serde(default)]
5208    pub type_weights: TypoTypeWeights,
5209    /// Fields that should never have typos
5210    #[serde(default)]
5211    pub protected_fields: Vec<String>,
5212}
5213
/// Serde default for `TypoSchemaConfig::char_error_rate`.
fn default_typo_rate() -> f64 {
    const CHAR_ERROR_RATE: f64 = 0.001;
    CHAR_ERROR_RATE
}
5217
5218impl Default for TypoSchemaConfig {
5219    fn default() -> Self {
5220        Self {
5221            enabled: false,
5222            char_error_rate: default_typo_rate(),
5223            type_weights: TypoTypeWeights::default(),
5224            protected_fields: vec![
5225                "document_id".to_string(),
5226                "gl_account".to_string(),
5227                "company_code".to_string(),
5228            ],
5229        }
5230    }
5231}
5232
5233/// Weights for different typo types.
5234#[derive(Debug, Clone, Serialize, Deserialize)]
5235pub struct TypoTypeWeights {
5236    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
5237    #[serde(default = "default_substitution_weight")]
5238    pub substitution: f64,
5239    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
5240    #[serde(default = "default_transposition_weight")]
5241    pub transposition: f64,
5242    /// Character insertion
5243    #[serde(default = "default_insertion_weight")]
5244    pub insertion: f64,
5245    /// Character deletion
5246    #[serde(default = "default_deletion_weight")]
5247    pub deletion: f64,
5248    /// OCR-style errors (e.g., '0' -> 'O')
5249    #[serde(default = "default_ocr_weight")]
5250    pub ocr_errors: f64,
5251    /// Homophone substitution (e.g., 'their' -> 'there')
5252    #[serde(default = "default_homophone_weight")]
5253    pub homophones: f64,
5254}
5255
/// Serde default for `TypoTypeWeights::substitution`.
fn default_substitution_weight() -> f64 {
    const SUBSTITUTION_WEIGHT: f64 = 0.35;
    SUBSTITUTION_WEIGHT
}
/// Serde default for `TypoTypeWeights::transposition`.
fn default_transposition_weight() -> f64 {
    const TRANSPOSITION_WEIGHT: f64 = 0.25;
    TRANSPOSITION_WEIGHT
}
/// Serde default for `TypoTypeWeights::insertion`.
fn default_insertion_weight() -> f64 {
    const INSERTION_WEIGHT: f64 = 0.10;
    INSERTION_WEIGHT
}
/// Serde default for `TypoTypeWeights::deletion`.
fn default_deletion_weight() -> f64 {
    const DELETION_WEIGHT: f64 = 0.15;
    DELETION_WEIGHT
}
/// Serde default for `TypoTypeWeights::ocr_errors`.
fn default_ocr_weight() -> f64 {
    const OCR_WEIGHT: f64 = 0.10;
    OCR_WEIGHT
}
/// Serde default for `TypoTypeWeights::homophones`.
fn default_homophone_weight() -> f64 {
    const HOMOPHONE_WEIGHT: f64 = 0.05;
    HOMOPHONE_WEIGHT
}
5274
5275impl Default for TypoTypeWeights {
5276    fn default() -> Self {
5277        Self {
5278            substitution: default_substitution_weight(),
5279            transposition: default_transposition_weight(),
5280            insertion: default_insertion_weight(),
5281            deletion: default_deletion_weight(),
5282            ocr_errors: default_ocr_weight(),
5283            homophones: default_homophone_weight(),
5284        }
5285    }
5286}
5287
/// Format variation configuration.
///
/// Groups the date, amount and identifier variation settings under one
/// master `enabled` switch. `Default` is derived, so the switch starts
/// `false` and each sub-section takes its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FormatVariationSchemaConfig {
    /// Enable format variations
    #[serde(default)]
    pub enabled: bool,
    /// Date format variation settings
    #[serde(default)]
    pub dates: DateFormatVariationConfig,
    /// Amount format variation settings
    #[serde(default)]
    pub amounts: AmountFormatVariationConfig,
    /// Identifier format variation settings
    #[serde(default)]
    pub identifiers: IdentifierFormatVariationConfig,
}
5304
5305/// Date format variation configuration.
5306#[derive(Debug, Clone, Serialize, Deserialize)]
5307pub struct DateFormatVariationConfig {
5308    /// Enable date format variations
5309    #[serde(default)]
5310    pub enabled: bool,
5311    /// Overall variation rate
5312    #[serde(default = "default_date_variation_rate")]
5313    pub rate: f64,
5314    /// Include ISO format (2024-01-15)
5315    #[serde(default = "default_true")]
5316    pub iso_format: bool,
5317    /// Include US format (01/15/2024)
5318    #[serde(default)]
5319    pub us_format: bool,
5320    /// Include EU format (15.01.2024)
5321    #[serde(default)]
5322    pub eu_format: bool,
5323    /// Include long format (January 15, 2024)
5324    #[serde(default)]
5325    pub long_format: bool,
5326}
5327
/// Serde default for `DateFormatVariationConfig::rate`.
fn default_date_variation_rate() -> f64 {
    const DATE_VARIATION_RATE: f64 = 0.05;
    DATE_VARIATION_RATE
}
5331
5332impl Default for DateFormatVariationConfig {
5333    fn default() -> Self {
5334        Self {
5335            enabled: false,
5336            rate: default_date_variation_rate(),
5337            iso_format: true,
5338            us_format: false,
5339            eu_format: false,
5340            long_format: false,
5341        }
5342    }
5343}
5344
5345/// Amount format variation configuration.
5346#[derive(Debug, Clone, Serialize, Deserialize)]
5347pub struct AmountFormatVariationConfig {
5348    /// Enable amount format variations
5349    #[serde(default)]
5350    pub enabled: bool,
5351    /// Overall variation rate
5352    #[serde(default = "default_amount_variation_rate")]
5353    pub rate: f64,
5354    /// Include US comma format (1,234.56)
5355    #[serde(default)]
5356    pub us_comma_format: bool,
5357    /// Include EU format (1.234,56)
5358    #[serde(default)]
5359    pub eu_format: bool,
5360    /// Include currency prefix ($1,234.56)
5361    #[serde(default)]
5362    pub currency_prefix: bool,
5363    /// Include accounting format with parentheses for negatives
5364    #[serde(default)]
5365    pub accounting_format: bool,
5366}
5367
/// Serde default for `AmountFormatVariationConfig::rate`.
fn default_amount_variation_rate() -> f64 {
    const AMOUNT_VARIATION_RATE: f64 = 0.02;
    AMOUNT_VARIATION_RATE
}
5371
5372impl Default for AmountFormatVariationConfig {
5373    fn default() -> Self {
5374        Self {
5375            enabled: false,
5376            rate: default_amount_variation_rate(),
5377            us_comma_format: false,
5378            eu_format: false,
5379            currency_prefix: false,
5380            accounting_format: false,
5381        }
5382    }
5383}
5384
5385/// Identifier format variation configuration.
5386#[derive(Debug, Clone, Serialize, Deserialize)]
5387pub struct IdentifierFormatVariationConfig {
5388    /// Enable identifier format variations
5389    #[serde(default)]
5390    pub enabled: bool,
5391    /// Overall variation rate
5392    #[serde(default = "default_identifier_variation_rate")]
5393    pub rate: f64,
5394    /// Case variations (uppercase, lowercase, mixed)
5395    #[serde(default)]
5396    pub case_variations: bool,
5397    /// Padding variations (leading zeros)
5398    #[serde(default)]
5399    pub padding_variations: bool,
5400    /// Separator variations (dash vs underscore)
5401    #[serde(default)]
5402    pub separator_variations: bool,
5403}
5404
/// Serde default for `IdentifierFormatVariationConfig::rate` (0.02).
fn default_identifier_variation_rate() -> f64 {
    const DEFAULT_RATE: f64 = 0.02;
    DEFAULT_RATE
}
5408
5409impl Default for IdentifierFormatVariationConfig {
5410    fn default() -> Self {
5411        Self {
5412            enabled: false,
5413            rate: default_identifier_variation_rate(),
5414            case_variations: false,
5415            padding_variations: false,
5416            separator_variations: false,
5417        }
5418    }
5419}
5420
5421/// Duplicate injection configuration.
5422#[derive(Debug, Clone, Serialize, Deserialize)]
5423pub struct DuplicateSchemaConfig {
5424    /// Enable duplicate injection
5425    #[serde(default)]
5426    pub enabled: bool,
5427    /// Overall duplicate rate
5428    #[serde(default = "default_duplicate_rate")]
5429    pub rate: f64,
5430    /// Exact duplicate proportion (out of duplicates)
5431    #[serde(default = "default_exact_duplicate_ratio")]
5432    pub exact_duplicate_ratio: f64,
5433    /// Near duplicate proportion (slight variations)
5434    #[serde(default = "default_near_duplicate_ratio")]
5435    pub near_duplicate_ratio: f64,
5436    /// Fuzzy duplicate proportion (typos in key fields)
5437    #[serde(default = "default_fuzzy_duplicate_ratio")]
5438    pub fuzzy_duplicate_ratio: f64,
5439    /// Maximum date offset for near/fuzzy duplicates (days)
5440    #[serde(default = "default_max_date_offset")]
5441    pub max_date_offset_days: u32,
5442    /// Maximum amount variance for near duplicates (fraction)
5443    #[serde(default = "default_max_amount_variance")]
5444    pub max_amount_variance: f64,
5445}
5446
/// Serde default for `DuplicateSchemaConfig::rate`.
fn default_duplicate_rate() -> f64 {
    0.005_f64
}
/// Serde default for `DuplicateSchemaConfig::exact_duplicate_ratio`.
fn default_exact_duplicate_ratio() -> f64 {
    0.4_f64
}
/// Serde default for `DuplicateSchemaConfig::near_duplicate_ratio`.
fn default_near_duplicate_ratio() -> f64 {
    0.35_f64
}
/// Serde default for `DuplicateSchemaConfig::fuzzy_duplicate_ratio`.
fn default_fuzzy_duplicate_ratio() -> f64 {
    0.25_f64
}
/// Serde default for `DuplicateSchemaConfig::max_date_offset_days`.
fn default_max_date_offset() -> u32 {
    3_u32
}
/// Serde default for `DuplicateSchemaConfig::max_amount_variance`.
fn default_max_amount_variance() -> f64 {
    0.01_f64
}
5465
5466impl Default for DuplicateSchemaConfig {
5467    fn default() -> Self {
5468        Self {
5469            enabled: false,
5470            rate: default_duplicate_rate(),
5471            exact_duplicate_ratio: default_exact_duplicate_ratio(),
5472            near_duplicate_ratio: default_near_duplicate_ratio(),
5473            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
5474            max_date_offset_days: default_max_date_offset(),
5475            max_amount_variance: default_max_amount_variance(),
5476        }
5477    }
5478}
5479
5480/// Encoding issue configuration.
5481#[derive(Debug, Clone, Serialize, Deserialize)]
5482pub struct EncodingIssueSchemaConfig {
5483    /// Enable encoding issue injection
5484    #[serde(default)]
5485    pub enabled: bool,
5486    /// Overall encoding issue rate
5487    #[serde(default = "default_encoding_rate")]
5488    pub rate: f64,
5489    /// Include mojibake (UTF-8/Latin-1 confusion)
5490    #[serde(default)]
5491    pub mojibake: bool,
5492    /// Include HTML entity corruption
5493    #[serde(default)]
5494    pub html_entities: bool,
5495    /// Include BOM issues
5496    #[serde(default)]
5497    pub bom_issues: bool,
5498}
5499
/// Serde default for `EncodingIssueSchemaConfig::rate` (0.001).
fn default_encoding_rate() -> f64 {
    const DEFAULT_RATE: f64 = 0.001;
    DEFAULT_RATE
}
5503
5504impl Default for EncodingIssueSchemaConfig {
5505    fn default() -> Self {
5506        Self {
5507            enabled: false,
5508            rate: default_encoding_rate(),
5509            mojibake: false,
5510            html_entities: false,
5511            bom_issues: false,
5512        }
5513    }
5514}
5515
5516/// Per-sink quality profiles for different output formats.
5517#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5518pub struct SinkQualityProfiles {
5519    /// CSV-specific quality settings
5520    #[serde(default)]
5521    pub csv: Option<SinkQualityOverride>,
5522    /// JSON-specific quality settings
5523    #[serde(default)]
5524    pub json: Option<SinkQualityOverride>,
5525    /// Parquet-specific quality settings
5526    #[serde(default)]
5527    pub parquet: Option<SinkQualityOverride>,
5528}
5529
5530/// Quality setting overrides for a specific sink type.
5531#[derive(Debug, Clone, Serialize, Deserialize)]
5532pub struct SinkQualityOverride {
5533    /// Override enabled state
5534    pub enabled: Option<bool>,
5535    /// Override missing value rate
5536    pub missing_rate: Option<f64>,
5537    /// Override typo rate
5538    pub typo_rate: Option<f64>,
5539    /// Override format variation rate
5540    pub format_variation_rate: Option<f64>,
5541    /// Override duplicate rate
5542    pub duplicate_rate: Option<f64>,
5543}
5544
5545// =============================================================================
5546// Accounting Standards Configuration
5547// =============================================================================
5548
5549/// Accounting standards framework configuration for generating standards-compliant data.
5550///
5551/// Supports US GAAP, IFRS, and French GAAP (PCG) frameworks with specific standards:
5552/// - ASC 606/IFRS 15/PCG: Revenue Recognition
5553/// - ASC 842/IFRS 16/PCG: Leases
5554/// - ASC 820/IFRS 13/PCG: Fair Value Measurement
5555/// - ASC 360/IAS 36/PCG: Impairment
5556#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5557pub struct AccountingStandardsConfig {
5558    /// Enable accounting standards generation
5559    #[serde(default)]
5560    pub enabled: bool,
5561
5562    /// Accounting framework to use.
5563    /// When `None`, the country pack's `accounting.framework` is used as fallback;
5564    /// if that is also absent the orchestrator defaults to US GAAP.
5565    #[serde(default, skip_serializing_if = "Option::is_none")]
5566    pub framework: Option<AccountingFrameworkConfig>,
5567
5568    /// Revenue recognition configuration (ASC 606/IFRS 15)
5569    #[serde(default)]
5570    pub revenue_recognition: RevenueRecognitionConfig,
5571
5572    /// Lease accounting configuration (ASC 842/IFRS 16)
5573    #[serde(default)]
5574    pub leases: LeaseAccountingConfig,
5575
5576    /// Fair value measurement configuration (ASC 820/IFRS 13)
5577    #[serde(default)]
5578    pub fair_value: FairValueConfig,
5579
5580    /// Impairment testing configuration (ASC 360/IAS 36)
5581    #[serde(default)]
5582    pub impairment: ImpairmentConfig,
5583
5584    /// Business combination configuration (IFRS 3 / ASC 805)
5585    #[serde(default)]
5586    pub business_combinations: BusinessCombinationsConfig,
5587
5588    /// Expected Credit Loss configuration (IFRS 9 / ASC 326)
5589    #[serde(default)]
5590    pub expected_credit_loss: EclConfig,
5591
5592    /// Generate framework differences for dual reporting
5593    #[serde(default)]
5594    pub generate_differences: bool,
5595}
5596
5597/// Accounting framework selection.
5598#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5599#[serde(rename_all = "snake_case")]
5600pub enum AccountingFrameworkConfig {
5601    /// US Generally Accepted Accounting Principles
5602    #[default]
5603    UsGaap,
5604    /// International Financial Reporting Standards
5605    Ifrs,
5606    /// Generate data for both frameworks with reconciliation
5607    DualReporting,
5608    /// French GAAP (Plan Comptable Général – PCG)
5609    FrenchGaap,
5610    /// German GAAP (Handelsgesetzbuch – HGB, §238-263)
5611    GermanGaap,
5612}
5613
5614/// Revenue recognition configuration (ASC 606/IFRS 15).
5615#[derive(Debug, Clone, Serialize, Deserialize)]
5616pub struct RevenueRecognitionConfig {
5617    /// Enable revenue recognition generation
5618    #[serde(default)]
5619    pub enabled: bool,
5620
5621    /// Generate customer contracts
5622    #[serde(default = "default_true")]
5623    pub generate_contracts: bool,
5624
5625    /// Average number of performance obligations per contract
5626    #[serde(default = "default_avg_obligations")]
5627    pub avg_obligations_per_contract: f64,
5628
5629    /// Rate of contracts with variable consideration
5630    #[serde(default = "default_variable_consideration_rate")]
5631    pub variable_consideration_rate: f64,
5632
5633    /// Rate of over-time revenue recognition (vs point-in-time)
5634    #[serde(default = "default_over_time_rate")]
5635    pub over_time_recognition_rate: f64,
5636
5637    /// Number of contracts to generate
5638    #[serde(default = "default_contract_count")]
5639    pub contract_count: usize,
5640}
5641
/// Serde default for `RevenueRecognitionConfig::avg_obligations_per_contract`.
fn default_avg_obligations() -> f64 {
    2.0_f64
}

/// Serde default for `RevenueRecognitionConfig::variable_consideration_rate`.
fn default_variable_consideration_rate() -> f64 {
    0.15_f64
}

/// Serde default for `RevenueRecognitionConfig::over_time_recognition_rate`.
fn default_over_time_rate() -> f64 {
    0.30_f64
}

/// Serde default for `RevenueRecognitionConfig::contract_count`.
fn default_contract_count() -> usize {
    100_usize
}
5657
5658impl Default for RevenueRecognitionConfig {
5659    fn default() -> Self {
5660        Self {
5661            enabled: false,
5662            generate_contracts: true,
5663            avg_obligations_per_contract: default_avg_obligations(),
5664            variable_consideration_rate: default_variable_consideration_rate(),
5665            over_time_recognition_rate: default_over_time_rate(),
5666            contract_count: default_contract_count(),
5667        }
5668    }
5669}
5670
5671/// Lease accounting configuration (ASC 842/IFRS 16).
5672#[derive(Debug, Clone, Serialize, Deserialize)]
5673pub struct LeaseAccountingConfig {
5674    /// Enable lease accounting generation
5675    #[serde(default)]
5676    pub enabled: bool,
5677
5678    /// Number of leases to generate
5679    #[serde(default = "default_lease_count")]
5680    pub lease_count: usize,
5681
5682    /// Percentage of finance leases (vs operating)
5683    #[serde(default = "default_finance_lease_pct")]
5684    pub finance_lease_percent: f64,
5685
5686    /// Average lease term in months
5687    #[serde(default = "default_avg_lease_term")]
5688    pub avg_lease_term_months: u32,
5689
5690    /// Generate amortization schedules
5691    #[serde(default = "default_true")]
5692    pub generate_amortization: bool,
5693
5694    /// Real estate lease percentage
5695    #[serde(default = "default_real_estate_pct")]
5696    pub real_estate_percent: f64,
5697}
5698
/// Serde default for `LeaseAccountingConfig::lease_count`.
fn default_lease_count() -> usize {
    50_usize
}

/// Serde default for `LeaseAccountingConfig::finance_lease_percent`.
fn default_finance_lease_pct() -> f64 {
    0.30_f64
}

/// Serde default for `LeaseAccountingConfig::avg_lease_term_months`.
fn default_avg_lease_term() -> u32 {
    60_u32
}

/// Serde default for `LeaseAccountingConfig::real_estate_percent`.
fn default_real_estate_pct() -> f64 {
    0.40_f64
}
5714
5715impl Default for LeaseAccountingConfig {
5716    fn default() -> Self {
5717        Self {
5718            enabled: false,
5719            lease_count: default_lease_count(),
5720            finance_lease_percent: default_finance_lease_pct(),
5721            avg_lease_term_months: default_avg_lease_term(),
5722            generate_amortization: true,
5723            real_estate_percent: default_real_estate_pct(),
5724        }
5725    }
5726}
5727
5728/// Fair value measurement configuration (ASC 820/IFRS 13).
5729#[derive(Debug, Clone, Serialize, Deserialize)]
5730pub struct FairValueConfig {
5731    /// Enable fair value measurement generation
5732    #[serde(default)]
5733    pub enabled: bool,
5734
5735    /// Number of fair value measurements to generate
5736    #[serde(default = "default_fv_count")]
5737    pub measurement_count: usize,
5738
5739    /// Level 1 (quoted prices) percentage
5740    #[serde(default = "default_level1_pct")]
5741    pub level1_percent: f64,
5742
5743    /// Level 2 (observable inputs) percentage
5744    #[serde(default = "default_level2_pct")]
5745    pub level2_percent: f64,
5746
5747    /// Level 3 (unobservable inputs) percentage
5748    #[serde(default = "default_level3_pct")]
5749    pub level3_percent: f64,
5750
5751    /// Include sensitivity analysis for Level 3
5752    #[serde(default)]
5753    pub include_sensitivity_analysis: bool,
5754}
5755
/// Serde default for `FairValueConfig::measurement_count`.
fn default_fv_count() -> usize {
    25_usize
}

/// Serde default for `FairValueConfig::level1_percent`.
fn default_level1_pct() -> f64 {
    0.40_f64
}

/// Serde default for `FairValueConfig::level2_percent`.
fn default_level2_pct() -> f64 {
    0.35_f64
}

/// Serde default for `FairValueConfig::level3_percent`.
fn default_level3_pct() -> f64 {
    0.25_f64
}
5771
5772impl Default for FairValueConfig {
5773    fn default() -> Self {
5774        Self {
5775            enabled: false,
5776            measurement_count: default_fv_count(),
5777            level1_percent: default_level1_pct(),
5778            level2_percent: default_level2_pct(),
5779            level3_percent: default_level3_pct(),
5780            include_sensitivity_analysis: false,
5781        }
5782    }
5783}
5784
5785/// Impairment testing configuration (ASC 360/IAS 36).
5786#[derive(Debug, Clone, Serialize, Deserialize)]
5787pub struct ImpairmentConfig {
5788    /// Enable impairment testing generation
5789    #[serde(default)]
5790    pub enabled: bool,
5791
5792    /// Number of impairment tests to generate
5793    #[serde(default = "default_impairment_count")]
5794    pub test_count: usize,
5795
5796    /// Rate of tests resulting in impairment
5797    #[serde(default = "default_impairment_rate")]
5798    pub impairment_rate: f64,
5799
5800    /// Generate cash flow projections
5801    #[serde(default = "default_true")]
5802    pub generate_projections: bool,
5803
5804    /// Include goodwill impairment tests
5805    #[serde(default)]
5806    pub include_goodwill: bool,
5807}
5808
/// Serde default for `ImpairmentConfig::test_count`.
fn default_impairment_count() -> usize {
    15_usize
}

/// Serde default for `ImpairmentConfig::impairment_rate`.
fn default_impairment_rate() -> f64 {
    0.10_f64
}
5816
5817impl Default for ImpairmentConfig {
5818    fn default() -> Self {
5819        Self {
5820            enabled: false,
5821            test_count: default_impairment_count(),
5822            impairment_rate: default_impairment_rate(),
5823            generate_projections: true,
5824            include_goodwill: false,
5825        }
5826    }
5827}
5828
5829// =============================================================================
5830// Business Combinations Configuration (IFRS 3 / ASC 805)
5831// =============================================================================
5832
5833/// Configuration for generating business combination (acquisition) data.
5834#[derive(Debug, Clone, Serialize, Deserialize)]
5835pub struct BusinessCombinationsConfig {
5836    /// Enable business combination generation
5837    #[serde(default)]
5838    pub enabled: bool,
5839
5840    /// Number of acquisitions to generate per company (1-5)
5841    #[serde(default = "default_bc_acquisition_count")]
5842    pub acquisition_count: usize,
5843}
5844
/// Serde default for `BusinessCombinationsConfig::acquisition_count` (2).
fn default_bc_acquisition_count() -> usize {
    const DEFAULT_ACQUISITIONS: usize = 2;
    DEFAULT_ACQUISITIONS
}
5848
5849impl Default for BusinessCombinationsConfig {
5850    fn default() -> Self {
5851        Self {
5852            enabled: false,
5853            acquisition_count: default_bc_acquisition_count(),
5854        }
5855    }
5856}
5857
5858// =============================================================================
5859// ECL Configuration (IFRS 9 / ASC 326)
5860// =============================================================================
5861
5862/// Configuration for Expected Credit Loss generation.
5863#[derive(Debug, Clone, Serialize, Deserialize)]
5864pub struct EclConfig {
5865    /// Enable ECL generation.
5866    #[serde(default)]
5867    pub enabled: bool,
5868
5869    /// Weight for base economic scenario (0–1).
5870    #[serde(default = "default_ecl_base_weight")]
5871    pub base_scenario_weight: f64,
5872
5873    /// Multiplier for base scenario (typically 1.0).
5874    #[serde(default = "default_ecl_base_multiplier")]
5875    pub base_scenario_multiplier: f64,
5876
5877    /// Weight for optimistic economic scenario (0–1).
5878    #[serde(default = "default_ecl_optimistic_weight")]
5879    pub optimistic_scenario_weight: f64,
5880
5881    /// Multiplier for optimistic scenario (< 1.0 means lower losses).
5882    #[serde(default = "default_ecl_optimistic_multiplier")]
5883    pub optimistic_scenario_multiplier: f64,
5884
5885    /// Weight for pessimistic economic scenario (0–1).
5886    #[serde(default = "default_ecl_pessimistic_weight")]
5887    pub pessimistic_scenario_weight: f64,
5888
5889    /// Multiplier for pessimistic scenario (> 1.0 means higher losses).
5890    #[serde(default = "default_ecl_pessimistic_multiplier")]
5891    pub pessimistic_scenario_multiplier: f64,
5892}
5893
/// Serde default for `EclConfig::base_scenario_weight`.
fn default_ecl_base_weight() -> f64 {
    0.50_f64
}
/// Serde default for `EclConfig::base_scenario_multiplier`.
fn default_ecl_base_multiplier() -> f64 {
    1.0_f64
}
/// Serde default for `EclConfig::optimistic_scenario_weight`.
fn default_ecl_optimistic_weight() -> f64 {
    0.30_f64
}
/// Serde default for `EclConfig::optimistic_scenario_multiplier`.
fn default_ecl_optimistic_multiplier() -> f64 {
    0.8_f64
}
/// Serde default for `EclConfig::pessimistic_scenario_weight`.
fn default_ecl_pessimistic_weight() -> f64 {
    0.20_f64
}
/// Serde default for `EclConfig::pessimistic_scenario_multiplier`.
fn default_ecl_pessimistic_multiplier() -> f64 {
    1.4_f64
}
5912
5913impl Default for EclConfig {
5914    fn default() -> Self {
5915        Self {
5916            enabled: false,
5917            base_scenario_weight: default_ecl_base_weight(),
5918            base_scenario_multiplier: default_ecl_base_multiplier(),
5919            optimistic_scenario_weight: default_ecl_optimistic_weight(),
5920            optimistic_scenario_multiplier: default_ecl_optimistic_multiplier(),
5921            pessimistic_scenario_weight: default_ecl_pessimistic_weight(),
5922            pessimistic_scenario_multiplier: default_ecl_pessimistic_multiplier(),
5923        }
5924    }
5925}
5926
5927// =============================================================================
5928// Audit Standards Configuration
5929// =============================================================================
5930
5931/// Audit standards framework configuration for generating standards-compliant audit data.
5932///
5933/// Supports ISA (International Standards on Auditing) and PCAOB standards:
5934/// - ISA 200-720: Complete coverage of audit standards
5935/// - ISA 520: Analytical Procedures
5936/// - ISA 505: External Confirmations
5937/// - ISA 700/705/706/701: Audit Reports
5938/// - PCAOB AS 2201: ICFR Auditing
5939#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5940pub struct AuditStandardsConfig {
5941    /// Enable audit standards generation
5942    #[serde(default)]
5943    pub enabled: bool,
5944
5945    /// ISA compliance configuration
5946    #[serde(default)]
5947    pub isa_compliance: IsaComplianceConfig,
5948
5949    /// Analytical procedures configuration (ISA 520)
5950    #[serde(default)]
5951    pub analytical_procedures: AnalyticalProceduresConfig,
5952
5953    /// External confirmations configuration (ISA 505)
5954    #[serde(default)]
5955    pub confirmations: ConfirmationsConfig,
5956
5957    /// Audit opinion configuration (ISA 700/705/706/701)
5958    #[serde(default)]
5959    pub opinion: AuditOpinionConfig,
5960
5961    /// Generate complete audit trail with traceability
5962    #[serde(default)]
5963    pub generate_audit_trail: bool,
5964
5965    /// SOX 302/404 compliance configuration
5966    #[serde(default)]
5967    pub sox: SoxComplianceConfig,
5968
5969    /// PCAOB-specific configuration
5970    #[serde(default)]
5971    pub pcaob: PcaobConfig,
5972}
5973
5974/// ISA compliance level configuration.
5975#[derive(Debug, Clone, Serialize, Deserialize)]
5976pub struct IsaComplianceConfig {
5977    /// Enable ISA compliance tracking
5978    #[serde(default)]
5979    pub enabled: bool,
5980
5981    /// Compliance level: "basic", "standard", "comprehensive"
5982    #[serde(default = "default_compliance_level")]
5983    pub compliance_level: String,
5984
5985    /// Generate ISA requirement mappings
5986    #[serde(default = "default_true")]
5987    pub generate_isa_mappings: bool,
5988
5989    /// Generate ISA coverage summary
5990    #[serde(default = "default_true")]
5991    pub generate_coverage_summary: bool,
5992
5993    /// Include PCAOB standard mappings (for dual framework)
5994    #[serde(default)]
5995    pub include_pcaob: bool,
5996
5997    /// Framework to use: "isa", "pcaob", "dual"
5998    #[serde(default = "default_audit_framework")]
5999    pub framework: String,
6000}
6001
/// Serde default for `IsaComplianceConfig::compliance_level`.
fn default_compliance_level() -> String {
    String::from("standard")
}

/// Serde default for `IsaComplianceConfig::framework`.
fn default_audit_framework() -> String {
    String::from("isa")
}
6009
6010impl Default for IsaComplianceConfig {
6011    fn default() -> Self {
6012        Self {
6013            enabled: false,
6014            compliance_level: default_compliance_level(),
6015            generate_isa_mappings: true,
6016            generate_coverage_summary: true,
6017            include_pcaob: false,
6018            framework: default_audit_framework(),
6019        }
6020    }
6021}
6022
6023/// Analytical procedures configuration (ISA 520).
6024#[derive(Debug, Clone, Serialize, Deserialize)]
6025pub struct AnalyticalProceduresConfig {
6026    /// Enable analytical procedures generation
6027    #[serde(default)]
6028    pub enabled: bool,
6029
6030    /// Number of procedures per account/area
6031    #[serde(default = "default_procedures_per_account")]
6032    pub procedures_per_account: usize,
6033
6034    /// Probability of variance exceeding threshold
6035    #[serde(default = "default_variance_probability")]
6036    pub variance_probability: f64,
6037
6038    /// Include variance investigations
6039    #[serde(default = "default_true")]
6040    pub generate_investigations: bool,
6041
6042    /// Include financial ratio analysis
6043    #[serde(default = "default_true")]
6044    pub include_ratio_analysis: bool,
6045}
6046
/// Serde default for `AnalyticalProceduresConfig::procedures_per_account`.
fn default_procedures_per_account() -> usize {
    3_usize
}

/// Serde default for `AnalyticalProceduresConfig::variance_probability`.
fn default_variance_probability() -> f64 {
    0.20_f64
}
6054
6055impl Default for AnalyticalProceduresConfig {
6056    fn default() -> Self {
6057        Self {
6058            enabled: false,
6059            procedures_per_account: default_procedures_per_account(),
6060            variance_probability: default_variance_probability(),
6061            generate_investigations: true,
6062            include_ratio_analysis: true,
6063        }
6064    }
6065}
6066
6067/// External confirmations configuration (ISA 505).
6068#[derive(Debug, Clone, Serialize, Deserialize)]
6069pub struct ConfirmationsConfig {
6070    /// Enable confirmation generation
6071    #[serde(default)]
6072    pub enabled: bool,
6073
6074    /// Number of confirmations to generate
6075    #[serde(default = "default_confirmation_count")]
6076    pub confirmation_count: usize,
6077
6078    /// Positive response rate
6079    #[serde(default = "default_positive_response_rate")]
6080    pub positive_response_rate: f64,
6081
6082    /// Exception rate (responses with differences)
6083    #[serde(default = "default_exception_rate_confirm")]
6084    pub exception_rate: f64,
6085
6086    /// Non-response rate
6087    #[serde(default = "default_non_response_rate")]
6088    pub non_response_rate: f64,
6089
6090    /// Generate alternative procedures for non-responses
6091    #[serde(default = "default_true")]
6092    pub generate_alternative_procedures: bool,
6093}
6094
/// Serde default for `ConfirmationsConfig::confirmation_count`.
fn default_confirmation_count() -> usize {
    50_usize
}

/// Serde default for `ConfirmationsConfig::positive_response_rate`.
fn default_positive_response_rate() -> f64 {
    0.85_f64
}

/// Serde default for `ConfirmationsConfig::exception_rate`.
fn default_exception_rate_confirm() -> f64 {
    0.10_f64
}

/// Serde default for `ConfirmationsConfig::non_response_rate`.
fn default_non_response_rate() -> f64 {
    0.05_f64
}
6110
6111impl Default for ConfirmationsConfig {
6112    fn default() -> Self {
6113        Self {
6114            enabled: false,
6115            confirmation_count: default_confirmation_count(),
6116            positive_response_rate: default_positive_response_rate(),
6117            exception_rate: default_exception_rate_confirm(),
6118            non_response_rate: default_non_response_rate(),
6119            generate_alternative_procedures: true,
6120        }
6121    }
6122}
6123
6124/// Audit opinion configuration (ISA 700/705/706/701).
6125#[derive(Debug, Clone, Serialize, Deserialize)]
6126pub struct AuditOpinionConfig {
6127    /// Enable audit opinion generation
6128    #[serde(default)]
6129    pub enabled: bool,
6130
6131    /// Generate Key Audit Matters (KAM) / Critical Audit Matters (CAM)
6132    #[serde(default = "default_true")]
6133    pub generate_kam: bool,
6134
6135    /// Average number of KAMs/CAMs per opinion
6136    #[serde(default = "default_kam_count")]
6137    pub average_kam_count: usize,
6138
6139    /// Rate of modified opinions
6140    #[serde(default = "default_modified_opinion_rate")]
6141    pub modified_opinion_rate: f64,
6142
6143    /// Include emphasis of matter paragraphs
6144    #[serde(default)]
6145    pub include_emphasis_of_matter: bool,
6146
6147    /// Include going concern conclusions
6148    #[serde(default = "default_true")]
6149    pub include_going_concern: bool,
6150}
6151
/// Serde default for `AuditOpinionConfig::average_kam_count`.
fn default_kam_count() -> usize {
    3_usize
}

/// Serde default for `AuditOpinionConfig::modified_opinion_rate`.
fn default_modified_opinion_rate() -> f64 {
    0.05_f64
}
6159
6160impl Default for AuditOpinionConfig {
6161    fn default() -> Self {
6162        Self {
6163            enabled: false,
6164            generate_kam: true,
6165            average_kam_count: default_kam_count(),
6166            modified_opinion_rate: default_modified_opinion_rate(),
6167            include_emphasis_of_matter: false,
6168            include_going_concern: true,
6169        }
6170    }
6171}
6172
6173/// SOX compliance configuration (Sections 302/404).
6174#[derive(Debug, Clone, Serialize, Deserialize)]
6175pub struct SoxComplianceConfig {
6176    /// Enable SOX compliance generation
6177    #[serde(default)]
6178    pub enabled: bool,
6179
6180    /// Generate Section 302 CEO/CFO certifications
6181    #[serde(default = "default_true")]
6182    pub generate_302_certifications: bool,
6183
6184    /// Generate Section 404 ICFR assessments
6185    #[serde(default = "default_true")]
6186    pub generate_404_assessments: bool,
6187
6188    /// Materiality threshold for SOX testing
6189    #[serde(default = "default_sox_materiality_threshold")]
6190    pub materiality_threshold: f64,
6191
6192    /// Rate of material weaknesses
6193    #[serde(default = "default_material_weakness_rate")]
6194    pub material_weakness_rate: f64,
6195
6196    /// Rate of significant deficiencies
6197    #[serde(default = "default_significant_deficiency_rate")]
6198    pub significant_deficiency_rate: f64,
6199}
6200
/// Serde default for `SoxComplianceConfig::material_weakness_rate`.
fn default_material_weakness_rate() -> f64 {
    0.02_f64
}

/// Serde default for `SoxComplianceConfig::significant_deficiency_rate`.
fn default_significant_deficiency_rate() -> f64 {
    0.08_f64
}
6208
6209impl Default for SoxComplianceConfig {
6210    fn default() -> Self {
6211        Self {
6212            enabled: false,
6213            generate_302_certifications: true,
6214            generate_404_assessments: true,
6215            materiality_threshold: default_sox_materiality_threshold(),
6216            material_weakness_rate: default_material_weakness_rate(),
6217            significant_deficiency_rate: default_significant_deficiency_rate(),
6218        }
6219    }
6220}
6221
6222/// PCAOB-specific configuration.
6223#[derive(Debug, Clone, Serialize, Deserialize)]
6224pub struct PcaobConfig {
6225    /// Enable PCAOB-specific elements
6226    #[serde(default)]
6227    pub enabled: bool,
6228
6229    /// Treat as PCAOB audit (vs ISA-only)
6230    #[serde(default)]
6231    pub is_pcaob_audit: bool,
6232
6233    /// Generate Critical Audit Matters (CAM)
6234    #[serde(default = "default_true")]
6235    pub generate_cam: bool,
6236
6237    /// Include ICFR opinion (for integrated audits)
6238    #[serde(default)]
6239    pub include_icfr_opinion: bool,
6240
6241    /// Generate PCAOB-ISA standard mappings
6242    #[serde(default)]
6243    pub generate_standard_mappings: bool,
6244}
6245
6246impl Default for PcaobConfig {
6247    fn default() -> Self {
6248        Self {
6249            enabled: false,
6250            is_pcaob_audit: false,
6251            generate_cam: true,
6252            include_icfr_opinion: false,
6253            generate_standard_mappings: false,
6254        }
6255    }
6256}
6257
6258// =============================================================================
6259// Advanced Distribution Configuration
6260// =============================================================================
6261
6262/// Advanced distribution configuration for realistic data generation.
6263///
6264/// This section enables sophisticated distribution models including:
6265/// - Mixture models (multi-modal distributions)
6266/// - Cross-field correlations
6267/// - Conditional distributions
6268/// - Regime changes and economic cycles
6269/// - Statistical validation
6270#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6271pub struct AdvancedDistributionConfig {
6272    /// Enable advanced distribution features.
6273    #[serde(default)]
6274    pub enabled: bool,
6275
6276    /// Mixture model configuration for amounts.
6277    #[serde(default)]
6278    pub amounts: MixtureDistributionSchemaConfig,
6279
6280    /// Cross-field correlation configuration.
6281    #[serde(default)]
6282    pub correlations: CorrelationSchemaConfig,
6283
6284    /// Conditional distribution configurations.
6285    #[serde(default)]
6286    pub conditional: Vec<ConditionalDistributionSchemaConfig>,
6287
6288    /// Regime change configuration.
6289    #[serde(default)]
6290    pub regime_changes: RegimeChangeSchemaConfig,
6291
6292    /// Industry-specific distribution profile.
6293    #[serde(default)]
6294    pub industry_profile: Option<IndustryProfileType>,
6295
6296    /// Statistical validation configuration.
6297    #[serde(default)]
6298    pub validation: StatisticalValidationSchemaConfig,
6299
6300    /// v3.4.4+ — Pareto heavy-tailed distribution for monetary amounts.
6301    /// When set and `enabled`, overrides `amounts` mixture model for the
6302    /// non-fraud amount-sampling path (fraud patterns remain orthogonal).
6303    /// Useful for capex, strategic contracts, and any domain where a small
6304    /// number of very large values dominates the tail.
6305    #[serde(default)]
6306    pub pareto: Option<ParetoSchemaConfig>,
6307}
6308
/// Schema-level Pareto distribution configuration (v3.4.4+).
///
/// Thin wrapper around `datasynth_core::distributions::ParetoConfig` that
/// adds an `enabled` gate and serde-friendly field names.
///
/// Every field carries a serde default, so any subset of keys may be
/// omitted on input; the fallbacks are `false`, `None`, and the values
/// returned by the `default_pareto_*` helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParetoSchemaConfig {
    /// Enable Pareto sampling. When true, replaces the `amounts` mixture
    /// model for the non-fraud amount-sampling path.
    /// Defaults to `false` when omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Shape parameter (tail heaviness). Lower values → heavier tail.
    /// Typical range: 1.5-3.0. Default: 2.0.
    #[serde(default = "default_pareto_alpha")]
    pub alpha: f64,

    /// Scale / minimum value. All samples are >= x_min.
    /// Typical: 1000 (for capex) to 100,000 (for large contracts). Default: 100.
    #[serde(default = "default_pareto_x_min")]
    pub x_min: f64,

    /// Optional upper clamp. `None` = unbounded (recommended for realistic
    /// heavy tails). Defaults to `None` when omitted.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Default: 2.
    #[serde(default = "default_pareto_decimal_places")]
    pub decimal_places: u8,
}
6339
/// Serde fallback for `ParetoSchemaConfig::alpha`: a shape parameter of `2.0`.
fn default_pareto_alpha() -> f64 {
    const ALPHA: f64 = 2.0;
    ALPHA
}
6343
/// Serde fallback for `ParetoSchemaConfig::x_min`: a scale of `100.0`.
fn default_pareto_x_min() -> f64 {
    const X_MIN: f64 = 100.0;
    X_MIN
}
6347
/// Serde fallback for `ParetoSchemaConfig::decimal_places`: round to `2` places.
fn default_pareto_decimal_places() -> u8 {
    const DECIMAL_PLACES: u8 = 2;
    DECIMAL_PLACES
}
6351
6352impl Default for ParetoSchemaConfig {
6353    fn default() -> Self {
6354        Self {
6355            enabled: false,
6356            alpha: default_pareto_alpha(),
6357            x_min: default_pareto_x_min(),
6358            max_value: None,
6359            decimal_places: default_pareto_decimal_places(),
6360        }
6361    }
6362}
6363
6364impl ParetoSchemaConfig {
6365    /// Convert this schema config into a `datasynth_core::distributions::ParetoConfig`.
6366    pub fn to_core_config(&self) -> datasynth_core::distributions::ParetoConfig {
6367        datasynth_core::distributions::ParetoConfig {
6368            alpha: self.alpha,
6369            x_min: self.x_min,
6370            max_value: self.max_value,
6371            decimal_places: self.decimal_places,
6372        }
6373    }
6374}
6375
/// Industry profile types for pre-configured distribution settings.
///
/// Serialized in snake_case: `retail`, `manufacturing`,
/// `financial_services`, `healthcare`, `technology`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndustryProfileType {
    /// Retail industry profile (POS sales, inventory, seasonal)
    Retail,
    /// Manufacturing industry profile (raw materials, maintenance, capital)
    Manufacturing,
    /// Financial services profile (wire transfers, ACH, fee income)
    FinancialServices,
    /// Healthcare profile (claims, procedures, supplies)
    Healthcare,
    /// Technology profile (subscriptions, services, R&D)
    Technology,
}
6391
/// Mixture model distribution configuration.
///
/// All fields are optional on input. With nothing supplied the config is
/// disabled, log-normal, has no components, a minimum of `0.01`, no
/// maximum, and rounds to 2 decimal places.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureDistributionSchemaConfig {
    /// Enable mixture model for amount generation.
    #[serde(default)]
    pub enabled: bool,

    /// Distribution type: `"gaussian"` or `"log_normal"` (variant names are
    /// serialized in snake_case). Defaults to log-normal.
    #[serde(default = "default_mixture_type")]
    pub distribution_type: MixtureDistributionType,

    /// Mixture components with weights. Empty by default; the
    /// `to_*_config` converters return `None` for an empty list.
    #[serde(default)]
    pub components: Vec<MixtureComponentConfig>,

    /// Minimum value constraint. Default: 0.01.
    #[serde(default = "default_min_amount")]
    pub min_value: f64,

    /// Maximum value constraint (optional).
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Default: 2.
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
6419
/// Serde fallback for `MixtureDistributionSchemaConfig::distribution_type`:
/// the log-normal mixture.
fn default_mixture_type() -> MixtureDistributionType {
    MixtureDistributionType::LogNormal
}
6423
/// Serde fallback for `MixtureDistributionSchemaConfig::min_value`: `0.01`.
fn default_min_amount() -> f64 {
    const MIN_AMOUNT: f64 = 0.01;
    MIN_AMOUNT
}
6427
/// Serde fallback for the `decimal_places` fields that reference it:
/// round to `2` places.
fn default_decimal_places() -> u8 {
    const DECIMAL_PLACES: u8 = 2;
    DECIMAL_PLACES
}
6431
6432impl Default for MixtureDistributionSchemaConfig {
6433    fn default() -> Self {
6434        Self {
6435            enabled: false,
6436            distribution_type: MixtureDistributionType::LogNormal,
6437            components: Vec::new(),
6438            min_value: 0.01,
6439            max_value: None,
6440            decimal_places: 2,
6441        }
6442    }
6443}
6444
6445impl MixtureDistributionSchemaConfig {
6446    /// Convert this schema-level config into a [`LogNormalMixtureConfig`]
6447    /// suitable for `LogNormalMixtureSampler::new`. Returns `None` if there
6448    /// are no components (schema default is an empty list, which cannot
6449    /// drive a sampler).
6450    ///
6451    /// Callers should gate this with `self.enabled` before invoking.
6452    pub fn to_log_normal_config(
6453        &self,
6454    ) -> Option<datasynth_core::distributions::LogNormalMixtureConfig> {
6455        if self.components.is_empty() {
6456            return None;
6457        }
6458        Some(datasynth_core::distributions::LogNormalMixtureConfig {
6459            components: self
6460                .components
6461                .iter()
6462                .map(|c| match &c.label {
6463                    Some(lbl) => datasynth_core::distributions::LogNormalComponent::with_label(
6464                        c.weight,
6465                        c.mu,
6466                        c.sigma,
6467                        lbl.clone(),
6468                    ),
6469                    None => datasynth_core::distributions::LogNormalComponent::new(
6470                        c.weight, c.mu, c.sigma,
6471                    ),
6472                })
6473                .collect(),
6474            min_value: self.min_value,
6475            max_value: self.max_value,
6476            decimal_places: self.decimal_places,
6477        })
6478    }
6479
6480    /// Convert this schema-level config into a [`GaussianMixtureConfig`].
6481    /// Returns `None` if there are no components.
6482    pub fn to_gaussian_config(
6483        &self,
6484    ) -> Option<datasynth_core::distributions::GaussianMixtureConfig> {
6485        if self.components.is_empty() {
6486            return None;
6487        }
6488        Some(datasynth_core::distributions::GaussianMixtureConfig {
6489            components: self
6490                .components
6491                .iter()
6492                .map(|c| {
6493                    datasynth_core::distributions::GaussianComponent::new(c.weight, c.mu, c.sigma)
6494                })
6495                .collect(),
6496            allow_negative: true,
6497            min_value: Some(self.min_value),
6498            max_value: self.max_value,
6499        })
6500    }
6501}
6502
/// Mixture distribution type.
///
/// Serialized in snake_case as `gaussian` or `log_normal`; the derived
/// `Default` is `LogNormal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MixtureDistributionType {
    /// Gaussian (normal) mixture
    Gaussian,
    /// Log-normal mixture (for positive amounts)
    #[default]
    LogNormal,
}
6513
/// Configuration for a single mixture component.
///
/// `weight`, `mu` and `sigma` are required on input; only `label` may be
/// omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponentConfig {
    /// Weight of this component (must sum to 1.0 across all components).
    /// NOTE(review): the sum is not checked in this struct — presumably
    /// validated downstream; confirm.
    pub weight: f64,

    /// Location parameter (mean for Gaussian, mu for log-normal).
    pub mu: f64,

    /// Scale parameter (std dev for Gaussian, sigma for log-normal).
    pub sigma: f64,

    /// Optional label for this component (e.g., "routine", "significant", "major").
    #[serde(default)]
    pub label: Option<String>,
}
6530
/// Cross-field correlation configuration.
///
/// Every field is optional on input; the defaults describe a disabled
/// Gaussian copula with no fields and an empty matrix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationSchemaConfig {
    /// Enable correlation modeling.
    #[serde(default)]
    pub enabled: bool,

    /// Copula type for dependency modeling.
    #[serde(default)]
    pub copula_type: CopulaSchemaType,

    /// Field definitions for correlation. The position of a field in this
    /// list is its row/column index in `matrix`.
    #[serde(default)]
    pub fields: Vec<CorrelatedFieldConfig>,

    /// Correlation matrix, row-major. For n fields, `correlation_between`
    /// accepts either the strict upper triangle (n*(n-1)/2 values,
    /// diagonal excluded) or the full symmetric n×n matrix (n*n values).
    #[serde(default)]
    pub matrix: Vec<f64>,

    /// Expected correlations for validation.
    #[serde(default)]
    pub expected_correlations: Vec<ExpectedCorrelationConfig>,
}
6555
6556impl Default for CorrelationSchemaConfig {
6557    fn default() -> Self {
6558        Self {
6559            enabled: false,
6560            copula_type: CopulaSchemaType::Gaussian,
6561            fields: Vec::new(),
6562            matrix: Vec::new(),
6563            expected_correlations: Vec::new(),
6564        }
6565    }
6566}
6567
6568impl CorrelationSchemaConfig {
6569    /// v3.5.4+: extract the correlation for a specific field pair from
6570    /// either the upper-triangular flat matrix (n*(n-1)/2 values) or a
6571    /// full symmetric n×n matrix (n*n values). Returns `None` when the
6572    /// named fields aren't both present or the matrix shape doesn't
6573    /// match.
6574    pub fn correlation_between(&self, field_a: &str, field_b: &str) -> Option<f64> {
6575        let idx_a = self.fields.iter().position(|f| f.name == field_a)?;
6576        let idx_b = self.fields.iter().position(|f| f.name == field_b)?;
6577        if idx_a == idx_b {
6578            return Some(1.0);
6579        }
6580        let (i, j) = if idx_a < idx_b {
6581            (idx_a, idx_b)
6582        } else {
6583            (idx_b, idx_a)
6584        };
6585        let n = self.fields.len();
6586        // Full n×n symmetric matrix?
6587        if self.matrix.len() == n * n {
6588            return self.matrix.get(idx_a * n + idx_b).copied();
6589        }
6590        // Upper triangular flat (row-major, excluding diagonal)?
6591        let expected_tri = n * (n - 1) / 2;
6592        if self.matrix.len() == expected_tri {
6593            // Row i, col j where j > i: flat index is
6594            //   sum_{k=0..i}((n-1-k)) + (j - i - 1)
6595            // = i*(n-1) - i*(i-1)/2 + (j - i - 1)
6596            let flat = i * (n - 1) - i * (i.saturating_sub(1)) / 2 + (j - i - 1);
6597            return self.matrix.get(flat).copied();
6598        }
6599        None
6600    }
6601
6602    /// Convert this schema config to a core `CopulaConfig` when the
6603    /// declared field pair `(field_a, field_b)` has a valid correlation
6604    /// entry. Returns `None` when disabled, fields missing, or matrix
6605    /// malformed.
6606    pub fn to_core_config_for_pair(
6607        &self,
6608        field_a: &str,
6609        field_b: &str,
6610    ) -> Option<datasynth_core::distributions::CopulaConfig> {
6611        if !self.enabled {
6612            return None;
6613        }
6614        let rho = self.correlation_between(field_a, field_b)?;
6615        use datasynth_core::distributions::{CopulaConfig, CopulaType};
6616        let copula_type = match self.copula_type {
6617            CopulaSchemaType::Gaussian => CopulaType::Gaussian,
6618            CopulaSchemaType::Clayton => CopulaType::Clayton,
6619            CopulaSchemaType::Gumbel => CopulaType::Gumbel,
6620            CopulaSchemaType::Frank => CopulaType::Frank,
6621            CopulaSchemaType::StudentT => CopulaType::StudentT,
6622        };
6623        // Gaussian / StudentT interpret theta as correlation; others
6624        // as a shape parameter. Minimal v3.5.4 only wires Gaussian in
6625        // the runtime, but the converter is general so follow-ups can
6626        // light up the other copulas.
6627        let theta = rho.clamp(-0.999, 0.999);
6628        Some(CopulaConfig {
6629            copula_type,
6630            theta,
6631            degrees_of_freedom: 4.0,
6632        })
6633    }
6634}
6635
/// Copula type for dependency modeling.
///
/// Serialized in snake_case: `gaussian`, `clayton`, `gumbel`, `frank`,
/// `student_t`. The derived `Default` is `Gaussian`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CopulaSchemaType {
    /// Gaussian copula (symmetric, no tail dependence)
    #[default]
    Gaussian,
    /// Clayton copula (lower tail dependence)
    Clayton,
    /// Gumbel copula (upper tail dependence)
    Gumbel,
    /// Frank copula (symmetric, no tail dependence)
    Frank,
    /// Student-t copula (both tail dependencies)
    StudentT,
}
6652
/// Configuration for a correlated field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelatedFieldConfig {
    /// Field name. This is the key `CorrelationSchemaConfig::correlation_between`
    /// matches its arguments against.
    pub name: String,

    /// Marginal distribution type. When omitted, falls back to
    /// `MarginalDistributionConfig::default()`.
    #[serde(default)]
    pub distribution: MarginalDistributionConfig,
}
6663
/// Marginal distribution configuration.
///
/// Internally tagged: the variant is selected by a `type` key whose value
/// is the snake_case variant name (`normal`, `log_normal`, `uniform`,
/// `discrete_uniform`), with the variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MarginalDistributionConfig {
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum value
        min: f64,
        /// Maximum value
        max: f64,
    },
    /// Discrete uniform distribution.
    DiscreteUniform {
        /// Minimum integer value
        min: i32,
        /// Maximum integer value
        max: i32,
    },
}
6697
6698impl Default for MarginalDistributionConfig {
6699    fn default() -> Self {
6700        Self::Normal {
6701            mu: 0.0,
6702            sigma: 1.0,
6703        }
6704    }
6705}
6706
/// Expected correlation for validation.
///
/// `field1`, `field2` and `expected_r` are required on input; `tolerance`
/// may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedCorrelationConfig {
    /// First field name.
    pub field1: String,
    /// Second field name.
    pub field2: String,
    /// Expected correlation coefficient.
    pub expected_r: f64,
    /// Acceptable tolerance. Default: 0.10.
    #[serde(default = "default_correlation_tolerance")]
    pub tolerance: f64,
}
6720
/// Serde fallback for `ExpectedCorrelationConfig::tolerance`: `0.10`.
fn default_correlation_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.10;
    TOLERANCE
}
6724
/// Conditional distribution configuration.
///
/// `output_field` and `input_field` are required on input; every other
/// field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalDistributionSchemaConfig {
    /// Output field name to generate.
    pub output_field: String,

    /// Input field name that conditions the distribution.
    pub input_field: String,

    /// Breakpoints defining distribution changes.
    #[serde(default)]
    pub breakpoints: Vec<ConditionalBreakpointConfig>,

    /// Default distribution when below all breakpoints. When omitted,
    /// falls back to `ConditionalDistributionParamsConfig::default()`.
    #[serde(default)]
    pub default_distribution: ConditionalDistributionParamsConfig,

    /// Minimum output value constraint.
    #[serde(default)]
    pub min_value: Option<f64>,

    /// Maximum output value constraint.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for output rounding. Default: 2.
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
6754
/// Breakpoint for conditional distribution.
///
/// Both fields are required on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalBreakpointConfig {
    /// Input value threshold.
    pub threshold: f64,

    /// Distribution to use when input >= threshold.
    pub distribution: ConditionalDistributionParamsConfig,
}
6764
6765impl ConditionalDistributionSchemaConfig {
6766    /// Convert this schema config into a core
6767    /// [`ConditionalDistributionConfig`] suitable for
6768    /// [`ConditionalSampler::new`]. v3.5.3+.
6769    pub fn to_core_config(&self) -> datasynth_core::distributions::ConditionalDistributionConfig {
6770        use datasynth_core::distributions::{
6771            Breakpoint, ConditionalDistributionConfig, ConditionalDistributionParams,
6772        };
6773
6774        let default_distribution = convert_conditional_params(&self.default_distribution);
6775        let breakpoints: Vec<Breakpoint> = self
6776            .breakpoints
6777            .iter()
6778            .map(|bp| Breakpoint {
6779                threshold: bp.threshold,
6780                distribution: convert_conditional_params(&bp.distribution),
6781            })
6782            .collect();
6783
6784        // Use a sentinel default_distribution when the schema default is
6785        // its factory default (Fixed { value: 0.0 })  and we have
6786        // breakpoints — we don't want to clobber data for values below
6787        // the first breakpoint.
6788        let final_default = if breakpoints.is_empty() {
6789            default_distribution
6790        } else {
6791            match default_distribution {
6792                ConditionalDistributionParams::Fixed { value: 0.0 } => {
6793                    // Reuse the first breakpoint's distribution as the
6794                    // default to avoid surprising zeros.
6795                    breakpoints[0].distribution.clone()
6796                }
6797                other => other,
6798            }
6799        };
6800
6801        ConditionalDistributionConfig {
6802            output_field: self.output_field.clone(),
6803            input_field: self.input_field.clone(),
6804            breakpoints,
6805            default_distribution: final_default,
6806            min_value: self.min_value,
6807            max_value: self.max_value,
6808            decimal_places: self.decimal_places,
6809        }
6810    }
6811}
6812
6813fn convert_conditional_params(
6814    p: &ConditionalDistributionParamsConfig,
6815) -> datasynth_core::distributions::ConditionalDistributionParams {
6816    use datasynth_core::distributions::ConditionalDistributionParams as Core;
6817    match p {
6818        ConditionalDistributionParamsConfig::Fixed { value } => Core::Fixed { value: *value },
6819        ConditionalDistributionParamsConfig::Normal { mu, sigma } => Core::Normal {
6820            mu: *mu,
6821            sigma: *sigma,
6822        },
6823        ConditionalDistributionParamsConfig::LogNormal { mu, sigma } => Core::LogNormal {
6824            mu: *mu,
6825            sigma: *sigma,
6826        },
6827        ConditionalDistributionParamsConfig::Uniform { min, max } => Core::Uniform {
6828            min: *min,
6829            max: *max,
6830        },
6831        ConditionalDistributionParamsConfig::Beta {
6832            alpha,
6833            beta,
6834            min,
6835            max,
6836        } => Core::Beta {
6837            alpha: *alpha,
6838            beta: *beta,
6839            min: *min,
6840            max: *max,
6841        },
6842        ConditionalDistributionParamsConfig::Discrete { values, weights } => Core::Discrete {
6843            values: values.clone(),
6844            weights: weights.clone(),
6845        },
6846    }
6847}
6848
/// Distribution parameters for conditional distributions.
///
/// Internally tagged: the variant is selected by a `type` key whose value
/// is the snake_case variant name (`fixed`, `normal`, `log_normal`,
/// `uniform`, `beta`, `discrete`), with the variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ConditionalDistributionParamsConfig {
    /// Fixed value.
    Fixed {
        /// The fixed value
        value: f64,
    },
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum
        min: f64,
        /// Maximum
        max: f64,
    },
    /// Beta distribution (scaled).
    Beta {
        /// Alpha parameter
        alpha: f64,
        /// Beta parameter
        beta: f64,
        /// Minimum output value
        min: f64,
        /// Maximum output value
        max: f64,
    },
    /// Discrete values with weights.
    Discrete {
        /// Possible values
        values: Vec<f64>,
        /// Weights (should sum to 1.0)
        weights: Vec<f64>,
    },
}
6898
6899impl Default for ConditionalDistributionParamsConfig {
6900    fn default() -> Self {
6901        Self::Normal {
6902            mu: 0.0,
6903            sigma: 1.0,
6904        }
6905    }
6906}
6907
/// Regime change configuration.
///
/// All fields are optional on input; the derived `Default` is disabled
/// with no events, no economic cycle, and no parameter drifts.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RegimeChangeSchemaConfig {
    /// Enable regime change modeling. `apply_to` does nothing while this
    /// is false, regardless of the other fields.
    #[serde(default)]
    pub enabled: bool,

    /// List of regime changes.
    #[serde(default)]
    pub changes: Vec<RegimeChangeEventConfig>,

    /// Economic cycle configuration.
    #[serde(default)]
    pub economic_cycle: Option<EconomicCycleSchemaConfig>,

    /// Parameter drift configurations.
    #[serde(default)]
    pub parameter_drifts: Vec<ParameterDriftSchemaConfig>,
}
6927
/// A single regime change event.
///
/// `date` and `change_type` are required on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeChangeEventConfig {
    /// Date when the change occurs (ISO 8601 calendar date, `YYYY-MM-DD`).
    /// Stored as a string; `RegimeChangeSchemaConfig::apply_to` parses it
    /// with `%Y-%m-%d` and skips the event if parsing fails.
    pub date: String,

    /// Type of regime change.
    pub change_type: RegimeChangeTypeConfig,

    /// Description of the change.
    #[serde(default)]
    pub description: Option<String>,

    /// Effects of this regime change.
    #[serde(default)]
    pub effects: Vec<RegimeEffectConfig>,
}
6945
/// Type of regime change.
///
/// Serialized in snake_case (e.g. `price_increase`,
/// `product_discontinuation`, `competitor_entry`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RegimeChangeTypeConfig {
    /// Acquisition - sudden volume and amount increase
    Acquisition,
    /// Divestiture - sudden volume and amount decrease
    Divestiture,
    /// Price increase - amounts increase
    PriceIncrease,
    /// Price decrease - amounts decrease
    PriceDecrease,
    /// New product launch - volume ramp-up
    ProductLaunch,
    /// Product discontinuation - volume ramp-down
    ProductDiscontinuation,
    /// Policy change - affects patterns
    PolicyChange,
    /// Competitor entry - market disruption
    CompetitorEntry,
    /// Custom effect
    Custom,
}
6969
/// Effect of a regime change on a specific field.
///
/// Both fields are required on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeEffectConfig {
    /// Field being affected.
    pub field: String,

    /// Multiplier to apply (1.0 = no change, 1.5 = 50% increase).
    pub multiplier: f64,
}
6979
/// Economic cycle configuration.
///
/// All fields are optional on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EconomicCycleSchemaConfig {
    /// Enable economic cycle modeling.
    #[serde(default)]
    pub enabled: bool,

    /// Cycle period in months (e.g., 48 for 4-year business cycle).
    /// Default: 48.
    #[serde(default = "default_cycle_period")]
    pub period_months: u32,

    /// Amplitude of cycle effect (0.0-1.0). Default: 0.15.
    #[serde(default = "default_cycle_amplitude")]
    pub amplitude: f64,

    /// Phase offset in months. Default: 0.
    #[serde(default)]
    pub phase_offset: u32,

    /// Recession periods (start_month, duration_months).
    #[serde(default)]
    pub recessions: Vec<RecessionPeriodConfig>,
}
7003
/// Serde fallback for `EconomicCycleSchemaConfig::period_months`: `48` months.
fn default_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}
7007
/// Serde fallback for `EconomicCycleSchemaConfig::amplitude`: `0.15`.
fn default_cycle_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
7011
7012impl Default for EconomicCycleSchemaConfig {
7013    fn default() -> Self {
7014        Self {
7015            enabled: false,
7016            period_months: 48,
7017            amplitude: 0.15,
7018            phase_offset: 0,
7019            recessions: Vec::new(),
7020        }
7021    }
7022}
7023
/// Recession period configuration.
///
/// `start_month` and `duration_months` are required on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodConfig {
    /// Start month (0-indexed from generation start).
    pub start_month: u32,

    /// Duration in months.
    pub duration_months: u32,

    /// Severity (0.0-1.0, affects volume reduction). Default: 0.20.
    /// `RegimeChangeSchemaConfig::apply_to` derives the core severity from
    /// `1.0 - severity`.
    #[serde(default = "default_recession_severity")]
    pub severity: f64,
}
7037
7038impl RegimeChangeSchemaConfig {
7039    /// Populate the regime-change, economic-cycle, and parameter-drift
7040    /// slots on a `DriftConfig` from this schema config. v3.5.2+.
7041    ///
7042    /// `generation_start` must match `config.global.start_date` so that
7043    /// absolute regime-change dates can be mapped to 0-indexed periods.
7044    /// Unparseable / out-of-range dates are silently skipped to keep
7045    /// runtime robust against user typos.
7046    pub fn apply_to(
7047        &self,
7048        drift: &mut datasynth_core::distributions::DriftConfig,
7049        generation_start: chrono::NaiveDate,
7050    ) {
7051        if !self.enabled {
7052            return;
7053        }
7054
7055        // Enable drift if any regime-change feature wants it.
7056        drift.enabled = true;
7057
7058        // Regime-change events (absolute dates → period offsets).
7059        for event in &self.changes {
7060            let period = match chrono::NaiveDate::parse_from_str(&event.date, "%Y-%m-%d") {
7061                Ok(d) => {
7062                    let days = (d - generation_start).num_days();
7063                    if days < 0 {
7064                        continue;
7065                    }
7066                    // Approximate month by dividing by 30.4 so we don't
7067                    // need chrono::Months arithmetic.
7068                    (days as f64 / 30.4).round() as u32
7069                }
7070                Err(_) => continue,
7071            };
7072            let change_type = convert_regime_change_type(event.change_type);
7073            let core_effects = event
7074                .effects
7075                .iter()
7076                .map(|e| datasynth_core::distributions::RegimeEffect {
7077                    field: e.field.clone(),
7078                    multiplier: e.multiplier,
7079                })
7080                .collect();
7081            drift
7082                .regime_changes
7083                .push(datasynth_core::distributions::RegimeChange {
7084                    period,
7085                    change_type,
7086                    description: event.description.clone(),
7087                    effects: core_effects,
7088                    transition_periods: 0,
7089                });
7090        }
7091
7092        // Economic cycle.
7093        if let Some(ec) = &self.economic_cycle {
7094            if ec.enabled {
7095                let recession_periods: Vec<u32> = ec
7096                    .recessions
7097                    .iter()
7098                    .flat_map(|r| r.start_month..r.start_month + r.duration_months)
7099                    .collect();
7100                // Use the most-severe recession as the severity driver;
7101                // fall back to default when none declared.
7102                let severity = ec
7103                    .recessions
7104                    .iter()
7105                    .map(|r| 1.0 - r.severity)
7106                    .fold(0.75f64, f64::min);
7107                drift.economic_cycle = datasynth_core::distributions::EconomicCycleConfig {
7108                    enabled: true,
7109                    cycle_length: ec.period_months,
7110                    amplitude: ec.amplitude,
7111                    phase_offset: ec.phase_offset,
7112                    recession_periods,
7113                    recession_severity: severity,
7114                };
7115                drift.drift_type = datasynth_core::distributions::DriftType::Mixed;
7116            }
7117        }
7118
7119        // Parameter drifts.
7120        for pd in &self.parameter_drifts {
7121            let drift_type = match pd.drift_type {
7122                ParameterDriftTypeConfig::Linear => {
7123                    datasynth_core::distributions::ParameterDriftType::Linear
7124                }
7125                ParameterDriftTypeConfig::Exponential => {
7126                    datasynth_core::distributions::ParameterDriftType::Exponential
7127                }
7128                ParameterDriftTypeConfig::Logistic => {
7129                    datasynth_core::distributions::ParameterDriftType::Logistic
7130                }
7131                ParameterDriftTypeConfig::Step => {
7132                    datasynth_core::distributions::ParameterDriftType::Step
7133                }
7134            };
7135            drift
7136                .parameter_drifts
7137                .push(datasynth_core::distributions::ParameterDrift {
7138                    parameter: pd.parameter.clone(),
7139                    drift_type,
7140                    initial_value: pd.start_value,
7141                    target_or_rate: pd.end_value,
7142                    start_period: pd.start_period,
7143                    end_period: pd.end_period,
7144                    steepness: 1.0,
7145                });
7146        }
7147    }
7148}
7149
7150fn convert_regime_change_type(
7151    t: RegimeChangeTypeConfig,
7152) -> datasynth_core::distributions::RegimeChangeType {
7153    use datasynth_core::distributions::RegimeChangeType as Core;
7154    match t {
7155        RegimeChangeTypeConfig::Acquisition => Core::Acquisition,
7156        RegimeChangeTypeConfig::Divestiture => Core::Divestiture,
7157        RegimeChangeTypeConfig::PriceIncrease => Core::PriceIncrease,
7158        RegimeChangeTypeConfig::PriceDecrease => Core::PriceDecrease,
7159        RegimeChangeTypeConfig::ProductLaunch => Core::ProductLaunch,
7160        RegimeChangeTypeConfig::ProductDiscontinuation => Core::ProductDiscontinuation,
7161        RegimeChangeTypeConfig::PolicyChange => Core::PolicyChange,
7162        RegimeChangeTypeConfig::CompetitorEntry => Core::CompetitorEntry,
7163        RegimeChangeTypeConfig::Custom => Core::Custom,
7164    }
7165}
7166
/// Serde fallback for `RecessionPeriodConfig::severity`: `0.20`.
fn default_recession_severity() -> f64 {
    const SEVERITY: f64 = 0.20;
    SEVERITY
}
7170
/// Parameter drift configuration.
///
/// `parameter`, `drift_type`, `start_value` and `end_value` are required
/// on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterDriftSchemaConfig {
    /// Parameter being drifted.
    pub parameter: String,

    /// Drift type.
    pub drift_type: ParameterDriftTypeConfig,

    /// Start value. Forwarded as the core `initial_value` by
    /// `RegimeChangeSchemaConfig::apply_to`.
    pub start_value: f64,

    /// End value. Forwarded as the core `target_or_rate` by
    /// `RegimeChangeSchemaConfig::apply_to`.
    pub end_value: f64,

    /// Start period (month, 0-indexed). Default: 0.
    #[serde(default)]
    pub start_period: u32,

    /// End period (month, optional - defaults to end of generation).
    #[serde(default)]
    pub end_period: Option<u32>,
}
7194
/// Parameter drift type.
///
/// Serialized in snake_case: `linear`, `exponential`, `logistic`, `step`.
/// The derived `Default` is `Linear`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ParameterDriftTypeConfig {
    /// Linear interpolation
    #[default]
    Linear,
    /// Exponential growth/decay
    Exponential,
    /// S-curve (logistic)
    Logistic,
    /// Step function
    Step,
}
7209
/// Statistical validation configuration.
///
/// All fields are optional on input; the derived `Default` is disabled
/// with no tests and default reporting settings.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StatisticalValidationSchemaConfig {
    /// Enable statistical validation.
    #[serde(default)]
    pub enabled: bool,

    /// Statistical tests to run.
    #[serde(default)]
    pub tests: Vec<StatisticalTestConfig>,

    /// Validation reporting configuration.
    #[serde(default)]
    pub reporting: ValidationReportingConfig,
}
7225
/// Statistical test configuration.
///
/// Internally tagged: the variant is selected by a `type` key whose value
/// is the snake_case variant name (`benford_first_digit`,
/// `distribution_fit`, `correlation_check`, `chi_squared`,
/// `anderson_darling`), with the variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StatisticalTestConfig {
    /// Benford's Law first digit test.
    BenfordFirstDigit {
        /// Threshold MAD for failure. Default: 0.015.
        #[serde(default = "default_benford_threshold")]
        threshold_mad: f64,
        /// Warning MAD threshold. Default: 0.010.
        #[serde(default = "default_benford_warning")]
        warning_mad: f64,
    },
    /// Distribution fit test.
    DistributionFit {
        /// Target distribution to test (required).
        target: TargetDistributionConfig,
        /// K-S test significance level. Default: 0.05.
        #[serde(default = "default_ks_significance")]
        ks_significance: f64,
        /// Test method (ks, anderson_darling, chi_squared).
        #[serde(default)]
        method: DistributionFitMethod,
    },
    /// Correlation check.
    CorrelationCheck {
        /// Expected correlations to validate (required).
        expected_correlations: Vec<ExpectedCorrelationConfig>,
    },
    /// Chi-squared test.
    ChiSquared {
        /// Number of bins. Default: 10.
        #[serde(default = "default_chi_squared_bins")]
        bins: usize,
        /// Significance level. Default: 0.05.
        #[serde(default = "default_chi_squared_significance")]
        significance: f64,
    },
    /// Anderson-Darling test.
    AndersonDarling {
        /// Target distribution (required).
        target: TargetDistributionConfig,
        /// Significance level. Default: 0.05.
        #[serde(default = "default_ad_significance")]
        significance: f64,
    },
}
7273
/// Serde fallback for the Benford failure threshold (MAD).
fn default_benford_threshold() -> f64 {
    0.015
}

/// Serde fallback for the Benford warning threshold (MAD).
fn default_benford_warning() -> f64 {
    0.010
}

/// Serde fallback for the K-S significance level.
fn default_ks_significance() -> f64 {
    0.05
}

/// Serde fallback for the number of chi-squared bins.
fn default_chi_squared_bins() -> usize {
    10
}

/// Serde fallback for the chi-squared significance level.
fn default_chi_squared_significance() -> f64 {
    0.05
}

/// Serde fallback for the Anderson-Darling significance level.
fn default_ad_significance() -> f64 {
    0.05
}
7297
7298/// Target distribution for fit tests.
7299#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7300#[serde(rename_all = "snake_case")]
7301pub enum TargetDistributionConfig {
7302    /// Normal distribution
7303    Normal,
7304    /// Log-normal distribution
7305    #[default]
7306    LogNormal,
7307    /// Exponential distribution
7308    Exponential,
7309    /// Uniform distribution
7310    Uniform,
7311}
7312
7313/// Distribution fit test method.
7314#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7315#[serde(rename_all = "snake_case")]
7316pub enum DistributionFitMethod {
7317    /// Kolmogorov-Smirnov test
7318    #[default]
7319    KolmogorovSmirnov,
7320    /// Anderson-Darling test
7321    AndersonDarling,
7322    /// Chi-squared test
7323    ChiSquared,
7324}
7325
7326/// Validation reporting configuration.
7327#[derive(Debug, Clone, Serialize, Deserialize)]
7328pub struct ValidationReportingConfig {
7329    /// Output validation report to file.
7330    #[serde(default)]
7331    pub output_report: bool,
7332
7333    /// Report format.
7334    #[serde(default)]
7335    pub format: ValidationReportFormat,
7336
7337    /// Fail generation if validation fails.
7338    #[serde(default)]
7339    pub fail_on_error: bool,
7340
7341    /// Include detailed statistics in report.
7342    #[serde(default = "default_true")]
7343    pub include_details: bool,
7344}
7345
7346impl Default for ValidationReportingConfig {
7347    fn default() -> Self {
7348        Self {
7349            output_report: false,
7350            format: ValidationReportFormat::Json,
7351            fail_on_error: false,
7352            include_details: true,
7353        }
7354    }
7355}
7356
7357/// Validation report format.
7358#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7359#[serde(rename_all = "snake_case")]
7360pub enum ValidationReportFormat {
7361    /// JSON format
7362    #[default]
7363    Json,
7364    /// YAML format
7365    Yaml,
7366    /// HTML report
7367    Html,
7368}
7369
7370// =============================================================================
7371// Temporal Patterns Configuration
7372// =============================================================================
7373
7374/// Temporal patterns configuration for business days, period-end dynamics, and processing lags.
7375///
7376/// This section enables sophisticated temporal modeling including:
7377/// - Business day calculations and settlement dates
7378/// - Regional holiday calendars
7379/// - Period-end decay curves (non-flat volume spikes)
7380/// - Processing lag modeling (event-to-posting delays)
7381#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7382pub struct TemporalPatternsConfig {
7383    /// Enable temporal patterns features.
7384    #[serde(default)]
7385    pub enabled: bool,
7386
7387    /// Business day calculation configuration.
7388    #[serde(default)]
7389    pub business_days: BusinessDaySchemaConfig,
7390
7391    /// Regional calendar configuration.
7392    #[serde(default)]
7393    pub calendars: CalendarSchemaConfig,
7394
7395    /// Period-end dynamics configuration.
7396    #[serde(default)]
7397    pub period_end: PeriodEndSchemaConfig,
7398
7399    /// Processing lag configuration.
7400    #[serde(default)]
7401    pub processing_lags: ProcessingLagSchemaConfig,
7402
7403    /// Fiscal calendar configuration (custom year start, 4-4-5, 13-period).
7404    #[serde(default)]
7405    pub fiscal_calendar: FiscalCalendarSchemaConfig,
7406
7407    /// Intra-day patterns configuration (morning spike, lunch dip, EOD rush).
7408    #[serde(default)]
7409    pub intraday: IntraDaySchemaConfig,
7410
7411    /// Timezone handling configuration.
7412    #[serde(default)]
7413    pub timezones: TimezoneSchemaConfig,
7414}
7415
7416/// Business day calculation configuration.
7417#[derive(Debug, Clone, Serialize, Deserialize)]
7418pub struct BusinessDaySchemaConfig {
7419    /// Enable business day calculations.
7420    #[serde(default = "default_true")]
7421    pub enabled: bool,
7422
7423    /// Half-day policy: "full_day", "half_day", "non_business_day".
7424    #[serde(default = "default_half_day_policy")]
7425    pub half_day_policy: String,
7426
7427    /// Settlement rules configuration.
7428    #[serde(default)]
7429    pub settlement_rules: SettlementRulesSchemaConfig,
7430
7431    /// Month-end convention: "modified_following", "preceding", "following", "end_of_month".
7432    #[serde(default = "default_month_end_convention")]
7433    pub month_end_convention: String,
7434
7435    /// Weekend days (e.g., ["saturday", "sunday"] or ["friday", "saturday"] for Middle East).
7436    #[serde(default)]
7437    pub weekend_days: Option<Vec<String>>,
7438}
7439
/// Serde fallback for `half_day_policy`.
fn default_half_day_policy() -> String {
    String::from("half_day")
}

/// Serde fallback for `month_end_convention`.
fn default_month_end_convention() -> String {
    String::from("modified_following")
}
7447
7448impl Default for BusinessDaySchemaConfig {
7449    fn default() -> Self {
7450        Self {
7451            enabled: true,
7452            half_day_policy: "half_day".to_string(),
7453            settlement_rules: SettlementRulesSchemaConfig::default(),
7454            month_end_convention: "modified_following".to_string(),
7455            weekend_days: None,
7456        }
7457    }
7458}
7459
7460/// Settlement rules configuration.
7461#[derive(Debug, Clone, Serialize, Deserialize)]
7462pub struct SettlementRulesSchemaConfig {
7463    /// Equity settlement days (T+N).
7464    #[serde(default = "default_settlement_2")]
7465    pub equity_days: i32,
7466
7467    /// Government bonds settlement days.
7468    #[serde(default = "default_settlement_1")]
7469    pub government_bonds_days: i32,
7470
7471    /// FX spot settlement days.
7472    #[serde(default = "default_settlement_2")]
7473    pub fx_spot_days: i32,
7474
7475    /// Corporate bonds settlement days.
7476    #[serde(default = "default_settlement_2")]
7477    pub corporate_bonds_days: i32,
7478
7479    /// Wire transfer cutoff time (HH:MM format).
7480    #[serde(default = "default_wire_cutoff")]
7481    pub wire_cutoff_time: String,
7482
7483    /// International wire settlement days.
7484    #[serde(default = "default_settlement_1")]
7485    pub wire_international_days: i32,
7486
7487    /// ACH settlement days.
7488    #[serde(default = "default_settlement_1")]
7489    pub ach_days: i32,
7490}
7491
/// Serde fallback for settlement fields that default to one day.
fn default_settlement_1() -> i32 {
    1
}

/// Serde fallback for settlement fields that default to two days.
fn default_settlement_2() -> i32 {
    2
}

/// Serde fallback for the wire cutoff time (HH:MM).
fn default_wire_cutoff() -> String {
    String::from("14:00")
}
7503
7504impl Default for SettlementRulesSchemaConfig {
7505    fn default() -> Self {
7506        Self {
7507            equity_days: 2,
7508            government_bonds_days: 1,
7509            fx_spot_days: 2,
7510            corporate_bonds_days: 2,
7511            wire_cutoff_time: "14:00".to_string(),
7512            wire_international_days: 1,
7513            ach_days: 1,
7514        }
7515    }
7516}
7517
7518/// Regional calendar configuration.
7519#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7520pub struct CalendarSchemaConfig {
7521    /// List of regions to include (e.g., ["US", "DE", "BR", "SG", "KR"]).
7522    #[serde(default)]
7523    pub regions: Vec<String>,
7524
7525    /// Custom holidays (in addition to regional calendars).
7526    #[serde(default)]
7527    pub custom_holidays: Vec<CustomHolidaySchemaConfig>,
7528}
7529
7530/// Custom holiday configuration.
7531#[derive(Debug, Clone, Serialize, Deserialize)]
7532pub struct CustomHolidaySchemaConfig {
7533    /// Holiday name.
7534    pub name: String,
7535    /// Month (1-12).
7536    pub month: u8,
7537    /// Day of month.
7538    pub day: u8,
7539    /// Activity multiplier (0.0-1.0, default 0.05).
7540    #[serde(default = "default_holiday_multiplier")]
7541    pub activity_multiplier: f64,
7542}
7543
/// Serde fallback for a custom holiday's activity multiplier.
fn default_holiday_multiplier() -> f64 {
    0.05
}
7547
7548/// Period-end dynamics configuration.
7549#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7550pub struct PeriodEndSchemaConfig {
7551    /// Model type: "flat", "exponential", "extended_crunch", "daily_profile".
7552    #[serde(default)]
7553    pub model: Option<String>,
7554
7555    /// Month-end configuration.
7556    #[serde(default)]
7557    pub month_end: Option<PeriodEndModelSchemaConfig>,
7558
7559    /// Quarter-end configuration.
7560    #[serde(default)]
7561    pub quarter_end: Option<PeriodEndModelSchemaConfig>,
7562
7563    /// Year-end configuration.
7564    #[serde(default)]
7565    pub year_end: Option<PeriodEndModelSchemaConfig>,
7566}
7567
7568/// Period-end model configuration.
7569#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7570pub struct PeriodEndModelSchemaConfig {
7571    /// Inherit configuration from another period (e.g., "month_end").
7572    #[serde(default)]
7573    pub inherit_from: Option<String>,
7574
7575    /// Additional multiplier on top of inherited/base model.
7576    #[serde(default)]
7577    pub additional_multiplier: Option<f64>,
7578
7579    /// Days before period end to start acceleration (negative, e.g., -10).
7580    #[serde(default)]
7581    pub start_day: Option<i32>,
7582
7583    /// Base multiplier at start of acceleration.
7584    #[serde(default)]
7585    pub base_multiplier: Option<f64>,
7586
7587    /// Peak multiplier on last day.
7588    #[serde(default)]
7589    pub peak_multiplier: Option<f64>,
7590
7591    /// Decay rate for exponential model (0.1-0.5 typical).
7592    #[serde(default)]
7593    pub decay_rate: Option<f64>,
7594
7595    /// Sustained high days for crunch model.
7596    #[serde(default)]
7597    pub sustained_high_days: Option<i32>,
7598}
7599
7600/// Processing lag configuration.
7601#[derive(Debug, Clone, Serialize, Deserialize)]
7602pub struct ProcessingLagSchemaConfig {
7603    /// Enable processing lag calculations.
7604    #[serde(default = "default_true")]
7605    pub enabled: bool,
7606
7607    /// Sales order lag configuration (log-normal mu, sigma).
7608    #[serde(default)]
7609    pub sales_order_lag: Option<LagDistributionSchemaConfig>,
7610
7611    /// Purchase order lag configuration.
7612    #[serde(default)]
7613    pub purchase_order_lag: Option<LagDistributionSchemaConfig>,
7614
7615    /// Goods receipt lag configuration.
7616    #[serde(default)]
7617    pub goods_receipt_lag: Option<LagDistributionSchemaConfig>,
7618
7619    /// Invoice receipt lag configuration.
7620    #[serde(default)]
7621    pub invoice_receipt_lag: Option<LagDistributionSchemaConfig>,
7622
7623    /// Invoice issue lag configuration.
7624    #[serde(default)]
7625    pub invoice_issue_lag: Option<LagDistributionSchemaConfig>,
7626
7627    /// Payment lag configuration.
7628    #[serde(default)]
7629    pub payment_lag: Option<LagDistributionSchemaConfig>,
7630
7631    /// Journal entry lag configuration.
7632    #[serde(default)]
7633    pub journal_entry_lag: Option<LagDistributionSchemaConfig>,
7634
7635    /// Cross-day posting configuration.
7636    #[serde(default)]
7637    pub cross_day_posting: Option<CrossDayPostingSchemaConfig>,
7638}
7639
7640impl Default for ProcessingLagSchemaConfig {
7641    fn default() -> Self {
7642        Self {
7643            enabled: true,
7644            sales_order_lag: None,
7645            purchase_order_lag: None,
7646            goods_receipt_lag: None,
7647            invoice_receipt_lag: None,
7648            invoice_issue_lag: None,
7649            payment_lag: None,
7650            journal_entry_lag: None,
7651            cross_day_posting: None,
7652        }
7653    }
7654}
7655
7656/// Lag distribution configuration (log-normal parameters).
7657#[derive(Debug, Clone, Serialize, Deserialize)]
7658pub struct LagDistributionSchemaConfig {
7659    /// Log-scale mean (mu for log-normal).
7660    pub mu: f64,
7661    /// Log-scale standard deviation (sigma for log-normal).
7662    pub sigma: f64,
7663    /// Minimum lag in hours.
7664    #[serde(default)]
7665    pub min_hours: Option<f64>,
7666    /// Maximum lag in hours.
7667    #[serde(default)]
7668    pub max_hours: Option<f64>,
7669}
7670
7671/// Cross-day posting configuration.
7672#[derive(Debug, Clone, Serialize, Deserialize)]
7673pub struct CrossDayPostingSchemaConfig {
7674    /// Enable cross-day posting logic.
7675    #[serde(default = "default_true")]
7676    pub enabled: bool,
7677
7678    /// Probability of next-day posting by hour (map of hour -> probability).
7679    /// E.g., { 17: 0.7, 19: 0.9, 21: 0.99 }
7680    #[serde(default)]
7681    pub probability_by_hour: std::collections::HashMap<u8, f64>,
7682}
7683
7684impl Default for CrossDayPostingSchemaConfig {
7685    fn default() -> Self {
7686        let mut probability_by_hour = std::collections::HashMap::new();
7687        probability_by_hour.insert(17, 0.3);
7688        probability_by_hour.insert(18, 0.6);
7689        probability_by_hour.insert(19, 0.8);
7690        probability_by_hour.insert(20, 0.9);
7691        probability_by_hour.insert(21, 0.95);
7692        probability_by_hour.insert(22, 0.99);
7693
7694        Self {
7695            enabled: true,
7696            probability_by_hour,
7697        }
7698    }
7699}
7700
7701// =============================================================================
7702// Fiscal Calendar Configuration (P2)
7703// =============================================================================
7704
7705/// Fiscal calendar configuration.
7706///
7707/// Supports calendar year, custom year start, 4-4-5 retail calendar,
7708/// and 13-period calendars.
7709#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7710pub struct FiscalCalendarSchemaConfig {
7711    /// Enable non-standard fiscal calendar.
7712    #[serde(default)]
7713    pub enabled: bool,
7714
7715    /// Fiscal calendar type: "calendar_year", "custom", "four_four_five", "thirteen_period".
7716    #[serde(default = "default_fiscal_calendar_type")]
7717    pub calendar_type: String,
7718
7719    /// Month the fiscal year starts (1-12). Used for custom year start.
7720    #[serde(default)]
7721    pub year_start_month: Option<u8>,
7722
7723    /// Day the fiscal year starts (1-31). Used for custom year start.
7724    #[serde(default)]
7725    pub year_start_day: Option<u8>,
7726
7727    /// 4-4-5 calendar configuration (if calendar_type is "four_four_five").
7728    #[serde(default)]
7729    pub four_four_five: Option<FourFourFiveSchemaConfig>,
7730}
7731
/// Serde fallback for the fiscal calendar type.
fn default_fiscal_calendar_type() -> String {
    String::from("calendar_year")
}
7735
7736/// 4-4-5 retail calendar configuration.
7737#[derive(Debug, Clone, Serialize, Deserialize)]
7738pub struct FourFourFiveSchemaConfig {
7739    /// Week pattern: "four_four_five", "four_five_four", "five_four_four".
7740    #[serde(default = "default_week_pattern")]
7741    pub pattern: String,
7742
7743    /// Anchor type: "first_sunday", "last_saturday", "nearest_saturday".
7744    #[serde(default = "default_anchor_type")]
7745    pub anchor_type: String,
7746
7747    /// Anchor month (1-12).
7748    #[serde(default = "default_anchor_month")]
7749    pub anchor_month: u8,
7750
7751    /// Where to place leap week: "q4_period3" or "q1_period1".
7752    #[serde(default = "default_leap_week_placement")]
7753    pub leap_week_placement: String,
7754}
7755
/// Serde fallback for the 4-4-5 week pattern.
fn default_week_pattern() -> String {
    String::from("four_four_five")
}

/// Serde fallback for the 4-4-5 anchor type.
fn default_anchor_type() -> String {
    String::from("last_saturday")
}

/// Serde fallback for the 4-4-5 anchor month (January).
fn default_anchor_month() -> u8 {
    1
}

/// Serde fallback for the leap week placement.
fn default_leap_week_placement() -> String {
    String::from("q4_period3")
}
7771
7772impl Default for FourFourFiveSchemaConfig {
7773    fn default() -> Self {
7774        Self {
7775            pattern: "four_four_five".to_string(),
7776            anchor_type: "last_saturday".to_string(),
7777            anchor_month: 1,
7778            leap_week_placement: "q4_period3".to_string(),
7779        }
7780    }
7781}
7782
7783// =============================================================================
7784// Intra-Day Patterns Configuration (P2)
7785// =============================================================================
7786
7787/// Intra-day patterns configuration.
7788///
7789/// Defines time-of-day segments with different activity multipliers
7790/// for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
7791#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7792pub struct IntraDaySchemaConfig {
7793    /// Enable intra-day patterns.
7794    #[serde(default)]
7795    pub enabled: bool,
7796
7797    /// Custom intra-day segments.
7798    #[serde(default)]
7799    pub segments: Vec<IntraDaySegmentSchemaConfig>,
7800}
7801
7802/// Intra-day segment configuration.
7803#[derive(Debug, Clone, Serialize, Deserialize)]
7804pub struct IntraDaySegmentSchemaConfig {
7805    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
7806    pub name: String,
7807
7808    /// Start time (HH:MM format).
7809    pub start: String,
7810
7811    /// End time (HH:MM format).
7812    pub end: String,
7813
7814    /// Activity multiplier (1.0 = normal).
7815    #[serde(default = "default_multiplier")]
7816    pub multiplier: f64,
7817
7818    /// Posting type: "human", "system", "both".
7819    #[serde(default = "default_posting_type")]
7820    pub posting_type: String,
7821}
7822
/// Serde fallback for an intra-day segment's activity multiplier.
fn default_multiplier() -> f64 {
    1.0
}

/// Serde fallback for an intra-day segment's posting type.
fn default_posting_type() -> String {
    String::from("both")
}
7830
7831// =============================================================================
7832// Timezone Configuration
7833// =============================================================================
7834
7835/// Timezone handling configuration for multi-region entities.
7836#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7837pub struct TimezoneSchemaConfig {
7838    /// Enable timezone handling.
7839    #[serde(default)]
7840    pub enabled: bool,
7841
7842    /// Default timezone (IANA format, e.g., "America/New_York").
7843    #[serde(default = "default_timezone")]
7844    pub default_timezone: String,
7845
7846    /// Consolidation timezone for group reporting (IANA format).
7847    #[serde(default = "default_consolidation_timezone")]
7848    pub consolidation_timezone: String,
7849
7850    /// Entity-to-timezone mappings.
7851    /// Supports patterns like "EU_*" -> "Europe/London".
7852    #[serde(default)]
7853    pub entity_mappings: Vec<EntityTimezoneMapping>,
7854}
7855
/// Serde fallback for the default timezone.
fn default_timezone() -> String {
    String::from("America/New_York")
}

/// Serde fallback for the consolidation timezone.
fn default_consolidation_timezone() -> String {
    String::from("UTC")
}
7863
7864/// Mapping from entity pattern to timezone.
7865#[derive(Debug, Clone, Serialize, Deserialize)]
7866pub struct EntityTimezoneMapping {
7867    /// Entity code pattern (e.g., "EU_*", "*_APAC", "1000").
7868    pub pattern: String,
7869
7870    /// Timezone (IANA format, e.g., "Europe/London").
7871    pub timezone: String,
7872}
7873
7874// =============================================================================
7875// Vendor Network Configuration
7876// =============================================================================
7877
7878/// Configuration for multi-tier vendor network generation.
7879#[derive(Debug, Clone, Serialize, Deserialize)]
7880pub struct VendorNetworkSchemaConfig {
7881    /// Enable vendor network generation.
7882    #[serde(default)]
7883    pub enabled: bool,
7884
7885    /// Maximum depth of supply chain tiers (1-3).
7886    #[serde(default = "default_vendor_tier_depth")]
7887    pub depth: u8,
7888
7889    /// Tier 1 vendor count configuration.
7890    #[serde(default)]
7891    pub tier1: TierCountSchemaConfig,
7892
7893    /// Tier 2 vendors per Tier 1 parent.
7894    #[serde(default)]
7895    pub tier2_per_parent: TierCountSchemaConfig,
7896
7897    /// Tier 3 vendors per Tier 2 parent.
7898    #[serde(default)]
7899    pub tier3_per_parent: TierCountSchemaConfig,
7900
7901    /// Vendor cluster distribution.
7902    #[serde(default)]
7903    pub clusters: VendorClusterSchemaConfig,
7904
7905    /// Concentration limits.
7906    #[serde(default)]
7907    pub dependencies: DependencySchemaConfig,
7908}
7909
/// Serde fallback for the vendor network tier depth.
fn default_vendor_tier_depth() -> u8 {
    3
}
7913
7914impl Default for VendorNetworkSchemaConfig {
7915    fn default() -> Self {
7916        Self {
7917            enabled: false,
7918            depth: 3,
7919            tier1: TierCountSchemaConfig { min: 50, max: 100 },
7920            tier2_per_parent: TierCountSchemaConfig { min: 4, max: 10 },
7921            tier3_per_parent: TierCountSchemaConfig { min: 2, max: 5 },
7922            clusters: VendorClusterSchemaConfig::default(),
7923            dependencies: DependencySchemaConfig::default(),
7924        }
7925    }
7926}
7927
7928/// Tier count configuration.
7929#[derive(Debug, Clone, Serialize, Deserialize)]
7930pub struct TierCountSchemaConfig {
7931    /// Minimum count.
7932    #[serde(default = "default_tier_min")]
7933    pub min: usize,
7934
7935    /// Maximum count.
7936    #[serde(default = "default_tier_max")]
7937    pub max: usize,
7938}
7939
/// Serde fallback for a tier's minimum count.
fn default_tier_min() -> usize {
    5
}

/// Serde fallback for a tier's maximum count.
fn default_tier_max() -> usize {
    20
}
7947
7948impl Default for TierCountSchemaConfig {
7949    fn default() -> Self {
7950        Self {
7951            min: default_tier_min(),
7952            max: default_tier_max(),
7953        }
7954    }
7955}
7956
7957/// Vendor cluster distribution configuration.
7958#[derive(Debug, Clone, Serialize, Deserialize)]
7959pub struct VendorClusterSchemaConfig {
7960    /// Reliable strategic vendors percentage (default: 0.20).
7961    #[serde(default = "default_reliable_strategic")]
7962    pub reliable_strategic: f64,
7963
7964    /// Standard operational vendors percentage (default: 0.50).
7965    #[serde(default = "default_standard_operational")]
7966    pub standard_operational: f64,
7967
7968    /// Transactional vendors percentage (default: 0.25).
7969    #[serde(default = "default_transactional")]
7970    pub transactional: f64,
7971
7972    /// Problematic vendors percentage (default: 0.05).
7973    #[serde(default = "default_problematic")]
7974    pub problematic: f64,
7975}
7976
/// Serde fallback for the reliable strategic vendor share.
fn default_reliable_strategic() -> f64 {
    0.20
}

/// Serde fallback for the standard operational vendor share.
fn default_standard_operational() -> f64 {
    0.50
}

/// Serde fallback for the transactional vendor share.
fn default_transactional() -> f64 {
    0.25
}

/// Serde fallback for the problematic vendor share.
fn default_problematic() -> f64 {
    0.05
}
7992
7993impl Default for VendorClusterSchemaConfig {
7994    fn default() -> Self {
7995        Self {
7996            reliable_strategic: 0.20,
7997            standard_operational: 0.50,
7998            transactional: 0.25,
7999            problematic: 0.05,
8000        }
8001    }
8002}
8003
8004/// Dependency and concentration limits configuration.
8005#[derive(Debug, Clone, Serialize, Deserialize)]
8006pub struct DependencySchemaConfig {
8007    /// Maximum concentration for a single vendor (default: 0.15).
8008    #[serde(default = "default_max_single_vendor")]
8009    pub max_single_vendor_concentration: f64,
8010
8011    /// Maximum concentration for top 5 vendors (default: 0.45).
8012    #[serde(default = "default_max_top5")]
8013    pub top_5_concentration: f64,
8014
8015    /// Percentage of single-source vendors (default: 0.05).
8016    #[serde(default = "default_single_source_percent")]
8017    pub single_source_percent: f64,
8018}
8019
/// Serde fallback for the single-vendor concentration cap.
fn default_max_single_vendor() -> f64 {
    0.15
}

/// Serde fallback for the top-5 concentration cap.
fn default_max_top5() -> f64 {
    0.45
}

/// Serde fallback for the single-source vendor share.
fn default_single_source_percent() -> f64 {
    0.05
}
8031
8032impl Default for DependencySchemaConfig {
8033    fn default() -> Self {
8034        Self {
8035            max_single_vendor_concentration: 0.15,
8036            top_5_concentration: 0.45,
8037            single_source_percent: 0.05,
8038        }
8039    }
8040}
8041
8042// =============================================================================
8043// Customer Segmentation Configuration
8044// =============================================================================
8045
8046/// Configuration for customer segmentation generation.
8047#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8048pub struct CustomerSegmentationSchemaConfig {
8049    /// Enable customer segmentation generation.
8050    #[serde(default)]
8051    pub enabled: bool,
8052
8053    /// Value segment distribution.
8054    #[serde(default)]
8055    pub value_segments: ValueSegmentsSchemaConfig,
8056
8057    /// Lifecycle stage configuration.
8058    #[serde(default)]
8059    pub lifecycle: LifecycleSchemaConfig,
8060
8061    /// Network (referrals, hierarchies) configuration.
8062    #[serde(default)]
8063    pub networks: CustomerNetworksSchemaConfig,
8064}
8065
8066/// Customer value segments distribution configuration.
8067#[derive(Debug, Clone, Serialize, Deserialize)]
8068pub struct ValueSegmentsSchemaConfig {
8069    /// Enterprise segment configuration.
8070    #[serde(default)]
8071    pub enterprise: SegmentDetailSchemaConfig,
8072
8073    /// Mid-market segment configuration.
8074    #[serde(default)]
8075    pub mid_market: SegmentDetailSchemaConfig,
8076
8077    /// SMB segment configuration.
8078    #[serde(default)]
8079    pub smb: SegmentDetailSchemaConfig,
8080
8081    /// Consumer segment configuration.
8082    #[serde(default)]
8083    pub consumer: SegmentDetailSchemaConfig,
8084}
8085
8086impl Default for ValueSegmentsSchemaConfig {
8087    fn default() -> Self {
8088        Self {
8089            enterprise: SegmentDetailSchemaConfig {
8090                revenue_share: 0.40,
8091                customer_share: 0.05,
8092                avg_order_value_range: "50000+".to_string(),
8093            },
8094            mid_market: SegmentDetailSchemaConfig {
8095                revenue_share: 0.35,
8096                customer_share: 0.20,
8097                avg_order_value_range: "5000-50000".to_string(),
8098            },
8099            smb: SegmentDetailSchemaConfig {
8100                revenue_share: 0.20,
8101                customer_share: 0.50,
8102                avg_order_value_range: "500-5000".to_string(),
8103            },
8104            consumer: SegmentDetailSchemaConfig {
8105                revenue_share: 0.05,
8106                customer_share: 0.25,
8107                avg_order_value_range: "50-500".to_string(),
8108            },
8109        }
8110    }
8111}
8112
8113/// Individual segment detail configuration.
8114#[derive(Debug, Clone, Serialize, Deserialize)]
8115pub struct SegmentDetailSchemaConfig {
8116    /// Revenue share for this segment.
8117    #[serde(default)]
8118    pub revenue_share: f64,
8119
8120    /// Customer share for this segment.
8121    #[serde(default)]
8122    pub customer_share: f64,
8123
8124    /// Average order value range (e.g., "5000-50000" or "50000+").
8125    #[serde(default)]
8126    pub avg_order_value_range: String,
8127}
8128
8129impl Default for SegmentDetailSchemaConfig {
8130    fn default() -> Self {
8131        Self {
8132            revenue_share: 0.25,
8133            customer_share: 0.25,
8134            avg_order_value_range: "1000-10000".to_string(),
8135        }
8136    }
8137}
8138
8139/// Customer lifecycle stage configuration.
8140#[derive(Debug, Clone, Serialize, Deserialize)]
8141pub struct LifecycleSchemaConfig {
8142    /// Prospect stage rate.
8143    #[serde(default)]
8144    pub prospect_rate: f64,
8145
8146    /// New customer stage rate.
8147    #[serde(default = "default_new_rate")]
8148    pub new_rate: f64,
8149
8150    /// Growth stage rate.
8151    #[serde(default = "default_growth_rate")]
8152    pub growth_rate: f64,
8153
8154    /// Mature stage rate.
8155    #[serde(default = "default_mature_rate")]
8156    pub mature_rate: f64,
8157
8158    /// At-risk stage rate.
8159    #[serde(default = "default_at_risk_rate")]
8160    pub at_risk_rate: f64,
8161
8162    /// Churned stage rate.
8163    #[serde(default = "default_churned_rate")]
8164    pub churned_rate: f64,
8165
8166    /// Won-back stage rate (churned customers reacquired).
8167    #[serde(default)]
8168    pub won_back_rate: f64,
8169}
8170
/// Serde fallback for the new customer stage rate.
fn default_new_rate() -> f64 {
    0.10
}

/// Serde fallback for the growth stage rate.
fn default_growth_rate() -> f64 {
    0.15
}

/// Serde fallback for the mature stage rate.
fn default_mature_rate() -> f64 {
    0.60
}

/// Serde fallback for the at-risk stage rate.
fn default_at_risk_rate() -> f64 {
    0.10
}

/// Serde fallback for the churned stage rate.
fn default_churned_rate() -> f64 {
    0.05
}
8190
8191impl Default for LifecycleSchemaConfig {
8192    fn default() -> Self {
8193        Self {
8194            prospect_rate: 0.0,
8195            new_rate: 0.10,
8196            growth_rate: 0.15,
8197            mature_rate: 0.60,
8198            at_risk_rate: 0.10,
8199            churned_rate: 0.05,
8200            won_back_rate: 0.0,
8201        }
8202    }
8203}
8204
8205/// Customer networks configuration (referrals, hierarchies).
8206#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8207pub struct CustomerNetworksSchemaConfig {
8208    /// Referral network configuration.
8209    #[serde(default)]
8210    pub referrals: ReferralSchemaConfig,
8211
8212    /// Corporate hierarchy configuration.
8213    #[serde(default)]
8214    pub corporate_hierarchies: HierarchySchemaConfig,
8215}
8216
8217/// Referral network configuration.
8218#[derive(Debug, Clone, Serialize, Deserialize)]
8219pub struct ReferralSchemaConfig {
8220    /// Enable referral generation.
8221    #[serde(default = "default_true")]
8222    pub enabled: bool,
8223
8224    /// Rate of customers acquired via referral.
8225    #[serde(default = "default_referral_rate")]
8226    pub referral_rate: f64,
8227}
8228
/// Serde fallback for the referral rate.
fn default_referral_rate() -> f64 {
    0.15
}
8232
8233impl Default for ReferralSchemaConfig {
8234    fn default() -> Self {
8235        Self {
8236            enabled: true,
8237            referral_rate: 0.15,
8238        }
8239    }
8240}
8241
/// Corporate hierarchy configuration.
///
/// Every field has a serde default, so an empty mapping deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchySchemaConfig {
    /// Enable corporate hierarchy generation (`Default` impl: `true`).
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Rate of customers in hierarchies (default: 0.30).
    ///
    /// Note the key is `probability`, not `hierarchy_rate`, even though the
    /// default comes from `default_hierarchy_rate`.
    #[serde(default = "default_hierarchy_rate")]
    pub probability: f64,
}
8253
/// Serde default for `HierarchySchemaConfig::probability`: 0.30.
fn default_hierarchy_rate() -> f64 {
    0.30_f64
}
8257
8258impl Default for HierarchySchemaConfig {
8259    fn default() -> Self {
8260        Self {
8261            enabled: true,
8262            probability: 0.30,
8263        }
8264    }
8265}
8266
8267// =============================================================================
8268// Relationship Strength Configuration
8269// =============================================================================
8270
/// Configuration for relationship strength calculation.
///
/// All fields are `#[serde(default)]`; an omitted section yields a disabled
/// calculation with the default weights and thresholds.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RelationshipStrengthSchemaConfig {
    /// Enable relationship strength calculation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Calculation weights.
    #[serde(default)]
    pub calculation: StrengthCalculationSchemaConfig,

    /// Strength thresholds for classification.
    #[serde(default)]
    pub thresholds: StrengthThresholdsSchemaConfig,
}
8286
/// Strength calculation weights configuration.
///
/// The five default weights add up to 1.0
/// (0.30 + 0.25 + 0.20 + 0.15 + 0.10). Nothing in this type enforces that
/// sum for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrengthCalculationSchemaConfig {
    /// Weight for transaction volume (default: 0.30).
    #[serde(default = "default_volume_weight")]
    pub transaction_volume_weight: f64,

    /// Weight for transaction count (default: 0.25).
    #[serde(default = "default_count_weight")]
    pub transaction_count_weight: f64,

    /// Weight for relationship duration (default: 0.20).
    #[serde(default = "default_duration_weight")]
    pub relationship_duration_weight: f64,

    /// Weight for recency (default: 0.15).
    #[serde(default = "default_recency_weight")]
    pub recency_weight: f64,

    /// Weight for mutual connections (default: 0.10).
    #[serde(default = "default_mutual_weight")]
    pub mutual_connections_weight: f64,

    /// Recency half-life in days (default: 90).
    #[serde(default = "default_recency_half_life")]
    pub recency_half_life_days: u32,
}
8314
/// Serde default for `transaction_volume_weight`: 0.30.
fn default_volume_weight() -> f64 {
    0.30_f64
}
8318
/// Serde default for `transaction_count_weight`: 0.25.
fn default_count_weight() -> f64 {
    0.25_f64
}
8322
/// Serde default for `relationship_duration_weight`: 0.20.
fn default_duration_weight() -> f64 {
    0.20_f64
}
8326
/// Serde default for `recency_weight`: 0.15.
fn default_recency_weight() -> f64 {
    0.15_f64
}
8330
/// Serde default for `mutual_connections_weight`: 0.10.
fn default_mutual_weight() -> f64 {
    0.10_f64
}
8334
/// Serde default for `recency_half_life_days`: 90 days.
fn default_recency_half_life() -> u32 {
    90_u32
}
8338
8339impl Default for StrengthCalculationSchemaConfig {
8340    fn default() -> Self {
8341        Self {
8342            transaction_volume_weight: 0.30,
8343            transaction_count_weight: 0.25,
8344            relationship_duration_weight: 0.20,
8345            recency_weight: 0.15,
8346            mutual_connections_weight: 0.10,
8347            recency_half_life_days: 90,
8348        }
8349    }
8350}
8351
/// Strength thresholds for relationship classification.
///
/// The defaults are ordered `strong` (0.7) > `moderate` (0.4) > `weak` (0.1).
/// This type itself does not check that ordering for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrengthThresholdsSchemaConfig {
    /// Threshold for strong relationships (default: 0.7).
    #[serde(default = "default_strong_threshold")]
    pub strong: f64,

    /// Threshold for moderate relationships (default: 0.4).
    #[serde(default = "default_moderate_threshold")]
    pub moderate: f64,

    /// Threshold for weak relationships (default: 0.1).
    #[serde(default = "default_weak_threshold")]
    pub weak: f64,
}
8367
/// Serde default for `StrengthThresholdsSchemaConfig::strong`: 0.7.
fn default_strong_threshold() -> f64 {
    0.7_f64
}
8371
/// Serde default for `StrengthThresholdsSchemaConfig::moderate`: 0.4.
fn default_moderate_threshold() -> f64 {
    0.4_f64
}
8375
/// Serde default for `StrengthThresholdsSchemaConfig::weak`: 0.1.
fn default_weak_threshold() -> f64 {
    0.1_f64
}
8379
8380impl Default for StrengthThresholdsSchemaConfig {
8381    fn default() -> Self {
8382        Self {
8383            strong: 0.7,
8384            moderate: 0.4,
8385            weak: 0.1,
8386        }
8387    }
8388}
8389
8390// =============================================================================
8391// Cross-Process Links Configuration
8392// =============================================================================
8393
/// Configuration for cross-process linkages.
///
/// Link generation as a whole defaults to off (`enabled`), while the three
/// individual link kinds default to on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossProcessLinksSchemaConfig {
    /// Enable cross-process link generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Enable inventory links between P2P and O2C (`Default` impl: `true`).
    #[serde(default = "default_true")]
    pub inventory_p2p_o2c: bool,

    /// Enable payment to bank reconciliation links (`Default` impl: `true`).
    #[serde(default = "default_true")]
    pub payment_bank_reconciliation: bool,

    /// Enable intercompany bilateral matching (`Default` impl: `true`).
    #[serde(default = "default_true")]
    pub intercompany_bilateral: bool,

    /// Fraction of GR/Deliveries to link via inventory, expressed in the
    /// range 0.0 - 1.0 rather than as a percentage (default: 0.30).
    #[serde(default = "default_inventory_link_rate")]
    pub inventory_link_rate: f64,
}
8417
/// Serde default for `CrossProcessLinksSchemaConfig::inventory_link_rate`:
/// 0.30.
fn default_inventory_link_rate() -> f64 {
    0.30_f64
}
8421
8422impl Default for CrossProcessLinksSchemaConfig {
8423    fn default() -> Self {
8424        Self {
8425            enabled: false,
8426            inventory_p2p_o2c: true,
8427            payment_bank_reconciliation: true,
8428            intercompany_bilateral: true,
8429            inventory_link_rate: 0.30,
8430        }
8431    }
8432}
8433
8434// =============================================================================
8435// Organizational Events Configuration
8436// =============================================================================
8437
/// Configuration for organizational events (acquisitions, divestitures, etc.).
///
/// Every field is `#[serde(default)]`: an omitted section is disabled, uses
/// the default blending mode, and carries three empty event lists.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct OrganizationalEventsSchemaConfig {
    /// Enable organizational events (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Effect blending mode (multiplicative, additive, maximum, minimum).
    /// Defaults to multiplicative.
    #[serde(default)]
    pub effect_blending: EffectBlendingModeConfig,

    /// Organizational events (acquisitions, divestitures, reorganizations, etc.).
    #[serde(default)]
    pub events: Vec<OrganizationalEventSchemaConfig>,

    /// Process evolution events.
    #[serde(default)]
    pub process_evolution: Vec<ProcessEvolutionSchemaConfig>,

    /// Technology transition events.
    #[serde(default)]
    pub technology_transitions: Vec<TechnologyTransitionSchemaConfig>,
}
8461
/// Effect blending mode for combining multiple event effects.
///
/// Serialized in snake_case: `multiplicative`, `additive`, `maximum`,
/// `minimum`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum EffectBlendingModeConfig {
    /// Multiply effects together (the default mode).
    #[default]
    Multiplicative,
    /// Add effects together.
    Additive,
    /// Take the maximum effect.
    Maximum,
    /// Take the minimum effect.
    Minimum,
}
8476
/// Configuration for a single organizational event.
///
/// `id`, `event_type` and `effective_date` have no serde default and must be
/// present in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizationalEventSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type and configuration.
    pub event_type: OrganizationalEventTypeSchemaConfig,

    /// Effective date, held as a plain string; this type does not parse or
    /// validate it.
    pub effective_date: String,

    /// Transition duration in months (default: 6).
    #[serde(default = "default_org_transition_months")]
    pub transition_months: u32,

    /// Optional free-text description (default: `None`).
    #[serde(default)]
    pub description: Option<String>,
}
8497
/// Serde default for `OrganizationalEventSchemaConfig::transition_months`:
/// 6 months.
fn default_org_transition_months() -> u32 {
    6_u32
}
8501
/// Organizational event type configuration.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the snake_case variant name (`acquisition`, `divestiture`,
/// `reorganization`, `leadership_change`, `workforce_reduction`, `merger`).
/// The variant's own fields sit alongside that key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum OrganizationalEventTypeSchemaConfig {
    /// Acquisition event.
    Acquisition {
        /// Acquired entity code (required).
        acquired_entity: String,
        /// Volume increase multiplier (default: 1.35).
        #[serde(default = "default_acquisition_volume")]
        volume_increase: f64,
        /// Integration error rate (default: 0.05).
        #[serde(default = "default_acquisition_error")]
        integration_error_rate: f64,
        /// Parallel posting days (default: 30).
        #[serde(default = "default_parallel_days")]
        parallel_posting_days: u32,
    },
    /// Divestiture event.
    Divestiture {
        /// Divested entity code (required).
        divested_entity: String,
        /// Volume reduction factor (default: 0.70).
        #[serde(default = "default_divestiture_volume")]
        volume_reduction: f64,
        /// Remove entity from generation (default: `true`).
        #[serde(default = "default_true_val")]
        remove_entity: bool,
    },
    /// Reorganization event.
    Reorganization {
        /// Cost center remapping (default: empty map).
        #[serde(default)]
        cost_center_remapping: std::collections::HashMap<String, String>,
        /// Transition error rate (default: 0.04).
        #[serde(default = "default_reorg_error")]
        transition_error_rate: f64,
    },
    /// Leadership change event.
    LeadershipChange {
        /// Role that changed (required).
        role: String,
        /// Policy changes (default: empty list).
        #[serde(default)]
        policy_changes: Vec<String>,
    },
    /// Workforce reduction event.
    WorkforceReduction {
        /// Reduction percentage, stored as a fraction (default: 0.10).
        #[serde(default = "default_workforce_reduction")]
        reduction_percent: f64,
        /// Error rate increase (default: 0.05).
        #[serde(default = "default_workforce_error")]
        error_rate_increase: f64,
    },
    /// Merger event.
    Merger {
        /// Merged entity code (required).
        merged_entity: String,
        /// Volume increase multiplier (default: 1.80).
        #[serde(default = "default_merger_volume")]
        volume_increase: f64,
    },
}
8566
/// Serde default for an acquisition's `volume_increase` multiplier: 1.35.
fn default_acquisition_volume() -> f64 {
    1.35_f64
}
8570
/// Serde default for an acquisition's `integration_error_rate`: 0.05.
fn default_acquisition_error() -> f64 {
    0.05_f64
}
8574
/// Serde default for an acquisition's `parallel_posting_days`: 30 days.
fn default_parallel_days() -> u32 {
    30_u32
}
8578
/// Serde default for a divestiture's `volume_reduction` factor: 0.70.
fn default_divestiture_volume() -> f64 {
    0.70_f64
}
8582
/// Serde default that yields `true`.
///
/// Referenced by a divestiture's `remove_entity` flag and by the `enabled`
/// flags of the statistical, categorical and temporal drift-labeling configs.
fn default_true_val() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
8586
/// Serde default for a reorganization's `transition_error_rate`: 0.04.
fn default_reorg_error() -> f64 {
    0.04_f64
}
8590
/// Serde default for a workforce reduction's `reduction_percent`: 0.10.
fn default_workforce_reduction() -> f64 {
    0.10_f64
}
8594
/// Serde default for a workforce reduction's `error_rate_increase`: 0.05.
fn default_workforce_error() -> f64 {
    0.05_f64
}
8598
/// Serde default for a merger's `volume_increase` multiplier: 1.80.
fn default_merger_volume() -> f64 {
    1.80_f64
}
8602
/// Configuration for a process evolution event.
///
/// `id`, `event_type` and `effective_date` have no serde default and must be
/// present in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessEvolutionSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type.
    pub event_type: ProcessEvolutionTypeSchemaConfig,

    /// Effective date, held as a plain string; this type does not parse or
    /// validate it.
    pub effective_date: String,

    /// Optional free-text description (default: `None`).
    #[serde(default)]
    pub description: Option<String>,
}
8619
/// Process evolution type configuration.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the snake_case variant name (`process_automation`,
/// `approval_workflow_change`, `control_enhancement`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProcessEvolutionTypeSchemaConfig {
    /// Process automation.
    ProcessAutomation {
        /// Process name (required).
        process_name: String,
        /// Manual rate before (default: 0.80).
        #[serde(default = "default_manual_before")]
        manual_rate_before: f64,
        /// Manual rate after (default: 0.15).
        #[serde(default = "default_manual_after")]
        manual_rate_after: f64,
    },
    /// Approval workflow change.
    ApprovalWorkflowChange {
        /// Description (required for this variant).
        description: String,
    },
    /// Control enhancement.
    ControlEnhancement {
        /// Control ID (required).
        control_id: String,
        /// Error reduction (default: 0.02).
        #[serde(default = "default_error_reduction")]
        error_reduction: f64,
    },
}
8649
/// Serde default for a process automation's `manual_rate_before`: 0.80.
fn default_manual_before() -> f64 {
    0.80_f64
}
8653
/// Serde default for a process automation's `manual_rate_after`: 0.15.
fn default_manual_after() -> f64 {
    0.15_f64
}
8657
/// Serde default for a control enhancement's `error_reduction`: 0.02.
fn default_error_reduction() -> f64 {
    0.02_f64
}
8661
/// Configuration for a technology transition event.
///
/// Unlike the organizational and process-evolution event structs, this one
/// has no `effective_date` field; the dates live inside the `event_type`
/// variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnologyTransitionSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type.
    pub event_type: TechnologyTransitionTypeSchemaConfig,

    /// Optional free-text description (default: `None`).
    #[serde(default)]
    pub description: Option<String>,
}
8675
/// Technology transition type configuration.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the snake_case variant name (`erp_migration`, `module_implementation`).
/// All dates are held as plain strings; this type does not parse or validate
/// them.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TechnologyTransitionTypeSchemaConfig {
    /// ERP migration.
    ErpMigration {
        /// Source system (required).
        source_system: String,
        /// Target system (required).
        target_system: String,
        /// Cutover date (required).
        cutover_date: String,
        /// Stabilization end date (required).
        stabilization_end: String,
        /// Duplicate rate during migration (default: 0.02).
        #[serde(default = "default_erp_duplicate_rate")]
        duplicate_rate: f64,
        /// Format mismatch rate (default: 0.03).
        #[serde(default = "default_format_mismatch")]
        format_mismatch_rate: f64,
    },
    /// Module implementation.
    ModuleImplementation {
        /// Module name (required).
        module_name: String,
        /// Go-live date (required).
        go_live_date: String,
    },
}
8705
/// Serde default for an ERP migration's `duplicate_rate`: 0.02.
fn default_erp_duplicate_rate() -> f64 {
    0.02_f64
}
8709
/// Serde default for an ERP migration's `format_mismatch_rate`: 0.03.
fn default_format_mismatch() -> f64 {
    0.03_f64
}
8713
8714// =============================================================================
8715// Behavioral Drift Configuration
8716// =============================================================================
8717
/// Configuration for behavioral drift (vendor, customer, employee behavior).
///
/// **Deprecated (v4.1.2):** this schema section is currently
/// validated-but-inert — no runtime code consumes its fields. Users
/// who want behavioral drift-style effects should reach for
/// `distributions.regime_changes` (v3.5.2+), which drives the
/// `DriftController` via the parameter-drift path. The schema type
/// remains for backward-compatible YAML loading; it will be removed
/// in a future major version once `regime_changes` gains per-entity
/// (vendor / customer / employee) targeting.
///
/// Every field is `#[serde(default)]`, so any subsection may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BehavioralDriftSchemaConfig {
    /// Enable behavioral drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Vendor behavior drift.
    #[serde(default)]
    pub vendor_behavior: VendorBehaviorSchemaConfig,

    /// Customer behavior drift.
    #[serde(default)]
    pub customer_behavior: CustomerBehaviorSchemaConfig,

    /// Employee behavior drift.
    #[serde(default)]
    pub employee_behavior: EmployeeBehaviorSchemaConfig,

    /// Collective behavior drift.
    #[serde(default)]
    pub collective: CollectiveBehaviorSchemaConfig,
}
8750
/// Vendor behavior drift configuration.
///
/// Groups the two vendor-side drift sections; each is optional in the input
/// and falls back to its own `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VendorBehaviorSchemaConfig {
    /// Payment terms drift.
    #[serde(default)]
    pub payment_terms_drift: PaymentTermsDriftSchemaConfig,

    /// Quality drift.
    #[serde(default)]
    pub quality_drift: QualityDriftSchemaConfig,
}
8762
/// Payment terms drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDriftSchemaConfig {
    /// Extension rate per year, in days (default: 2.5).
    #[serde(default = "default_extension_rate")]
    pub extension_rate_per_year: f64,

    /// Economic sensitivity (default: 1.0).
    #[serde(default = "default_economic_sensitivity")]
    pub economic_sensitivity: f64,
}
8774
/// Serde default for `extension_rate_per_year`: 2.5.
fn default_extension_rate() -> f64 {
    2.5_f64
}
8778
/// Serde default for `economic_sensitivity`: 1.0.
fn default_economic_sensitivity() -> f64 {
    1.0_f64
}
8782
8783impl Default for PaymentTermsDriftSchemaConfig {
8784    fn default() -> Self {
8785        Self {
8786            extension_rate_per_year: 2.5,
8787            economic_sensitivity: 1.0,
8788        }
8789    }
8790}
8791
/// Quality drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDriftSchemaConfig {
    /// New vendor improvement rate, per year (default: 0.02).
    #[serde(default = "default_improvement_rate")]
    pub new_vendor_improvement_rate: f64,

    /// Complacency decline rate, per year after the first year
    /// (default: 0.01).
    #[serde(default = "default_decline_rate")]
    pub complacency_decline_rate: f64,
}
8803
/// Serde default for `new_vendor_improvement_rate`: 0.02.
fn default_improvement_rate() -> f64 {
    0.02_f64
}
8807
/// Serde default for `complacency_decline_rate`: 0.01.
fn default_decline_rate() -> f64 {
    0.01_f64
}
8811
8812impl Default for QualityDriftSchemaConfig {
8813    fn default() -> Self {
8814        Self {
8815            new_vendor_improvement_rate: 0.02,
8816            complacency_decline_rate: 0.01,
8817        }
8818    }
8819}
8820
/// Customer behavior drift configuration.
///
/// Groups the two customer-side drift sections; each is optional in the
/// input and falls back to its own `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CustomerBehaviorSchemaConfig {
    /// Payment drift.
    #[serde(default)]
    pub payment_drift: CustomerPaymentDriftSchemaConfig,

    /// Order drift.
    #[serde(default)]
    pub order_drift: OrderDriftSchemaConfig,
}
8832
/// Customer payment drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerPaymentDriftSchemaConfig {
    /// Days extension during downturn as a `(min, max)` pair
    /// (default: `(5, 15)`).
    #[serde(default = "default_downturn_extension")]
    pub downturn_days_extension: (u32, u32),

    /// Bad debt increase during downturn (default: 0.02).
    #[serde(default = "default_bad_debt_increase")]
    pub downturn_bad_debt_increase: f64,
}
8844
/// Serde default for `downturn_days_extension`: a `(min, max)` pair of
/// `(5, 15)` days.
fn default_downturn_extension() -> (u32, u32) {
    let (min_days, max_days) = (5_u32, 15_u32);
    (min_days, max_days)
}
8848
/// Serde default for `downturn_bad_debt_increase`: 0.02.
fn default_bad_debt_increase() -> f64 {
    0.02_f64
}
8852
8853impl Default for CustomerPaymentDriftSchemaConfig {
8854    fn default() -> Self {
8855        Self {
8856            downturn_days_extension: (5, 15),
8857            downturn_bad_debt_increase: 0.02,
8858        }
8859    }
8860}
8861
/// Order drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrderDriftSchemaConfig {
    /// Digital shift rate, per year (default: 0.05).
    #[serde(default = "default_digital_shift")]
    pub digital_shift_rate: f64,
}
8869
/// Serde default for `digital_shift_rate`: 0.05.
fn default_digital_shift() -> f64 {
    0.05_f64
}
8873
8874impl Default for OrderDriftSchemaConfig {
8875    fn default() -> Self {
8876        Self {
8877            digital_shift_rate: 0.05,
8878        }
8879    }
8880}
8881
/// Employee behavior drift configuration.
///
/// Groups the two employee-side drift sections; each is optional in the
/// input and falls back to its own `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EmployeeBehaviorSchemaConfig {
    /// Approval drift.
    #[serde(default)]
    pub approval_drift: ApprovalDriftSchemaConfig,

    /// Error drift.
    #[serde(default)]
    pub error_drift: ErrorDriftSchemaConfig,
}
8893
/// Approval drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalDriftSchemaConfig {
    /// EOM intensity increase per year (default: 0.05).
    #[serde(default = "default_eom_intensity")]
    pub eom_intensity_increase_per_year: f64,

    /// Rubber stamp volume threshold (default: 50).
    #[serde(default = "default_rubber_stamp")]
    pub rubber_stamp_volume_threshold: u32,
}
8905
/// Serde default for `eom_intensity_increase_per_year`: 0.05.
fn default_eom_intensity() -> f64 {
    0.05_f64
}
8909
/// Serde default for `rubber_stamp_volume_threshold`: 50.
fn default_rubber_stamp() -> u32 {
    50_u32
}
8913
8914impl Default for ApprovalDriftSchemaConfig {
8915    fn default() -> Self {
8916        Self {
8917            eom_intensity_increase_per_year: 0.05,
8918            rubber_stamp_volume_threshold: 50,
8919        }
8920    }
8921}
8922
/// Error drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorDriftSchemaConfig {
    /// New employee error rate (default: 0.08).
    #[serde(default = "default_new_error")]
    pub new_employee_error_rate: f64,

    /// Learning curve length in months (default: 6).
    #[serde(default = "default_learning_months")]
    pub learning_curve_months: u32,
}
8934
/// Serde default for `new_employee_error_rate`: 0.08.
fn default_new_error() -> f64 {
    0.08_f64
}
8938
/// Serde default for `learning_curve_months`: 6 months.
fn default_learning_months() -> u32 {
    6_u32
}
8942
8943impl Default for ErrorDriftSchemaConfig {
8944    fn default() -> Self {
8945        Self {
8946            new_employee_error_rate: 0.08,
8947            learning_curve_months: 6,
8948        }
8949    }
8950}
8951
/// Collective behavior drift configuration.
///
/// Currently holds a single optional section, the automation adoption model.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectiveBehaviorSchemaConfig {
    /// Automation adoption configuration.
    #[serde(default)]
    pub automation_adoption: AutomationAdoptionSchemaConfig,
}
8959
/// Automation adoption configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAdoptionSchemaConfig {
    /// Enable S-curve adoption model (default: `false`).
    #[serde(default)]
    pub s_curve_enabled: bool,

    /// Adoption midpoint in months (default: 24).
    #[serde(default = "default_midpoint")]
    pub adoption_midpoint_months: u32,

    /// Steepness of adoption curve (default: 0.15).
    #[serde(default = "default_steepness")]
    pub steepness: f64,
}
8975
/// Serde default for `adoption_midpoint_months`: 24 months.
fn default_midpoint() -> u32 {
    24_u32
}
8979
/// Serde default for `AutomationAdoptionSchemaConfig::steepness`: 0.15.
fn default_steepness() -> f64 {
    0.15_f64
}
8983
8984impl Default for AutomationAdoptionSchemaConfig {
8985    fn default() -> Self {
8986        Self {
8987            s_curve_enabled: false,
8988            adoption_midpoint_months: 24,
8989            steepness: 0.15,
8990        }
8991    }
8992}
8993
8994// =============================================================================
8995// Market Drift Configuration
8996// =============================================================================
8997
/// Configuration for market drift (economic cycles, commodities, price shocks).
///
/// **Deprecated (v4.1.2):** validated-but-inert. Use
/// `distributions.regime_changes.economic_cycle` +
/// `distributions.regime_changes.parameter_drifts` for the
/// equivalent runtime behaviour (shipped in v3.5.2). The schema
/// type remains for backward-compatible YAML loading; will be
/// removed in v5.0.
///
/// Every field is `#[serde(default)]`, so any subsection may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct MarketDriftSchemaConfig {
    /// Enable market drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Economic cycle configuration.
    #[serde(default)]
    pub economic_cycle: MarketEconomicCycleSchemaConfig,

    /// Industry-specific cycles, keyed by a string (default: empty map).
    /// NOTE(review): the key is presumably an industry name — confirm
    /// against the consumer.
    #[serde(default)]
    pub industry_cycles: std::collections::HashMap<String, IndustryCycleSchemaConfig>,

    /// Commodity drift configuration.
    #[serde(default)]
    pub commodities: CommoditiesSchemaConfig,
}
9024
/// Market economic cycle configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarketEconomicCycleSchemaConfig {
    /// Enable economic cycle (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Cycle type (default: sinusoidal).
    #[serde(default)]
    pub cycle_type: CycleTypeSchemaConfig,

    /// Cycle period in months (default: 48).
    #[serde(default = "default_market_cycle_period")]
    pub period_months: u32,

    /// Amplitude (default: 0.15).
    #[serde(default = "default_market_amplitude")]
    pub amplitude: f64,

    /// Recession configuration.
    #[serde(default)]
    pub recession: RecessionSchemaConfig,
}
9048
/// Serde default for `MarketEconomicCycleSchemaConfig::period_months`:
/// 48 months.
fn default_market_cycle_period() -> u32 {
    48_u32
}
9052
/// Serde default for `MarketEconomicCycleSchemaConfig::amplitude`: 0.15.
fn default_market_amplitude() -> f64 {
    0.15_f64
}
9056
9057impl Default for MarketEconomicCycleSchemaConfig {
9058    fn default() -> Self {
9059        Self {
9060            enabled: false,
9061            cycle_type: CycleTypeSchemaConfig::Sinusoidal,
9062            period_months: 48,
9063            amplitude: 0.15,
9064            recession: RecessionSchemaConfig::default(),
9065        }
9066    }
9067}
9068
/// Cycle type configuration.
///
/// Serialized in snake_case: `sinusoidal`, `asymmetric`, `mean_reverting`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CycleTypeSchemaConfig {
    /// Sinusoidal cycle (the default).
    #[default]
    Sinusoidal,
    /// Asymmetric cycle.
    Asymmetric,
    /// Mean-reverting cycle.
    MeanReverting,
}
9081
/// Recession configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionSchemaConfig {
    /// Enable recession simulation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Probability per year (default: 0.10).
    #[serde(default = "default_recession_prob")]
    pub probability_per_year: f64,

    /// Severity (default: moderate).
    #[serde(default)]
    pub severity: RecessionSeveritySchemaConfig,

    /// Specific recession periods (default: empty list).
    #[serde(default)]
    pub recession_periods: Vec<RecessionPeriodSchemaConfig>,
}
9101
/// Serde default for `RecessionSchemaConfig::probability_per_year`: 0.10.
fn default_recession_prob() -> f64 {
    0.10_f64
}
9105
9106impl Default for RecessionSchemaConfig {
9107    fn default() -> Self {
9108        Self {
9109            enabled: false,
9110            probability_per_year: 0.10,
9111            severity: RecessionSeveritySchemaConfig::Moderate,
9112            recession_periods: Vec::new(),
9113        }
9114    }
9115}
9116
/// Recession severity configuration.
///
/// Serialized in snake_case: `mild`, `moderate`, `severe`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RecessionSeveritySchemaConfig {
    /// Mild recession.
    Mild,
    /// Moderate recession (the default).
    #[default]
    Moderate,
    /// Severe recession.
    Severe,
}
9129
/// Recession period configuration.
///
/// Both fields are required; neither has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodSchemaConfig {
    /// Start month.
    /// NOTE(review): presumably a month offset into the simulated period
    /// rather than a calendar month — confirm against the consumer.
    pub start_month: u32,
    /// Duration in months.
    pub duration_months: u32,
}
9138
/// Industry cycle configuration.
///
/// Used as the value type of `MarketDriftSchemaConfig::industry_cycles`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryCycleSchemaConfig {
    /// Period in months (default: 36).
    #[serde(default = "default_industry_period")]
    pub period_months: u32,

    /// Amplitude (default: 0.20).
    #[serde(default = "default_industry_amp")]
    pub amplitude: f64,
}
9150
/// Serde default for `IndustryCycleSchemaConfig::period_months`: 36 months.
fn default_industry_period() -> u32 {
    36_u32
}
9154
/// Serde default for `IndustryCycleSchemaConfig::amplitude`: 0.20.
fn default_industry_amp() -> f64 {
    0.20_f64
}
9158
/// Commodities drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommoditiesSchemaConfig {
    /// Enable commodity drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Commodity items (default: empty list).
    #[serde(default)]
    pub items: Vec<CommodityItemSchemaConfig>,
}
9170
/// Commodity item configuration.
///
/// Only `name` is required; the numeric fields all have serde defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommodityItemSchemaConfig {
    /// Commodity name.
    pub name: String,

    /// Volatility (default: 0.20).
    #[serde(default = "default_volatility")]
    pub volatility: f64,

    /// COGS pass-through (default: 0.0).
    #[serde(default)]
    pub cogs_pass_through: f64,

    /// Overhead pass-through (default: 0.0).
    #[serde(default)]
    pub overhead_pass_through: f64,
}
9189
/// Serde default for `CommodityItemSchemaConfig::volatility`: 0.20.
fn default_volatility() -> f64 {
    0.20_f64
}
9193
9194// =============================================================================
9195// Drift Labeling Configuration
9196// =============================================================================
9197
/// Configuration for drift ground truth labeling.
///
/// **Deprecated (v4.1.2):** validated-but-inert. The v3.3.0
/// analytics-metadata phase (`DriftEventGenerator` +
/// `AnalyticsMetadataSnapshot.drift_events`) produces drift labels
/// at runtime — configure it via `analytics_metadata.drift_events`
/// instead. The schema type remains for backward-compatible YAML
/// loading; will be removed in v5.0.
///
/// Labeling as a whole defaults to off (`enabled`), while the three
/// per-category sub-configs each default to enabled.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DriftLabelingSchemaConfig {
    /// Enable drift labeling (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Statistical drift labeling.
    #[serde(default)]
    pub statistical: StatisticalDriftLabelingSchemaConfig,

    /// Categorical drift labeling.
    #[serde(default)]
    pub categorical: CategoricalDriftLabelingSchemaConfig,

    /// Temporal drift labeling.
    #[serde(default)]
    pub temporal: TemporalDriftLabelingSchemaConfig,

    /// Regulatory calendar preset name (default: `None`).
    #[serde(default)]
    pub regulatory_calendar_preset: Option<String>,
}
9228
/// Statistical drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalDriftLabelingSchemaConfig {
    /// Enable statistical drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,

    /// Minimum magnitude threshold (default: 0.05).
    #[serde(default = "default_min_magnitude")]
    pub min_magnitude_threshold: f64,
}
9240
/// Serde default for `min_magnitude_threshold`: 0.05.
fn default_min_magnitude() -> f64 {
    0.05_f64
}
9244
9245impl Default for StatisticalDriftLabelingSchemaConfig {
9246    fn default() -> Self {
9247        Self {
9248            enabled: true,
9249            min_magnitude_threshold: 0.05,
9250        }
9251    }
9252}
9253
/// Categorical drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoricalDriftLabelingSchemaConfig {
    /// Enable categorical drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9261
9262impl Default for CategoricalDriftLabelingSchemaConfig {
9263    fn default() -> Self {
9264        Self { enabled: true }
9265    }
9266}
9267
/// Temporal drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalDriftLabelingSchemaConfig {
    /// Enable temporal drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9275
9276impl Default for TemporalDriftLabelingSchemaConfig {
9277    fn default() -> Self {
9278        Self { enabled: true }
9279    }
9280}
9281
9282// =============================================================================
9283// Enhanced Anomaly Injection Configuration
9284// =============================================================================
9285
/// Enhanced anomaly injection configuration.
///
/// Provides comprehensive anomaly injection capabilities including:
/// - Multi-stage fraud schemes (embezzlement, revenue manipulation, kickbacks)
/// - Correlated anomaly injection (co-occurrence patterns, error cascades)
/// - Near-miss generation for false positive reduction
/// - Detection difficulty classification
/// - Context-aware injection based on entity behavior
///
/// Every field is `#[serde(default)]`, so any subsection may be omitted from
/// the input and takes the `Default` value of its type.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EnhancedAnomalyConfig {
    /// Enable enhanced anomaly injection (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Base anomaly rates.
    #[serde(default)]
    pub rates: AnomalyRateConfig,

    /// Multi-stage fraud scheme configuration.
    #[serde(default)]
    pub multi_stage_schemes: MultiStageSchemeConfig,

    /// Correlated anomaly injection configuration.
    #[serde(default)]
    pub correlated_injection: CorrelatedInjectionConfig,

    /// Near-miss generation configuration.
    #[serde(default)]
    pub near_miss: NearMissConfig,

    /// Detection difficulty classification configuration.
    #[serde(default)]
    pub difficulty_classification: DifficultyClassificationConfig,

    /// Context-aware injection configuration.
    #[serde(default)]
    pub context_aware: ContextAwareConfig,

    /// Enhanced labeling configuration.
    #[serde(default)]
    pub labeling: EnhancedLabelingConfig,
}
9328
9329/// Base anomaly rate configuration.
9330#[derive(Debug, Clone, Serialize, Deserialize)]
9331pub struct AnomalyRateConfig {
9332    /// Total anomaly rate (0.0 to 1.0).
9333    #[serde(default = "default_total_anomaly_rate")]
9334    pub total_rate: f64,
9335
9336    /// Fraud anomaly rate.
9337    #[serde(default = "default_fraud_anomaly_rate")]
9338    pub fraud_rate: f64,
9339
9340    /// Error anomaly rate.
9341    #[serde(default = "default_error_anomaly_rate")]
9342    pub error_rate: f64,
9343
9344    /// Process issue rate.
9345    #[serde(default = "default_process_anomaly_rate")]
9346    pub process_rate: f64,
9347}
9348
/// Serde default for `AnomalyRateConfig::total_rate`.
fn default_total_anomaly_rate() -> f64 {
    const TOTAL_RATE: f64 = 0.03;
    TOTAL_RATE
}

/// Serde default for `AnomalyRateConfig::fraud_rate`.
fn default_fraud_anomaly_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.01;
    FRAUD_RATE
}

/// Serde default for `AnomalyRateConfig::error_rate`.
fn default_error_anomaly_rate() -> f64 {
    const ERROR_RATE: f64 = 0.015;
    ERROR_RATE
}

/// Serde default for `AnomalyRateConfig::process_rate`.
fn default_process_anomaly_rate() -> f64 {
    const PROCESS_RATE: f64 = 0.005;
    PROCESS_RATE
}
9361
9362impl Default for AnomalyRateConfig {
9363    fn default() -> Self {
9364        Self {
9365            total_rate: default_total_anomaly_rate(),
9366            fraud_rate: default_fraud_anomaly_rate(),
9367            error_rate: default_error_anomaly_rate(),
9368            process_rate: default_process_anomaly_rate(),
9369        }
9370    }
9371}
9372
9373/// Multi-stage fraud scheme configuration.
9374#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9375pub struct MultiStageSchemeConfig {
9376    /// Enable multi-stage fraud schemes.
9377    #[serde(default)]
9378    pub enabled: bool,
9379
9380    /// Embezzlement scheme configuration.
9381    #[serde(default)]
9382    pub embezzlement: EmbezzlementSchemeConfig,
9383
9384    /// Revenue manipulation scheme configuration.
9385    #[serde(default)]
9386    pub revenue_manipulation: RevenueManipulationSchemeConfig,
9387
9388    /// Vendor kickback scheme configuration.
9389    #[serde(default)]
9390    pub kickback: KickbackSchemeConfig,
9391}
9392
9393/// Embezzlement scheme configuration.
9394#[derive(Debug, Clone, Serialize, Deserialize)]
9395pub struct EmbezzlementSchemeConfig {
9396    /// Probability of starting an embezzlement scheme per perpetrator per year.
9397    #[serde(default = "default_embezzlement_probability")]
9398    pub probability: f64,
9399
9400    /// Testing stage configuration.
9401    #[serde(default)]
9402    pub testing_stage: SchemeStageConfig,
9403
9404    /// Escalation stage configuration.
9405    #[serde(default)]
9406    pub escalation_stage: SchemeStageConfig,
9407
9408    /// Acceleration stage configuration.
9409    #[serde(default)]
9410    pub acceleration_stage: SchemeStageConfig,
9411
9412    /// Desperation stage configuration.
9413    #[serde(default)]
9414    pub desperation_stage: SchemeStageConfig,
9415}
9416
/// Serde default for `EmbezzlementSchemeConfig::probability`.
fn default_embezzlement_probability() -> f64 {
    const EMBEZZLEMENT_PROBABILITY: f64 = 0.02;
    EMBEZZLEMENT_PROBABILITY
}
9420
9421impl Default for EmbezzlementSchemeConfig {
9422    fn default() -> Self {
9423        Self {
9424            probability: default_embezzlement_probability(),
9425            testing_stage: SchemeStageConfig {
9426                duration_months: 2,
9427                amount_min: 100.0,
9428                amount_max: 500.0,
9429                transaction_count_min: 2,
9430                transaction_count_max: 5,
9431                difficulty: "hard".to_string(),
9432            },
9433            escalation_stage: SchemeStageConfig {
9434                duration_months: 6,
9435                amount_min: 500.0,
9436                amount_max: 2000.0,
9437                transaction_count_min: 3,
9438                transaction_count_max: 8,
9439                difficulty: "moderate".to_string(),
9440            },
9441            acceleration_stage: SchemeStageConfig {
9442                duration_months: 3,
9443                amount_min: 2000.0,
9444                amount_max: 10000.0,
9445                transaction_count_min: 5,
9446                transaction_count_max: 12,
9447                difficulty: "easy".to_string(),
9448            },
9449            desperation_stage: SchemeStageConfig {
9450                duration_months: 1,
9451                amount_min: 10000.0,
9452                amount_max: 50000.0,
9453                transaction_count_min: 3,
9454                transaction_count_max: 6,
9455                difficulty: "trivial".to_string(),
9456            },
9457        }
9458    }
9459}
9460
9461/// Revenue manipulation scheme configuration.
9462#[derive(Debug, Clone, Serialize, Deserialize)]
9463pub struct RevenueManipulationSchemeConfig {
9464    /// Probability of starting a revenue manipulation scheme per period.
9465    #[serde(default = "default_revenue_manipulation_probability")]
9466    pub probability: f64,
9467
9468    /// Early revenue recognition inflation target (Q4).
9469    #[serde(default = "default_early_recognition_target")]
9470    pub early_recognition_target: f64,
9471
9472    /// Expense deferral inflation target (Q1).
9473    #[serde(default = "default_expense_deferral_target")]
9474    pub expense_deferral_target: f64,
9475
9476    /// Reserve release inflation target (Q2).
9477    #[serde(default = "default_reserve_release_target")]
9478    pub reserve_release_target: f64,
9479
9480    /// Channel stuffing inflation target (Q4).
9481    #[serde(default = "default_channel_stuffing_target")]
9482    pub channel_stuffing_target: f64,
9483}
9484
/// Serde default for `RevenueManipulationSchemeConfig::probability`.
fn default_revenue_manipulation_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde default for `RevenueManipulationSchemeConfig::early_recognition_target`.
fn default_early_recognition_target() -> f64 {
    const EARLY_RECOGNITION: f64 = 0.02;
    EARLY_RECOGNITION
}

/// Serde default for `RevenueManipulationSchemeConfig::expense_deferral_target`.
fn default_expense_deferral_target() -> f64 {
    const EXPENSE_DEFERRAL: f64 = 0.03;
    EXPENSE_DEFERRAL
}

/// Serde default for `RevenueManipulationSchemeConfig::reserve_release_target`.
fn default_reserve_release_target() -> f64 {
    const RESERVE_RELEASE: f64 = 0.02;
    RESERVE_RELEASE
}

/// Serde default for `RevenueManipulationSchemeConfig::channel_stuffing_target`.
fn default_channel_stuffing_target() -> f64 {
    const CHANNEL_STUFFING: f64 = 0.05;
    CHANNEL_STUFFING
}
9500
9501impl Default for RevenueManipulationSchemeConfig {
9502    fn default() -> Self {
9503        Self {
9504            probability: default_revenue_manipulation_probability(),
9505            early_recognition_target: default_early_recognition_target(),
9506            expense_deferral_target: default_expense_deferral_target(),
9507            reserve_release_target: default_reserve_release_target(),
9508            channel_stuffing_target: default_channel_stuffing_target(),
9509        }
9510    }
9511}
9512
9513/// Vendor kickback scheme configuration.
9514#[derive(Debug, Clone, Serialize, Deserialize)]
9515pub struct KickbackSchemeConfig {
9516    /// Probability of starting a kickback scheme.
9517    #[serde(default = "default_kickback_probability")]
9518    pub probability: f64,
9519
9520    /// Minimum price inflation percentage.
9521    #[serde(default = "default_kickback_inflation_min")]
9522    pub inflation_min: f64,
9523
9524    /// Maximum price inflation percentage.
9525    #[serde(default = "default_kickback_inflation_max")]
9526    pub inflation_max: f64,
9527
9528    /// Kickback percentage (of inflation).
9529    #[serde(default = "default_kickback_percent")]
9530    pub kickback_percent: f64,
9531
9532    /// Setup duration in months.
9533    #[serde(default = "default_kickback_setup_months")]
9534    pub setup_months: u32,
9535
9536    /// Main operation duration in months.
9537    #[serde(default = "default_kickback_operation_months")]
9538    pub operation_months: u32,
9539}
9540
/// Serde default for `KickbackSchemeConfig::probability`.
fn default_kickback_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde default for `KickbackSchemeConfig::inflation_min`.
fn default_kickback_inflation_min() -> f64 {
    const INFLATION_MIN: f64 = 0.10;
    INFLATION_MIN
}

/// Serde default for `KickbackSchemeConfig::inflation_max`.
fn default_kickback_inflation_max() -> f64 {
    const INFLATION_MAX: f64 = 0.25;
    INFLATION_MAX
}

/// Serde default for `KickbackSchemeConfig::kickback_percent`.
fn default_kickback_percent() -> f64 {
    const KICKBACK_PERCENT: f64 = 0.50;
    KICKBACK_PERCENT
}

/// Serde default for `KickbackSchemeConfig::setup_months`.
fn default_kickback_setup_months() -> u32 {
    const SETUP_MONTHS: u32 = 3;
    SETUP_MONTHS
}

/// Serde default for `KickbackSchemeConfig::operation_months`.
fn default_kickback_operation_months() -> u32 {
    const OPERATION_MONTHS: u32 = 12;
    OPERATION_MONTHS
}
9559
9560impl Default for KickbackSchemeConfig {
9561    fn default() -> Self {
9562        Self {
9563            probability: default_kickback_probability(),
9564            inflation_min: default_kickback_inflation_min(),
9565            inflation_max: default_kickback_inflation_max(),
9566            kickback_percent: default_kickback_percent(),
9567            setup_months: default_kickback_setup_months(),
9568            operation_months: default_kickback_operation_months(),
9569        }
9570    }
9571}
9572
9573/// Individual scheme stage configuration.
9574#[derive(Debug, Clone, Serialize, Deserialize)]
9575pub struct SchemeStageConfig {
9576    /// Duration in months.
9577    pub duration_months: u32,
9578
9579    /// Minimum transaction amount.
9580    pub amount_min: f64,
9581
9582    /// Maximum transaction amount.
9583    pub amount_max: f64,
9584
9585    /// Minimum number of transactions.
9586    pub transaction_count_min: u32,
9587
9588    /// Maximum number of transactions.
9589    pub transaction_count_max: u32,
9590
9591    /// Detection difficulty level (trivial, easy, moderate, hard, expert).
9592    pub difficulty: String,
9593}
9594
9595impl Default for SchemeStageConfig {
9596    fn default() -> Self {
9597        Self {
9598            duration_months: 3,
9599            amount_min: 100.0,
9600            amount_max: 1000.0,
9601            transaction_count_min: 2,
9602            transaction_count_max: 10,
9603            difficulty: "moderate".to_string(),
9604        }
9605    }
9606}
9607
9608/// Correlated anomaly injection configuration.
9609#[derive(Debug, Clone, Serialize, Deserialize)]
9610pub struct CorrelatedInjectionConfig {
9611    /// Enable correlated anomaly injection.
9612    #[serde(default)]
9613    pub enabled: bool,
9614
9615    /// Enable fraud concealment co-occurrence patterns.
9616    #[serde(default = "default_true_val")]
9617    pub fraud_concealment: bool,
9618
9619    /// Enable error cascade patterns.
9620    #[serde(default = "default_true_val")]
9621    pub error_cascade: bool,
9622
9623    /// Enable temporal clustering (period-end spikes).
9624    #[serde(default = "default_true_val")]
9625    pub temporal_clustering: bool,
9626
9627    /// Temporal clustering configuration.
9628    #[serde(default)]
9629    pub temporal_clustering_config: TemporalClusteringConfig,
9630
9631    /// Co-occurrence patterns.
9632    #[serde(default)]
9633    pub co_occurrence_patterns: Vec<CoOccurrencePatternConfig>,
9634}
9635
9636impl Default for CorrelatedInjectionConfig {
9637    fn default() -> Self {
9638        Self {
9639            enabled: false,
9640            fraud_concealment: true,
9641            error_cascade: true,
9642            temporal_clustering: true,
9643            temporal_clustering_config: TemporalClusteringConfig::default(),
9644            co_occurrence_patterns: Vec::new(),
9645        }
9646    }
9647}
9648
9649/// Temporal clustering configuration.
9650#[derive(Debug, Clone, Serialize, Deserialize)]
9651pub struct TemporalClusteringConfig {
9652    /// Period-end error multiplier.
9653    #[serde(default = "default_period_end_multiplier")]
9654    pub period_end_multiplier: f64,
9655
9656    /// Number of business days before period end to apply multiplier.
9657    #[serde(default = "default_period_end_days")]
9658    pub period_end_days: u32,
9659
9660    /// Quarter-end additional multiplier.
9661    #[serde(default = "default_quarter_end_multiplier")]
9662    pub quarter_end_multiplier: f64,
9663
9664    /// Year-end additional multiplier.
9665    #[serde(default = "default_year_end_multiplier")]
9666    pub year_end_multiplier: f64,
9667}
9668
/// Serde default for `TemporalClusteringConfig::period_end_multiplier`.
fn default_period_end_multiplier() -> f64 {
    const PERIOD_END_MULTIPLIER: f64 = 2.5;
    PERIOD_END_MULTIPLIER
}

/// Serde default for `TemporalClusteringConfig::period_end_days`.
fn default_period_end_days() -> u32 {
    const PERIOD_END_DAYS: u32 = 5;
    PERIOD_END_DAYS
}

/// Serde default for `TemporalClusteringConfig::quarter_end_multiplier`.
fn default_quarter_end_multiplier() -> f64 {
    const QUARTER_END_MULTIPLIER: f64 = 1.5;
    QUARTER_END_MULTIPLIER
}

/// Serde default for `TemporalClusteringConfig::year_end_multiplier`.
fn default_year_end_multiplier() -> f64 {
    const YEAR_END_MULTIPLIER: f64 = 2.0;
    YEAR_END_MULTIPLIER
}
9681
9682impl Default for TemporalClusteringConfig {
9683    fn default() -> Self {
9684        Self {
9685            period_end_multiplier: default_period_end_multiplier(),
9686            period_end_days: default_period_end_days(),
9687            quarter_end_multiplier: default_quarter_end_multiplier(),
9688            year_end_multiplier: default_year_end_multiplier(),
9689        }
9690    }
9691}
9692
9693/// Co-occurrence pattern configuration.
9694#[derive(Debug, Clone, Serialize, Deserialize)]
9695pub struct CoOccurrencePatternConfig {
9696    /// Pattern name.
9697    pub name: String,
9698
9699    /// Primary anomaly type that triggers the pattern.
9700    pub primary_type: String,
9701
9702    /// Correlated anomalies.
9703    pub correlated: Vec<CorrelatedAnomalyConfig>,
9704}
9705
9706/// Correlated anomaly configuration.
9707#[derive(Debug, Clone, Serialize, Deserialize)]
9708pub struct CorrelatedAnomalyConfig {
9709    /// Anomaly type.
9710    pub anomaly_type: String,
9711
9712    /// Probability of occurrence (0.0 to 1.0).
9713    pub probability: f64,
9714
9715    /// Minimum lag in days.
9716    pub lag_days_min: i32,
9717
9718    /// Maximum lag in days.
9719    pub lag_days_max: i32,
9720}
9721
9722/// Near-miss generation configuration.
9723#[derive(Debug, Clone, Serialize, Deserialize)]
9724pub struct NearMissConfig {
9725    /// Enable near-miss generation.
9726    #[serde(default)]
9727    pub enabled: bool,
9728
9729    /// Proportion of "anomalies" that are actually near-misses (0.0 to 1.0).
9730    #[serde(default = "default_near_miss_proportion")]
9731    pub proportion: f64,
9732
9733    /// Enable near-duplicate pattern.
9734    #[serde(default = "default_true_val")]
9735    pub near_duplicate: bool,
9736
9737    /// Near-duplicate date difference range in days.
9738    #[serde(default)]
9739    pub near_duplicate_days: NearDuplicateDaysConfig,
9740
9741    /// Enable threshold proximity pattern.
9742    #[serde(default = "default_true_val")]
9743    pub threshold_proximity: bool,
9744
9745    /// Threshold proximity range (e.g., 0.90-0.99 of threshold).
9746    #[serde(default)]
9747    pub threshold_proximity_range: ThresholdProximityRangeConfig,
9748
9749    /// Enable unusual but legitimate patterns.
9750    #[serde(default = "default_true_val")]
9751    pub unusual_legitimate: bool,
9752
9753    /// Types of unusual legitimate patterns to generate.
9754    #[serde(default = "default_unusual_legitimate_types")]
9755    pub unusual_legitimate_types: Vec<String>,
9756
9757    /// Enable corrected error patterns.
9758    #[serde(default = "default_true_val")]
9759    pub corrected_errors: bool,
9760
9761    /// Corrected error correction lag range in days.
9762    #[serde(default)]
9763    pub corrected_error_lag: CorrectedErrorLagConfig,
9764}
9765
/// Serde default for `NearMissConfig::proportion`.
fn default_near_miss_proportion() -> f64 {
    const NEAR_MISS_PROPORTION: f64 = 0.30;
    NEAR_MISS_PROPORTION
}
9769
/// Serde default for `NearMissConfig::unusual_legitimate_types`: the four
/// built-in pattern names, in a fixed order.
fn default_unusual_legitimate_types() -> Vec<String> {
    let names = [
        "year_end_bonus",
        "contract_prepayment",
        "insurance_claim",
        "settlement_payment",
    ];
    names.iter().map(|name| String::from(*name)).collect()
}
9778
9779impl Default for NearMissConfig {
9780    fn default() -> Self {
9781        Self {
9782            enabled: false,
9783            proportion: default_near_miss_proportion(),
9784            near_duplicate: true,
9785            near_duplicate_days: NearDuplicateDaysConfig::default(),
9786            threshold_proximity: true,
9787            threshold_proximity_range: ThresholdProximityRangeConfig::default(),
9788            unusual_legitimate: true,
9789            unusual_legitimate_types: default_unusual_legitimate_types(),
9790            corrected_errors: true,
9791            corrected_error_lag: CorrectedErrorLagConfig::default(),
9792        }
9793    }
9794}
9795
9796/// Near-duplicate days configuration.
9797#[derive(Debug, Clone, Serialize, Deserialize)]
9798pub struct NearDuplicateDaysConfig {
9799    /// Minimum days apart.
9800    #[serde(default = "default_near_duplicate_min")]
9801    pub min: u32,
9802
9803    /// Maximum days apart.
9804    #[serde(default = "default_near_duplicate_max")]
9805    pub max: u32,
9806}
9807
/// Serde default for `NearDuplicateDaysConfig::min`.
fn default_near_duplicate_min() -> u32 {
    const MIN_DAYS: u32 = 1;
    MIN_DAYS
}

/// Serde default for `NearDuplicateDaysConfig::max`.
fn default_near_duplicate_max() -> u32 {
    const MAX_DAYS: u32 = 3;
    MAX_DAYS
}
9814
9815impl Default for NearDuplicateDaysConfig {
9816    fn default() -> Self {
9817        Self {
9818            min: default_near_duplicate_min(),
9819            max: default_near_duplicate_max(),
9820        }
9821    }
9822}
9823
9824/// Threshold proximity range configuration.
9825#[derive(Debug, Clone, Serialize, Deserialize)]
9826pub struct ThresholdProximityRangeConfig {
9827    /// Minimum proximity (e.g., 0.90 = 90% of threshold).
9828    #[serde(default = "default_threshold_proximity_min")]
9829    pub min: f64,
9830
9831    /// Maximum proximity (e.g., 0.99 = 99% of threshold).
9832    #[serde(default = "default_threshold_proximity_max")]
9833    pub max: f64,
9834}
9835
/// Serde default for `ThresholdProximityRangeConfig::min`.
fn default_threshold_proximity_min() -> f64 {
    const PROXIMITY_MIN: f64 = 0.90;
    PROXIMITY_MIN
}

/// Serde default for `ThresholdProximityRangeConfig::max`.
fn default_threshold_proximity_max() -> f64 {
    const PROXIMITY_MAX: f64 = 0.99;
    PROXIMITY_MAX
}
9842
9843impl Default for ThresholdProximityRangeConfig {
9844    fn default() -> Self {
9845        Self {
9846            min: default_threshold_proximity_min(),
9847            max: default_threshold_proximity_max(),
9848        }
9849    }
9850}
9851
9852/// Corrected error lag configuration.
9853#[derive(Debug, Clone, Serialize, Deserialize)]
9854pub struct CorrectedErrorLagConfig {
9855    /// Minimum correction lag in days.
9856    #[serde(default = "default_corrected_error_lag_min")]
9857    pub min: u32,
9858
9859    /// Maximum correction lag in days.
9860    #[serde(default = "default_corrected_error_lag_max")]
9861    pub max: u32,
9862}
9863
/// Serde default for `CorrectedErrorLagConfig::min`.
fn default_corrected_error_lag_min() -> u32 {
    const LAG_MIN_DAYS: u32 = 1;
    LAG_MIN_DAYS
}

/// Serde default for `CorrectedErrorLagConfig::max`.
fn default_corrected_error_lag_max() -> u32 {
    const LAG_MAX_DAYS: u32 = 5;
    LAG_MAX_DAYS
}
9870
9871impl Default for CorrectedErrorLagConfig {
9872    fn default() -> Self {
9873        Self {
9874            min: default_corrected_error_lag_min(),
9875            max: default_corrected_error_lag_max(),
9876        }
9877    }
9878}
9879
9880/// Detection difficulty classification configuration.
9881#[derive(Debug, Clone, Serialize, Deserialize)]
9882pub struct DifficultyClassificationConfig {
9883    /// Enable detection difficulty classification.
9884    #[serde(default)]
9885    pub enabled: bool,
9886
9887    /// Target distribution of difficulty levels.
9888    #[serde(default)]
9889    pub target_distribution: DifficultyDistributionConfig,
9890}
9891
9892impl Default for DifficultyClassificationConfig {
9893    fn default() -> Self {
9894        Self {
9895            enabled: true,
9896            target_distribution: DifficultyDistributionConfig::default(),
9897        }
9898    }
9899}
9900
9901/// Target distribution of detection difficulty levels.
9902#[derive(Debug, Clone, Serialize, Deserialize)]
9903pub struct DifficultyDistributionConfig {
9904    /// Proportion of trivial anomalies (expected 99% detection).
9905    #[serde(default = "default_difficulty_trivial")]
9906    pub trivial: f64,
9907
9908    /// Proportion of easy anomalies (expected 90% detection).
9909    #[serde(default = "default_difficulty_easy")]
9910    pub easy: f64,
9911
9912    /// Proportion of moderate anomalies (expected 70% detection).
9913    #[serde(default = "default_difficulty_moderate")]
9914    pub moderate: f64,
9915
9916    /// Proportion of hard anomalies (expected 40% detection).
9917    #[serde(default = "default_difficulty_hard")]
9918    pub hard: f64,
9919
9920    /// Proportion of expert anomalies (expected 15% detection).
9921    #[serde(default = "default_difficulty_expert")]
9922    pub expert: f64,
9923}
9924
/// Serde default for `DifficultyDistributionConfig::trivial`.
fn default_difficulty_trivial() -> f64 {
    const TRIVIAL_SHARE: f64 = 0.15;
    TRIVIAL_SHARE
}

/// Serde default for `DifficultyDistributionConfig::easy`.
fn default_difficulty_easy() -> f64 {
    const EASY_SHARE: f64 = 0.25;
    EASY_SHARE
}

/// Serde default for `DifficultyDistributionConfig::moderate`.
fn default_difficulty_moderate() -> f64 {
    const MODERATE_SHARE: f64 = 0.30;
    MODERATE_SHARE
}

/// Serde default for `DifficultyDistributionConfig::hard`.
fn default_difficulty_hard() -> f64 {
    const HARD_SHARE: f64 = 0.20;
    HARD_SHARE
}

/// Serde default for `DifficultyDistributionConfig::expert`.
fn default_difficulty_expert() -> f64 {
    const EXPERT_SHARE: f64 = 0.10;
    EXPERT_SHARE
}
9940
9941impl Default for DifficultyDistributionConfig {
9942    fn default() -> Self {
9943        Self {
9944            trivial: default_difficulty_trivial(),
9945            easy: default_difficulty_easy(),
9946            moderate: default_difficulty_moderate(),
9947            hard: default_difficulty_hard(),
9948            expert: default_difficulty_expert(),
9949        }
9950    }
9951}
9952
9953/// Context-aware injection configuration.
9954#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9955pub struct ContextAwareConfig {
9956    /// Enable context-aware injection.
9957    #[serde(default)]
9958    pub enabled: bool,
9959
9960    /// Vendor-specific anomaly rules.
9961    #[serde(default)]
9962    pub vendor_rules: VendorAnomalyRulesConfig,
9963
9964    /// Employee-specific anomaly rules.
9965    #[serde(default)]
9966    pub employee_rules: EmployeeAnomalyRulesConfig,
9967
9968    /// Account-specific anomaly rules.
9969    #[serde(default)]
9970    pub account_rules: AccountAnomalyRulesConfig,
9971
9972    /// Behavioral baseline configuration.
9973    #[serde(default)]
9974    pub behavioral_baseline: BehavioralBaselineConfig,
9975}
9976
9977/// Vendor-specific anomaly rules configuration.
9978#[derive(Debug, Clone, Serialize, Deserialize)]
9979pub struct VendorAnomalyRulesConfig {
9980    /// Error rate multiplier for new vendors (< threshold days).
9981    #[serde(default = "default_new_vendor_multiplier")]
9982    pub new_vendor_error_multiplier: f64,
9983
9984    /// Days threshold for "new" vendor classification.
9985    #[serde(default = "default_new_vendor_threshold")]
9986    pub new_vendor_threshold_days: u32,
9987
9988    /// Error rate multiplier for international vendors.
9989    #[serde(default = "default_international_multiplier")]
9990    pub international_error_multiplier: f64,
9991
9992    /// Strategic vendor anomaly types (may differ from general vendors).
9993    #[serde(default = "default_strategic_vendor_types")]
9994    pub strategic_vendor_anomaly_types: Vec<String>,
9995}
9996
/// Serde default for `VendorAnomalyRulesConfig::new_vendor_error_multiplier`.
fn default_new_vendor_multiplier() -> f64 {
    const NEW_VENDOR_MULTIPLIER: f64 = 2.5;
    NEW_VENDOR_MULTIPLIER
}

/// Serde default for `VendorAnomalyRulesConfig::new_vendor_threshold_days`.
fn default_new_vendor_threshold() -> u32 {
    const NEW_VENDOR_THRESHOLD_DAYS: u32 = 90;
    NEW_VENDOR_THRESHOLD_DAYS
}

/// Serde default for `VendorAnomalyRulesConfig::international_error_multiplier`.
fn default_international_multiplier() -> f64 {
    const INTERNATIONAL_MULTIPLIER: f64 = 1.5;
    INTERNATIONAL_MULTIPLIER
}

/// Serde default for `VendorAnomalyRulesConfig::strategic_vendor_anomaly_types`.
fn default_strategic_vendor_types() -> Vec<String> {
    let names = ["pricing_dispute", "contract_violation"];
    names.iter().map(|name| String::from(*name)).collect()
}
10012
10013impl Default for VendorAnomalyRulesConfig {
10014    fn default() -> Self {
10015        Self {
10016            new_vendor_error_multiplier: default_new_vendor_multiplier(),
10017            new_vendor_threshold_days: default_new_vendor_threshold(),
10018            international_error_multiplier: default_international_multiplier(),
10019            strategic_vendor_anomaly_types: default_strategic_vendor_types(),
10020        }
10021    }
10022}
10023
10024/// Employee-specific anomaly rules configuration.
10025#[derive(Debug, Clone, Serialize, Deserialize)]
10026pub struct EmployeeAnomalyRulesConfig {
10027    /// Error rate for new employees (< threshold days).
10028    #[serde(default = "default_new_employee_rate")]
10029    pub new_employee_error_rate: f64,
10030
10031    /// Days threshold for "new" employee classification.
10032    #[serde(default = "default_new_employee_threshold")]
10033    pub new_employee_threshold_days: u32,
10034
10035    /// Transaction volume threshold for fatigue errors.
10036    #[serde(default = "default_volume_fatigue_threshold")]
10037    pub volume_fatigue_threshold: u32,
10038
10039    /// Error rate multiplier when primary approver is absent.
10040    #[serde(default = "default_coverage_multiplier")]
10041    pub coverage_error_multiplier: f64,
10042}
10043
/// Serde default for `EmployeeAnomalyRulesConfig::new_employee_error_rate`.
fn default_new_employee_rate() -> f64 {
    const NEW_EMPLOYEE_RATE: f64 = 0.05;
    NEW_EMPLOYEE_RATE
}

/// Serde default for `EmployeeAnomalyRulesConfig::new_employee_threshold_days`.
fn default_new_employee_threshold() -> u32 {
    const NEW_EMPLOYEE_THRESHOLD_DAYS: u32 = 180;
    NEW_EMPLOYEE_THRESHOLD_DAYS
}

/// Serde default for `EmployeeAnomalyRulesConfig::volume_fatigue_threshold`.
fn default_volume_fatigue_threshold() -> u32 {
    const VOLUME_FATIGUE_THRESHOLD: u32 = 50;
    VOLUME_FATIGUE_THRESHOLD
}

/// Serde default for `EmployeeAnomalyRulesConfig::coverage_error_multiplier`.
fn default_coverage_multiplier() -> f64 {
    const COVERAGE_MULTIPLIER: f64 = 1.8;
    COVERAGE_MULTIPLIER
}
10056
10057impl Default for EmployeeAnomalyRulesConfig {
10058    fn default() -> Self {
10059        Self {
10060            new_employee_error_rate: default_new_employee_rate(),
10061            new_employee_threshold_days: default_new_employee_threshold(),
10062            volume_fatigue_threshold: default_volume_fatigue_threshold(),
10063            coverage_error_multiplier: default_coverage_multiplier(),
10064        }
10065    }
10066}
10067
10068/// Account-specific anomaly rules configuration.
10069#[derive(Debug, Clone, Serialize, Deserialize)]
10070pub struct AccountAnomalyRulesConfig {
10071    /// Error rate multiplier for high-risk accounts.
10072    #[serde(default = "default_high_risk_multiplier")]
10073    pub high_risk_account_multiplier: f64,
10074
10075    /// Account codes considered high-risk.
10076    #[serde(default = "default_high_risk_accounts")]
10077    pub high_risk_accounts: Vec<String>,
10078
10079    /// Error rate multiplier for suspense accounts.
10080    #[serde(default = "default_suspense_multiplier")]
10081    pub suspense_account_multiplier: f64,
10082
10083    /// Account codes considered suspense accounts.
10084    #[serde(default = "default_suspense_accounts")]
10085    pub suspense_accounts: Vec<String>,
10086
10087    /// Error rate multiplier for intercompany accounts.
10088    #[serde(default = "default_intercompany_multiplier")]
10089    pub intercompany_account_multiplier: f64,
10090}
10091
/// Serde default for `AccountAnomalyRulesConfig::high_risk_account_multiplier`.
fn default_high_risk_multiplier() -> f64 {
    const HIGH_RISK_MULTIPLIER: f64 = 2.0;
    HIGH_RISK_MULTIPLIER
}

/// Serde default for `AccountAnomalyRulesConfig::high_risk_accounts`.
fn default_high_risk_accounts() -> Vec<String> {
    let codes = [
        "1100", // AR Control
        "2000", // AP Control
        "3000", // Cash
    ];
    codes.iter().map(|code| String::from(*code)).collect()
}

/// Serde default for `AccountAnomalyRulesConfig::suspense_account_multiplier`.
fn default_suspense_multiplier() -> f64 {
    const SUSPENSE_MULTIPLIER: f64 = 3.0;
    SUSPENSE_MULTIPLIER
}

/// Serde default for `AccountAnomalyRulesConfig::suspense_accounts`.
fn default_suspense_accounts() -> Vec<String> {
    let codes = ["9999", "9998"];
    codes.iter().map(|code| String::from(*code)).collect()
}

/// Serde default for `AccountAnomalyRulesConfig::intercompany_account_multiplier`.
fn default_intercompany_multiplier() -> f64 {
    const INTERCOMPANY_MULTIPLIER: f64 = 1.5;
    INTERCOMPANY_MULTIPLIER
}
10111
10112impl Default for AccountAnomalyRulesConfig {
10113    fn default() -> Self {
10114        Self {
10115            high_risk_account_multiplier: default_high_risk_multiplier(),
10116            high_risk_accounts: default_high_risk_accounts(),
10117            suspense_account_multiplier: default_suspense_multiplier(),
10118            suspense_accounts: default_suspense_accounts(),
10119            intercompany_account_multiplier: default_intercompany_multiplier(),
10120        }
10121    }
10122}
10123
10124/// Behavioral baseline configuration.
10125#[derive(Debug, Clone, Serialize, Deserialize)]
10126pub struct BehavioralBaselineConfig {
10127    /// Enable behavioral baseline tracking.
10128    #[serde(default)]
10129    pub enabled: bool,
10130
10131    /// Number of days to build baseline from.
10132    #[serde(default = "default_baseline_period")]
10133    pub baseline_period_days: u32,
10134
10135    /// Standard deviation threshold for amount anomalies.
10136    #[serde(default = "default_deviation_threshold")]
10137    pub deviation_threshold_std: f64,
10138
10139    /// Standard deviation threshold for frequency anomalies.
10140    #[serde(default = "default_frequency_deviation")]
10141    pub frequency_deviation_threshold: f64,
10142}
10143
/// Serde default for `BehavioralBaselineConfig::baseline_period_days`.
fn default_baseline_period() -> u32 {
    const BASELINE_PERIOD_DAYS: u32 = 90;
    BASELINE_PERIOD_DAYS
}

/// Serde default for `BehavioralBaselineConfig::deviation_threshold_std`.
fn default_deviation_threshold() -> f64 {
    const DEVIATION_THRESHOLD: f64 = 3.0;
    DEVIATION_THRESHOLD
}

/// Serde default for `BehavioralBaselineConfig::frequency_deviation_threshold`.
fn default_frequency_deviation() -> f64 {
    const FREQUENCY_DEVIATION: f64 = 2.0;
    FREQUENCY_DEVIATION
}
10153
10154impl Default for BehavioralBaselineConfig {
10155    fn default() -> Self {
10156        Self {
10157            enabled: false,
10158            baseline_period_days: default_baseline_period(),
10159            deviation_threshold_std: default_deviation_threshold(),
10160            frequency_deviation_threshold: default_frequency_deviation(),
10161        }
10162    }
10163}
10164
10165/// Enhanced labeling configuration.
10166#[derive(Debug, Clone, Serialize, Deserialize)]
10167pub struct EnhancedLabelingConfig {
10168    /// Enable severity scoring.
10169    #[serde(default = "default_true_val")]
10170    pub severity_scoring: bool,
10171
10172    /// Enable difficulty classification.
10173    #[serde(default = "default_true_val")]
10174    pub difficulty_classification: bool,
10175
10176    /// Materiality thresholds for severity classification.
10177    #[serde(default)]
10178    pub materiality_thresholds: MaterialityThresholdsConfig,
10179}
10180
10181impl Default for EnhancedLabelingConfig {
10182    fn default() -> Self {
10183        Self {
10184            severity_scoring: true,
10185            difficulty_classification: true,
10186            materiality_thresholds: MaterialityThresholdsConfig::default(),
10187        }
10188    }
10189}
10190
10191/// Materiality thresholds configuration.
10192#[derive(Debug, Clone, Serialize, Deserialize)]
10193pub struct MaterialityThresholdsConfig {
10194    /// Threshold for trivial impact (as percentage of total).
10195    #[serde(default = "default_materiality_trivial")]
10196    pub trivial: f64,
10197
10198    /// Threshold for immaterial impact.
10199    #[serde(default = "default_materiality_immaterial")]
10200    pub immaterial: f64,
10201
10202    /// Threshold for material impact.
10203    #[serde(default = "default_materiality_material")]
10204    pub material: f64,
10205
10206    /// Threshold for highly material impact.
10207    #[serde(default = "default_materiality_highly_material")]
10208    pub highly_material: f64,
10209}
10210
/// Serde default for `MaterialityThresholdsConfig::trivial`.
fn default_materiality_trivial() -> f64 {
    const TRIVIAL: f64 = 0.001;
    TRIVIAL
}

/// Serde default for `MaterialityThresholdsConfig::immaterial`.
fn default_materiality_immaterial() -> f64 {
    const IMMATERIAL: f64 = 0.01;
    IMMATERIAL
}

/// Serde default for `MaterialityThresholdsConfig::material`.
fn default_materiality_material() -> f64 {
    const MATERIAL: f64 = 0.05;
    MATERIAL
}

/// Serde default for `MaterialityThresholdsConfig::highly_material`.
fn default_materiality_highly_material() -> f64 {
    const HIGHLY_MATERIAL: f64 = 0.10;
    HIGHLY_MATERIAL
}
10223
10224impl Default for MaterialityThresholdsConfig {
10225    fn default() -> Self {
10226        Self {
10227            trivial: default_materiality_trivial(),
10228            immaterial: default_materiality_immaterial(),
10229            material: default_materiality_material(),
10230            highly_material: default_materiality_highly_material(),
10231        }
10232    }
10233}
10234
10235// =============================================================================
10236// Industry-Specific Configuration
10237// =============================================================================
10238
10239/// Industry-specific transaction and anomaly generation configuration.
10240///
10241/// This configuration enables generation of industry-authentic:
10242/// - Transaction types with appropriate terminology
10243/// - Master data (BOM, routings, clinical codes, etc.)
10244/// - Industry-specific anomaly patterns
10245/// - Regulatory framework compliance
10246#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10247pub struct IndustrySpecificConfig {
10248    /// Enable industry-specific generation.
10249    #[serde(default)]
10250    pub enabled: bool,
10251
10252    /// Manufacturing industry settings.
10253    #[serde(default)]
10254    pub manufacturing: ManufacturingConfig,
10255
10256    /// Retail industry settings.
10257    #[serde(default)]
10258    pub retail: RetailConfig,
10259
10260    /// Healthcare industry settings.
10261    #[serde(default)]
10262    pub healthcare: HealthcareConfig,
10263
10264    /// Technology industry settings.
10265    #[serde(default)]
10266    pub technology: TechnologyConfig,
10267
10268    /// Financial services industry settings.
10269    #[serde(default)]
10270    pub financial_services: FinancialServicesConfig,
10271
10272    /// Professional services industry settings.
10273    #[serde(default)]
10274    pub professional_services: ProfessionalServicesConfig,
10275}
10276
10277/// Manufacturing industry configuration.
10278#[derive(Debug, Clone, Serialize, Deserialize)]
10279pub struct ManufacturingConfig {
10280    /// Enable manufacturing-specific generation.
10281    #[serde(default)]
10282    pub enabled: bool,
10283
10284    /// Bill of Materials depth (typical: 3-7).
10285    #[serde(default = "default_bom_depth")]
10286    pub bom_depth: u32,
10287
10288    /// Whether to use just-in-time inventory.
10289    #[serde(default)]
10290    pub just_in_time: bool,
10291
10292    /// Production order types to generate.
10293    #[serde(default = "default_production_order_types")]
10294    pub production_order_types: Vec<String>,
10295
10296    /// Quality framework (ISO_9001, Six_Sigma, etc.).
10297    #[serde(default)]
10298    pub quality_framework: Option<String>,
10299
10300    /// Number of supplier tiers to model (1-3).
10301    #[serde(default = "default_supplier_tiers")]
10302    pub supplier_tiers: u32,
10303
10304    /// Standard cost update frequency.
10305    #[serde(default = "default_cost_frequency")]
10306    pub standard_cost_frequency: String,
10307
10308    /// Target yield rate (0.95-0.99 typical).
10309    #[serde(default = "default_yield_rate")]
10310    pub target_yield_rate: f64,
10311
10312    /// Scrap percentage threshold for alerts.
10313    #[serde(default = "default_scrap_threshold")]
10314    pub scrap_alert_threshold: f64,
10315
10316    /// Manufacturing anomaly injection rates.
10317    #[serde(default)]
10318    pub anomaly_rates: ManufacturingAnomalyRates,
10319
10320    /// Cost accounting configuration (WIP → FG → COGS pipeline).
10321    #[serde(default)]
10322    pub cost_accounting: ManufacturingCostAccountingConfig,
10323}
10324
10325/// Configuration for manufacturing cost accounting JE generation.
10326#[derive(Debug, Clone, Serialize, Deserialize)]
10327pub struct ManufacturingCostAccountingConfig {
10328    /// Enable multi-stage cost flow (WIP → FG → COGS) instead of flat JEs.
10329    #[serde(default = "default_true")]
10330    pub enabled: bool,
10331
10332    /// Generate standard cost variance JEs.
10333    #[serde(default = "default_true")]
10334    pub variance_accounts_enabled: bool,
10335
10336    /// Generate warranty provisions from quality inspection failures.
10337    #[serde(default = "default_true")]
10338    pub warranty_provisions_enabled: bool,
10339
10340    /// Minimum defect rate (0.0-1.0) to trigger warranty provision generation.
10341    #[serde(default = "default_warranty_defect_threshold")]
10342    pub warranty_defect_threshold: f64,
10343}
10344
/// Serde fallback for `ManufacturingCostAccountingConfig::warranty_defect_threshold`.
fn default_warranty_defect_threshold() -> f64 {
    0.01_f64
}
10348
10349impl Default for ManufacturingCostAccountingConfig {
10350    fn default() -> Self {
10351        Self {
10352            enabled: true,
10353            variance_accounts_enabled: true,
10354            warranty_provisions_enabled: true,
10355            warranty_defect_threshold: 0.01,
10356        }
10357    }
10358}
10359
/// Serde fallback for `ManufacturingConfig::bom_depth`.
fn default_bom_depth() -> u32 {
    4_u32
}

/// Serde fallback for `ManufacturingConfig::production_order_types`.
fn default_production_order_types() -> Vec<String> {
    ["standard", "rework", "prototype"]
        .iter()
        .map(|kind| kind.to_string())
        .collect()
}

/// Serde fallback for `ManufacturingConfig::supplier_tiers`.
fn default_supplier_tiers() -> u32 {
    2_u32
}

/// Serde fallback for `ManufacturingConfig::standard_cost_frequency`.
fn default_cost_frequency() -> String {
    String::from("quarterly")
}

/// Serde fallback for `ManufacturingConfig::target_yield_rate`.
fn default_yield_rate() -> f64 {
    0.97_f64
}

/// Serde fallback for `ManufacturingConfig::scrap_alert_threshold`.
fn default_scrap_threshold() -> f64 {
    0.03_f64
}
10387
10388impl Default for ManufacturingConfig {
10389    fn default() -> Self {
10390        Self {
10391            enabled: false,
10392            bom_depth: default_bom_depth(),
10393            just_in_time: false,
10394            production_order_types: default_production_order_types(),
10395            quality_framework: Some("ISO_9001".to_string()),
10396            supplier_tiers: default_supplier_tiers(),
10397            standard_cost_frequency: default_cost_frequency(),
10398            target_yield_rate: default_yield_rate(),
10399            scrap_alert_threshold: default_scrap_threshold(),
10400            anomaly_rates: ManufacturingAnomalyRates::default(),
10401            cost_accounting: ManufacturingCostAccountingConfig::default(),
10402        }
10403    }
10404}
10405
10406/// Manufacturing anomaly injection rates.
10407#[derive(Debug, Clone, Serialize, Deserialize)]
10408pub struct ManufacturingAnomalyRates {
10409    /// Yield manipulation rate.
10410    #[serde(default = "default_mfg_yield_rate")]
10411    pub yield_manipulation: f64,
10412
10413    /// Labor misallocation rate.
10414    #[serde(default = "default_mfg_labor_rate")]
10415    pub labor_misallocation: f64,
10416
10417    /// Phantom production rate.
10418    #[serde(default = "default_mfg_phantom_rate")]
10419    pub phantom_production: f64,
10420
10421    /// Standard cost manipulation rate.
10422    #[serde(default = "default_mfg_cost_rate")]
10423    pub standard_cost_manipulation: f64,
10424
10425    /// Inventory fraud rate.
10426    #[serde(default = "default_mfg_inventory_rate")]
10427    pub inventory_fraud: f64,
10428}
10429
/// Serde fallback for `ManufacturingAnomalyRates::yield_manipulation`.
fn default_mfg_yield_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::labor_misallocation`.
fn default_mfg_labor_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::phantom_production`.
fn default_mfg_phantom_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::standard_cost_manipulation`.
fn default_mfg_cost_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::inventory_fraud`.
fn default_mfg_inventory_rate() -> f64 {
    0.008_f64
}
10449
10450impl Default for ManufacturingAnomalyRates {
10451    fn default() -> Self {
10452        Self {
10453            yield_manipulation: default_mfg_yield_rate(),
10454            labor_misallocation: default_mfg_labor_rate(),
10455            phantom_production: default_mfg_phantom_rate(),
10456            standard_cost_manipulation: default_mfg_cost_rate(),
10457            inventory_fraud: default_mfg_inventory_rate(),
10458        }
10459    }
10460}
10461
10462/// Retail industry configuration.
10463#[derive(Debug, Clone, Serialize, Deserialize)]
10464pub struct RetailConfig {
10465    /// Enable retail-specific generation.
10466    #[serde(default)]
10467    pub enabled: bool,
10468
10469    /// Store type distribution.
10470    #[serde(default)]
10471    pub store_types: RetailStoreTypeConfig,
10472
10473    /// Average daily transactions per store.
10474    #[serde(default = "default_retail_daily_txns")]
10475    pub avg_daily_transactions: u32,
10476
10477    /// Enable loss prevention tracking.
10478    #[serde(default = "default_true")]
10479    pub loss_prevention: bool,
10480
10481    /// Shrinkage rate (0.01-0.03 typical).
10482    #[serde(default = "default_shrinkage_rate")]
10483    pub shrinkage_rate: f64,
10484
10485    /// Retail anomaly injection rates.
10486    #[serde(default)]
10487    pub anomaly_rates: RetailAnomalyRates,
10488}
10489
/// Serde fallback for `RetailConfig::avg_daily_transactions`.
fn default_retail_daily_txns() -> u32 {
    500_u32
}

/// Serde fallback for `RetailConfig::shrinkage_rate`.
fn default_shrinkage_rate() -> f64 {
    0.015_f64
}
10497
10498impl Default for RetailConfig {
10499    fn default() -> Self {
10500        Self {
10501            enabled: false,
10502            store_types: RetailStoreTypeConfig::default(),
10503            avg_daily_transactions: default_retail_daily_txns(),
10504            loss_prevention: true,
10505            shrinkage_rate: default_shrinkage_rate(),
10506            anomaly_rates: RetailAnomalyRates::default(),
10507        }
10508    }
10509}
10510
10511/// Retail store type distribution.
10512#[derive(Debug, Clone, Serialize, Deserialize)]
10513pub struct RetailStoreTypeConfig {
10514    /// Percentage of flagship stores.
10515    #[serde(default = "default_flagship_pct")]
10516    pub flagship: f64,
10517
10518    /// Percentage of regional stores.
10519    #[serde(default = "default_regional_pct")]
10520    pub regional: f64,
10521
10522    /// Percentage of outlet stores.
10523    #[serde(default = "default_outlet_pct")]
10524    pub outlet: f64,
10525
10526    /// Percentage of e-commerce.
10527    #[serde(default = "default_ecommerce_pct")]
10528    pub ecommerce: f64,
10529}
10530
/// Serde fallback for `RetailStoreTypeConfig::flagship`.
fn default_flagship_pct() -> f64 {
    0.10_f64
}

/// Serde fallback for `RetailStoreTypeConfig::regional`.
fn default_regional_pct() -> f64 {
    0.50_f64
}

/// Serde fallback for `RetailStoreTypeConfig::outlet`.
fn default_outlet_pct() -> f64 {
    0.25_f64
}

/// Serde fallback for `RetailStoreTypeConfig::ecommerce`.
fn default_ecommerce_pct() -> f64 {
    0.15_f64
}
10546
10547impl Default for RetailStoreTypeConfig {
10548    fn default() -> Self {
10549        Self {
10550            flagship: default_flagship_pct(),
10551            regional: default_regional_pct(),
10552            outlet: default_outlet_pct(),
10553            ecommerce: default_ecommerce_pct(),
10554        }
10555    }
10556}
10557
10558/// Retail anomaly injection rates.
10559#[derive(Debug, Clone, Serialize, Deserialize)]
10560pub struct RetailAnomalyRates {
10561    /// Sweethearting rate.
10562    #[serde(default = "default_sweethearting_rate")]
10563    pub sweethearting: f64,
10564
10565    /// Skimming rate.
10566    #[serde(default = "default_skimming_rate")]
10567    pub skimming: f64,
10568
10569    /// Refund fraud rate.
10570    #[serde(default = "default_refund_fraud_rate")]
10571    pub refund_fraud: f64,
10572
10573    /// Void abuse rate.
10574    #[serde(default = "default_void_abuse_rate")]
10575    pub void_abuse: f64,
10576
10577    /// Gift card fraud rate.
10578    #[serde(default = "default_gift_card_rate")]
10579    pub gift_card_fraud: f64,
10580
10581    /// Vendor kickback rate.
10582    #[serde(default = "default_retail_kickback_rate")]
10583    pub vendor_kickback: f64,
10584}
10585
/// Serde fallback for `RetailAnomalyRates::sweethearting`.
fn default_sweethearting_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `RetailAnomalyRates::skimming`.
fn default_skimming_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `RetailAnomalyRates::refund_fraud`.
fn default_refund_fraud_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `RetailAnomalyRates::void_abuse`.
fn default_void_abuse_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `RetailAnomalyRates::gift_card_fraud`.
fn default_gift_card_rate() -> f64 {
    0.008_f64
}

/// Serde fallback for `RetailAnomalyRates::vendor_kickback`.
fn default_retail_kickback_rate() -> f64 {
    0.003_f64
}
10609
10610impl Default for RetailAnomalyRates {
10611    fn default() -> Self {
10612        Self {
10613            sweethearting: default_sweethearting_rate(),
10614            skimming: default_skimming_rate(),
10615            refund_fraud: default_refund_fraud_rate(),
10616            void_abuse: default_void_abuse_rate(),
10617            gift_card_fraud: default_gift_card_rate(),
10618            vendor_kickback: default_retail_kickback_rate(),
10619        }
10620    }
10621}
10622
10623/// Healthcare industry configuration.
10624#[derive(Debug, Clone, Serialize, Deserialize)]
10625pub struct HealthcareConfig {
10626    /// Enable healthcare-specific generation.
10627    #[serde(default)]
10628    pub enabled: bool,
10629
10630    /// Healthcare facility type.
10631    #[serde(default = "default_facility_type")]
10632    pub facility_type: String,
10633
10634    /// Payer mix distribution.
10635    #[serde(default)]
10636    pub payer_mix: HealthcarePayerMix,
10637
10638    /// Coding systems enabled.
10639    #[serde(default)]
10640    pub coding_systems: HealthcareCodingSystems,
10641
10642    /// Healthcare compliance settings.
10643    #[serde(default)]
10644    pub compliance: HealthcareComplianceConfig,
10645
10646    /// Average daily encounters.
10647    #[serde(default = "default_daily_encounters")]
10648    pub avg_daily_encounters: u32,
10649
10650    /// Average charges per encounter.
10651    #[serde(default = "default_charges_per_encounter")]
10652    pub avg_charges_per_encounter: u32,
10653
10654    /// Denial rate (0.0-1.0).
10655    #[serde(default = "default_hc_denial_rate")]
10656    pub denial_rate: f64,
10657
10658    /// Bad debt rate (0.0-1.0).
10659    #[serde(default = "default_hc_bad_debt_rate")]
10660    pub bad_debt_rate: f64,
10661
10662    /// Charity care rate (0.0-1.0).
10663    #[serde(default = "default_hc_charity_care_rate")]
10664    pub charity_care_rate: f64,
10665
10666    /// Healthcare anomaly injection rates.
10667    #[serde(default)]
10668    pub anomaly_rates: HealthcareAnomalyRates,
10669}
10670
/// Serde fallback for `HealthcareConfig::facility_type`.
fn default_facility_type() -> String {
    String::from("hospital")
}

/// Serde fallback for `HealthcareConfig::avg_daily_encounters`.
fn default_daily_encounters() -> u32 {
    150_u32
}

/// Serde fallback for `HealthcareConfig::avg_charges_per_encounter`.
fn default_charges_per_encounter() -> u32 {
    8_u32
}

/// Serde fallback for `HealthcareConfig::denial_rate`.
fn default_hc_denial_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `HealthcareConfig::bad_debt_rate`.
fn default_hc_bad_debt_rate() -> f64 {
    0.03_f64
}

/// Serde fallback for `HealthcareConfig::charity_care_rate`.
fn default_hc_charity_care_rate() -> f64 {
    0.02_f64
}
10694
10695impl Default for HealthcareConfig {
10696    fn default() -> Self {
10697        Self {
10698            enabled: false,
10699            facility_type: default_facility_type(),
10700            payer_mix: HealthcarePayerMix::default(),
10701            coding_systems: HealthcareCodingSystems::default(),
10702            compliance: HealthcareComplianceConfig::default(),
10703            avg_daily_encounters: default_daily_encounters(),
10704            avg_charges_per_encounter: default_charges_per_encounter(),
10705            denial_rate: default_hc_denial_rate(),
10706            bad_debt_rate: default_hc_bad_debt_rate(),
10707            charity_care_rate: default_hc_charity_care_rate(),
10708            anomaly_rates: HealthcareAnomalyRates::default(),
10709        }
10710    }
10711}
10712
10713/// Healthcare payer mix distribution.
10714#[derive(Debug, Clone, Serialize, Deserialize)]
10715pub struct HealthcarePayerMix {
10716    /// Medicare percentage.
10717    #[serde(default = "default_medicare_pct")]
10718    pub medicare: f64,
10719
10720    /// Medicaid percentage.
10721    #[serde(default = "default_medicaid_pct")]
10722    pub medicaid: f64,
10723
10724    /// Commercial insurance percentage.
10725    #[serde(default = "default_commercial_pct")]
10726    pub commercial: f64,
10727
10728    /// Self-pay percentage.
10729    #[serde(default = "default_self_pay_pct")]
10730    pub self_pay: f64,
10731}
10732
/// Serde fallback for `HealthcarePayerMix::medicare`.
fn default_medicare_pct() -> f64 {
    0.40_f64
}

/// Serde fallback for `HealthcarePayerMix::medicaid`.
fn default_medicaid_pct() -> f64 {
    0.20_f64
}

/// Serde fallback for `HealthcarePayerMix::commercial`.
fn default_commercial_pct() -> f64 {
    0.30_f64
}

/// Serde fallback for `HealthcarePayerMix::self_pay`.
fn default_self_pay_pct() -> f64 {
    0.10_f64
}
10748
10749impl Default for HealthcarePayerMix {
10750    fn default() -> Self {
10751        Self {
10752            medicare: default_medicare_pct(),
10753            medicaid: default_medicaid_pct(),
10754            commercial: default_commercial_pct(),
10755            self_pay: default_self_pay_pct(),
10756        }
10757    }
10758}
10759
10760/// Healthcare coding systems configuration.
10761#[derive(Debug, Clone, Serialize, Deserialize)]
10762pub struct HealthcareCodingSystems {
10763    /// Enable ICD-10 diagnosis coding.
10764    #[serde(default = "default_true")]
10765    pub icd10: bool,
10766
10767    /// Enable CPT procedure coding.
10768    #[serde(default = "default_true")]
10769    pub cpt: bool,
10770
10771    /// Enable DRG grouping.
10772    #[serde(default = "default_true")]
10773    pub drg: bool,
10774
10775    /// Enable HCPCS Level II coding.
10776    #[serde(default = "default_true")]
10777    pub hcpcs: bool,
10778
10779    /// Enable revenue codes.
10780    #[serde(default = "default_true")]
10781    pub revenue_codes: bool,
10782}
10783
10784impl Default for HealthcareCodingSystems {
10785    fn default() -> Self {
10786        Self {
10787            icd10: true,
10788            cpt: true,
10789            drg: true,
10790            hcpcs: true,
10791            revenue_codes: true,
10792        }
10793    }
10794}
10795
10796/// Healthcare compliance configuration.
10797#[derive(Debug, Clone, Serialize, Deserialize)]
10798pub struct HealthcareComplianceConfig {
10799    /// Enable HIPAA compliance.
10800    #[serde(default = "default_true")]
10801    pub hipaa: bool,
10802
10803    /// Enable Stark Law compliance.
10804    #[serde(default = "default_true")]
10805    pub stark_law: bool,
10806
10807    /// Enable Anti-Kickback Statute compliance.
10808    #[serde(default = "default_true")]
10809    pub anti_kickback: bool,
10810
10811    /// Enable False Claims Act compliance.
10812    #[serde(default = "default_true")]
10813    pub false_claims_act: bool,
10814
10815    /// Enable EMTALA compliance (for hospitals).
10816    #[serde(default = "default_true")]
10817    pub emtala: bool,
10818}
10819
10820impl Default for HealthcareComplianceConfig {
10821    fn default() -> Self {
10822        Self {
10823            hipaa: true,
10824            stark_law: true,
10825            anti_kickback: true,
10826            false_claims_act: true,
10827            emtala: true,
10828        }
10829    }
10830}
10831
10832/// Healthcare anomaly injection rates.
10833#[derive(Debug, Clone, Serialize, Deserialize)]
10834pub struct HealthcareAnomalyRates {
10835    /// Upcoding rate.
10836    #[serde(default = "default_upcoding_rate")]
10837    pub upcoding: f64,
10838
10839    /// Unbundling rate.
10840    #[serde(default = "default_unbundling_rate")]
10841    pub unbundling: f64,
10842
10843    /// Phantom billing rate.
10844    #[serde(default = "default_phantom_billing_rate")]
10845    pub phantom_billing: f64,
10846
10847    /// Kickback rate.
10848    #[serde(default = "default_healthcare_kickback_rate")]
10849    pub kickbacks: f64,
10850
10851    /// Duplicate billing rate.
10852    #[serde(default = "default_duplicate_billing_rate")]
10853    pub duplicate_billing: f64,
10854
10855    /// Medical necessity abuse rate.
10856    #[serde(default = "default_med_necessity_rate")]
10857    pub medical_necessity_abuse: f64,
10858}
10859
/// Serde fallback for `HealthcareAnomalyRates::upcoding`.
fn default_upcoding_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `HealthcareAnomalyRates::unbundling`.
fn default_unbundling_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `HealthcareAnomalyRates::phantom_billing`.
fn default_phantom_billing_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `HealthcareAnomalyRates::kickbacks`.
fn default_healthcare_kickback_rate() -> f64 {
    0.003_f64
}

/// Serde fallback for `HealthcareAnomalyRates::duplicate_billing`.
fn default_duplicate_billing_rate() -> f64 {
    0.008_f64
}

/// Serde fallback for `HealthcareAnomalyRates::medical_necessity_abuse`.
fn default_med_necessity_rate() -> f64 {
    0.01_f64
}
10883
10884impl Default for HealthcareAnomalyRates {
10885    fn default() -> Self {
10886        Self {
10887            upcoding: default_upcoding_rate(),
10888            unbundling: default_unbundling_rate(),
10889            phantom_billing: default_phantom_billing_rate(),
10890            kickbacks: default_healthcare_kickback_rate(),
10891            duplicate_billing: default_duplicate_billing_rate(),
10892            medical_necessity_abuse: default_med_necessity_rate(),
10893        }
10894    }
10895}
10896
10897/// Technology industry configuration.
10898#[derive(Debug, Clone, Serialize, Deserialize)]
10899pub struct TechnologyConfig {
10900    /// Enable technology-specific generation.
10901    #[serde(default)]
10902    pub enabled: bool,
10903
10904    /// Revenue model type.
10905    #[serde(default = "default_revenue_model")]
10906    pub revenue_model: String,
10907
10908    /// Subscription revenue percentage (for SaaS).
10909    #[serde(default = "default_subscription_pct")]
10910    pub subscription_revenue_pct: f64,
10911
10912    /// License revenue percentage.
10913    #[serde(default = "default_license_pct")]
10914    pub license_revenue_pct: f64,
10915
10916    /// Services revenue percentage.
10917    #[serde(default = "default_services_pct")]
10918    pub services_revenue_pct: f64,
10919
10920    /// R&D capitalization settings.
10921    #[serde(default)]
10922    pub rd_capitalization: RdCapitalizationConfig,
10923
10924    /// Technology anomaly injection rates.
10925    #[serde(default)]
10926    pub anomaly_rates: TechnologyAnomalyRates,
10927}
10928
/// Serde fallback for `TechnologyConfig::revenue_model`.
fn default_revenue_model() -> String {
    String::from("saas")
}

/// Serde fallback for `TechnologyConfig::subscription_revenue_pct`.
fn default_subscription_pct() -> f64 {
    0.60_f64
}

/// Serde fallback for `TechnologyConfig::license_revenue_pct`.
fn default_license_pct() -> f64 {
    0.25_f64
}

/// Serde fallback for `TechnologyConfig::services_revenue_pct`.
fn default_services_pct() -> f64 {
    0.15_f64
}
10944
10945impl Default for TechnologyConfig {
10946    fn default() -> Self {
10947        Self {
10948            enabled: false,
10949            revenue_model: default_revenue_model(),
10950            subscription_revenue_pct: default_subscription_pct(),
10951            license_revenue_pct: default_license_pct(),
10952            services_revenue_pct: default_services_pct(),
10953            rd_capitalization: RdCapitalizationConfig::default(),
10954            anomaly_rates: TechnologyAnomalyRates::default(),
10955        }
10956    }
10957}
10958
10959/// R&D capitalization configuration.
10960#[derive(Debug, Clone, Serialize, Deserialize)]
10961pub struct RdCapitalizationConfig {
10962    /// Enable R&D capitalization.
10963    #[serde(default = "default_true")]
10964    pub enabled: bool,
10965
10966    /// Capitalization rate (0.0-1.0).
10967    #[serde(default = "default_cap_rate")]
10968    pub capitalization_rate: f64,
10969
10970    /// Useful life in years.
10971    #[serde(default = "default_useful_life")]
10972    pub useful_life_years: u32,
10973}
10974
/// Serde fallback for `RdCapitalizationConfig::capitalization_rate`.
fn default_cap_rate() -> f64 {
    0.30_f64
}

/// Serde fallback for `RdCapitalizationConfig::useful_life_years`.
fn default_useful_life() -> u32 {
    3_u32
}
10982
10983impl Default for RdCapitalizationConfig {
10984    fn default() -> Self {
10985        Self {
10986            enabled: true,
10987            capitalization_rate: default_cap_rate(),
10988            useful_life_years: default_useful_life(),
10989        }
10990    }
10991}
10992
10993/// Technology anomaly injection rates.
10994#[derive(Debug, Clone, Serialize, Deserialize)]
10995pub struct TechnologyAnomalyRates {
10996    /// Premature revenue recognition rate.
10997    #[serde(default = "default_premature_rev_rate")]
10998    pub premature_revenue: f64,
10999
11000    /// Side letter abuse rate.
11001    #[serde(default = "default_side_letter_rate")]
11002    pub side_letter_abuse: f64,
11003
11004    /// Channel stuffing rate.
11005    #[serde(default = "default_channel_stuffing_rate")]
11006    pub channel_stuffing: f64,
11007
11008    /// Improper capitalization rate.
11009    #[serde(default = "default_improper_cap_rate")]
11010    pub improper_capitalization: f64,
11011}
11012
/// Serde fallback for `TechnologyAnomalyRates::premature_revenue`.
fn default_premature_rev_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `TechnologyAnomalyRates::side_letter_abuse`.
fn default_side_letter_rate() -> f64 {
    0.008_f64
}

/// Serde fallback for `TechnologyAnomalyRates::channel_stuffing`.
fn default_channel_stuffing_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `TechnologyAnomalyRates::improper_capitalization`.
fn default_improper_cap_rate() -> f64 {
    0.012_f64
}
11028
11029impl Default for TechnologyAnomalyRates {
11030    fn default() -> Self {
11031        Self {
11032            premature_revenue: default_premature_rev_rate(),
11033            side_letter_abuse: default_side_letter_rate(),
11034            channel_stuffing: default_channel_stuffing_rate(),
11035            improper_capitalization: default_improper_cap_rate(),
11036        }
11037    }
11038}
11039
11040/// Financial services industry configuration.
11041#[derive(Debug, Clone, Serialize, Deserialize)]
11042pub struct FinancialServicesConfig {
11043    /// Enable financial services-specific generation.
11044    #[serde(default)]
11045    pub enabled: bool,
11046
11047    /// Financial institution type.
11048    #[serde(default = "default_fi_type")]
11049    pub institution_type: String,
11050
11051    /// Regulatory framework.
11052    #[serde(default = "default_fi_regulatory")]
11053    pub regulatory_framework: String,
11054
11055    /// Financial services anomaly injection rates.
11056    #[serde(default)]
11057    pub anomaly_rates: FinancialServicesAnomalyRates,
11058}
11059
/// Serde fallback for `FinancialServicesConfig::institution_type`.
fn default_fi_type() -> String {
    String::from("commercial_bank")
}

/// Serde fallback for `FinancialServicesConfig::regulatory_framework`.
fn default_fi_regulatory() -> String {
    String::from("us_banking")
}
11067
11068impl Default for FinancialServicesConfig {
11069    fn default() -> Self {
11070        Self {
11071            enabled: false,
11072            institution_type: default_fi_type(),
11073            regulatory_framework: default_fi_regulatory(),
11074            anomaly_rates: FinancialServicesAnomalyRates::default(),
11075        }
11076    }
11077}
11078
11079/// Financial services anomaly injection rates.
11080#[derive(Debug, Clone, Serialize, Deserialize)]
11081pub struct FinancialServicesAnomalyRates {
11082    /// Loan fraud rate.
11083    #[serde(default = "default_loan_fraud_rate")]
11084    pub loan_fraud: f64,
11085
11086    /// Trading fraud rate.
11087    #[serde(default = "default_trading_fraud_rate")]
11088    pub trading_fraud: f64,
11089
11090    /// Insurance fraud rate.
11091    #[serde(default = "default_insurance_fraud_rate")]
11092    pub insurance_fraud: f64,
11093
11094    /// Account manipulation rate.
11095    #[serde(default = "default_account_manip_rate")]
11096    pub account_manipulation: f64,
11097}
11098
/// Serde fallback for `FinancialServicesAnomalyRates::loan_fraud`.
fn default_loan_fraud_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `FinancialServicesAnomalyRates::trading_fraud`.
fn default_trading_fraud_rate() -> f64 {
    0.008_f64
}

/// Serde fallback for `FinancialServicesAnomalyRates::insurance_fraud`.
fn default_insurance_fraud_rate() -> f64 {
    0.012_f64
}

/// Serde fallback for `FinancialServicesAnomalyRates::account_manipulation`.
fn default_account_manip_rate() -> f64 {
    0.005_f64
}
11114
11115impl Default for FinancialServicesAnomalyRates {
11116    fn default() -> Self {
11117        Self {
11118            loan_fraud: default_loan_fraud_rate(),
11119            trading_fraud: default_trading_fraud_rate(),
11120            insurance_fraud: default_insurance_fraud_rate(),
11121            account_manipulation: default_account_manip_rate(),
11122        }
11123    }
11124}
11125
11126/// Professional services industry configuration.
11127#[derive(Debug, Clone, Serialize, Deserialize)]
11128pub struct ProfessionalServicesConfig {
11129    /// Enable professional services-specific generation.
11130    #[serde(default)]
11131    pub enabled: bool,
11132
11133    /// Firm type.
11134    #[serde(default = "default_firm_type")]
11135    pub firm_type: String,
11136
11137    /// Billing model.
11138    #[serde(default = "default_billing_model")]
11139    pub billing_model: String,
11140
11141    /// Average hourly rate.
11142    #[serde(default = "default_hourly_rate")]
11143    pub avg_hourly_rate: f64,
11144
11145    /// Trust account settings (for law firms).
11146    #[serde(default)]
11147    pub trust_accounting: TrustAccountingConfig,
11148
11149    /// Professional services anomaly injection rates.
11150    #[serde(default)]
11151    pub anomaly_rates: ProfessionalServicesAnomalyRates,
11152}
11153
/// Serde fallback for `ProfessionalServicesConfig::firm_type`.
fn default_firm_type() -> String {
    String::from("consulting")
}

/// Serde fallback for `ProfessionalServicesConfig::billing_model`.
fn default_billing_model() -> String {
    String::from("time_and_materials")
}

/// Serde fallback for `ProfessionalServicesConfig::avg_hourly_rate`.
fn default_hourly_rate() -> f64 {
    250.0_f64
}
11165
11166impl Default for ProfessionalServicesConfig {
11167    fn default() -> Self {
11168        Self {
11169            enabled: false,
11170            firm_type: default_firm_type(),
11171            billing_model: default_billing_model(),
11172            avg_hourly_rate: default_hourly_rate(),
11173            trust_accounting: TrustAccountingConfig::default(),
11174            anomaly_rates: ProfessionalServicesAnomalyRates::default(),
11175        }
11176    }
11177}
11178
11179/// Trust accounting configuration for law firms.
11180#[derive(Debug, Clone, Serialize, Deserialize)]
11181pub struct TrustAccountingConfig {
11182    /// Enable trust accounting.
11183    #[serde(default)]
11184    pub enabled: bool,
11185
11186    /// Require three-way reconciliation.
11187    #[serde(default = "default_true")]
11188    pub require_three_way_reconciliation: bool,
11189}
11190
11191impl Default for TrustAccountingConfig {
11192    fn default() -> Self {
11193        Self {
11194            enabled: false,
11195            require_three_way_reconciliation: true,
11196        }
11197    }
11198}
11199
11200/// Professional services anomaly injection rates.
11201#[derive(Debug, Clone, Serialize, Deserialize)]
11202pub struct ProfessionalServicesAnomalyRates {
11203    /// Time billing fraud rate.
11204    #[serde(default = "default_time_fraud_rate")]
11205    pub time_billing_fraud: f64,
11206
11207    /// Expense report fraud rate.
11208    #[serde(default = "default_expense_fraud_rate")]
11209    pub expense_fraud: f64,
11210
11211    /// Trust misappropriation rate.
11212    #[serde(default = "default_trust_misappropriation_rate")]
11213    pub trust_misappropriation: f64,
11214}
11215
/// Serde fallback for `ProfessionalServicesAnomalyRates::time_billing_fraud`.
fn default_time_fraud_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `ProfessionalServicesAnomalyRates::expense_fraud`.
fn default_expense_fraud_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `ProfessionalServicesAnomalyRates::trust_misappropriation`.
fn default_trust_misappropriation_rate() -> f64 {
    0.003_f64
}
11227
11228impl Default for ProfessionalServicesAnomalyRates {
11229    fn default() -> Self {
11230        Self {
11231            time_billing_fraud: default_time_fraud_rate(),
11232            expense_fraud: default_expense_fraud_rate(),
11233            trust_misappropriation: default_trust_misappropriation_rate(),
11234        }
11235    }
11236}
11237
11238/// Fingerprint privacy configuration for extraction and synthesis.
11239///
11240/// Controls the privacy parameters used when extracting fingerprints
11241/// from sensitive data. Supports predefined levels or custom (epsilon, delta) tuples.
11242///
11243/// ```yaml
11244/// fingerprint_privacy:
11245///   level: custom
11246///   epsilon: 0.5
11247///   delta: 1.0e-5
11248///   k_anonymity: 10
11249///   composition_method: renyi_dp
11250/// ```
11251#[derive(Debug, Clone, Serialize, Deserialize)]
11252pub struct FingerprintPrivacyConfig {
11253    /// Privacy level preset. Use "custom" for user-specified epsilon/delta.
11254    #[serde(default)]
11255    pub level: String,
11256    /// Custom epsilon value (only used when level = "custom").
11257    #[serde(default = "default_epsilon")]
11258    pub epsilon: f64,
11259    /// Custom delta value for (epsilon, delta)-DP (only used with RDP/zCDP).
11260    #[serde(default = "default_delta")]
11261    pub delta: f64,
11262    /// K-anonymity threshold.
11263    #[serde(default = "default_k_anonymity")]
11264    pub k_anonymity: u32,
11265    /// Composition method: "naive", "advanced", "renyi_dp", "zcdp".
11266    #[serde(default)]
11267    pub composition_method: String,
11268}
11269
/// Serde fallback for `FingerprintPrivacyConfig::epsilon`.
fn default_epsilon() -> f64 {
    1.0_f64
}

/// Serde fallback for `FingerprintPrivacyConfig::delta`.
fn default_delta() -> f64 {
    1e-5_f64
}

/// Serde fallback for `FingerprintPrivacyConfig::k_anonymity`.
fn default_k_anonymity() -> u32 {
    5_u32
}
11281
11282impl Default for FingerprintPrivacyConfig {
11283    fn default() -> Self {
11284        Self {
11285            level: "standard".to_string(),
11286            epsilon: default_epsilon(),
11287            delta: default_delta(),
11288            k_anonymity: default_k_anonymity(),
11289            composition_method: "naive".to_string(),
11290        }
11291    }
11292}
11293
11294/// Quality gates configuration for pass/fail thresholds on generation runs.
11295///
11296/// ```yaml
11297/// quality_gates:
11298///   enabled: true
11299///   profile: strict  # strict, default, lenient, custom
11300///   fail_on_violation: true
11301///   custom_gates:
11302///     - name: benford_compliance
11303///       metric: benford_mad
11304///       threshold: 0.015
11305///       comparison: lte
11306/// ```
11307#[derive(Debug, Clone, Serialize, Deserialize)]
11308pub struct QualityGatesSchemaConfig {
11309    /// Enable quality gate evaluation.
11310    #[serde(default)]
11311    pub enabled: bool,
11312    /// Gate profile: "strict", "default", "lenient", or "custom".
11313    #[serde(default = "default_gate_profile_name")]
11314    pub profile: String,
11315    /// Whether to fail the generation on gate violations.
11316    #[serde(default)]
11317    pub fail_on_violation: bool,
11318    /// Custom gate definitions (used when profile = "custom").
11319    #[serde(default)]
11320    pub custom_gates: Vec<QualityGateEntry>,
11321}
11322
/// Name of the gate profile selected when none is configured.
fn default_gate_profile_name() -> String {
    String::from("default")
}
11326
11327impl Default for QualityGatesSchemaConfig {
11328    fn default() -> Self {
11329        Self {
11330            enabled: false,
11331            profile: default_gate_profile_name(),
11332            fail_on_violation: false,
11333            custom_gates: Vec::new(),
11334        }
11335    }
11336}
11337
11338/// A single quality gate entry in configuration.
11339#[derive(Debug, Clone, Serialize, Deserialize)]
11340pub struct QualityGateEntry {
11341    /// Gate name.
11342    pub name: String,
11343    /// Metric to check: benford_mad, balance_coherence, document_chain_integrity,
11344    /// correlation_preservation, temporal_consistency, privacy_mia_auc,
11345    /// completion_rate, duplicate_rate, referential_integrity, ic_match_rate.
11346    pub metric: String,
11347    /// Threshold value.
11348    pub threshold: f64,
11349    /// Upper threshold for "between" comparison.
11350    #[serde(default)]
11351    pub upper_threshold: Option<f64>,
11352    /// Comparison operator: "gte", "lte", "eq", "between".
11353    #[serde(default = "default_gate_comparison")]
11354    pub comparison: String,
11355}
11356
/// Comparison operator used when a gate entry does not name one.
fn default_gate_comparison() -> String {
    String::from("gte")
}
11360
11361/// Compliance configuration for regulatory requirements.
11362///
11363/// ```yaml
11364/// compliance:
11365///   content_marking:
11366///     enabled: true
11367///     format: embedded  # embedded, sidecar, both
11368///   article10_report: true
11369/// ```
11370#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11371pub struct ComplianceSchemaConfig {
11372    /// Synthetic content marking configuration (EU AI Act Article 50).
11373    #[serde(default)]
11374    pub content_marking: ContentMarkingSchemaConfig,
11375    /// Generate Article 10 data governance report.
11376    #[serde(default)]
11377    pub article10_report: bool,
11378    /// Certificate configuration for proving DP guarantees.
11379    #[serde(default)]
11380    pub certificates: CertificateSchemaConfig,
11381}
11382
11383/// Configuration for synthetic data certificates.
11384#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11385pub struct CertificateSchemaConfig {
11386    /// Whether certificate generation is enabled.
11387    #[serde(default)]
11388    pub enabled: bool,
11389    /// Environment variable name for the signing key.
11390    #[serde(default)]
11391    pub signing_key_env: Option<String>,
11392    /// Whether to include quality metrics in the certificate.
11393    #[serde(default)]
11394    pub include_quality_metrics: bool,
11395}
11396
/// Content marking configuration for synthetic data output.
///
/// Both fields are optional in the input; each falls back to its own serde
/// default function when the key is missing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentMarkingSchemaConfig {
    /// Whether content marking is enabled.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Marking format: "embedded", "sidecar", or "both".
    #[serde(default = "default_marking_format")]
    pub format: String,
}
11407
/// Marking format used when the configuration does not specify one.
fn default_marking_format() -> String {
    String::from("embedded")
}
11411
11412impl Default for ContentMarkingSchemaConfig {
11413    fn default() -> Self {
11414        Self {
11415            enabled: true,
11416            format: default_marking_format(),
11417        }
11418    }
11419}
11420
/// Webhook notification configuration.
///
/// Both fields are optional; the derived `Default` leaves webhooks disabled
/// with an empty endpoint list.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WebhookSchemaConfig {
    /// Whether webhooks are enabled.
    #[serde(default)]
    pub enabled: bool,
    /// Webhook endpoint configurations.
    #[serde(default)]
    pub endpoints: Vec<WebhookEndpointConfig>,
}
11431
11432/// Configuration for a single webhook endpoint.
11433#[derive(Debug, Clone, Serialize, Deserialize)]
11434pub struct WebhookEndpointConfig {
11435    /// Target URL for the webhook.
11436    pub url: String,
11437    /// Event types this endpoint subscribes to.
11438    #[serde(default)]
11439    pub events: Vec<String>,
11440    /// Optional secret for HMAC-SHA256 signature.
11441    #[serde(default)]
11442    pub secret: Option<String>,
11443    /// Maximum retry attempts (default: 3).
11444    #[serde(default = "default_webhook_retries")]
11445    pub max_retries: u32,
11446    /// Timeout in seconds (default: 10).
11447    #[serde(default = "default_webhook_timeout")]
11448    pub timeout_secs: u64,
11449}
11450
/// Retry attempts allowed for a webhook endpoint when none are configured.
fn default_webhook_retries() -> u32 {
    3_u32
}

/// Webhook timeout, in seconds, when none is configured.
fn default_webhook_timeout() -> u64 {
    10_u64
}
11457
11458// ===== Enterprise Process Chain Config Structs =====
11459
11460// ----- Source-to-Pay (S2C/S2P) -----
11461
11462/// Source-to-Pay configuration covering the entire sourcing lifecycle.
11463#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11464pub struct SourceToPayConfig {
11465    /// Enable source-to-pay generation
11466    #[serde(default)]
11467    pub enabled: bool,
11468    /// Spend analysis configuration
11469    #[serde(default)]
11470    pub spend_analysis: SpendAnalysisConfig,
11471    /// Sourcing project configuration
11472    #[serde(default)]
11473    pub sourcing: SourcingConfig,
11474    /// Supplier qualification configuration
11475    #[serde(default)]
11476    pub qualification: QualificationConfig,
11477    /// RFx event configuration
11478    #[serde(default)]
11479    pub rfx: RfxConfig,
11480    /// Contract configuration
11481    #[serde(default)]
11482    pub contracts: ContractConfig,
11483    /// Catalog configuration
11484    #[serde(default)]
11485    pub catalog: CatalogConfig,
11486    /// Scorecard configuration
11487    #[serde(default)]
11488    pub scorecards: ScorecardConfig,
11489    /// P2P integration settings
11490    #[serde(default)]
11491    pub p2p_integration: P2PIntegrationConfig,
11492}
11493
11494/// Spend analysis configuration.
11495#[derive(Debug, Clone, Serialize, Deserialize)]
11496pub struct SpendAnalysisConfig {
11497    /// HHI threshold for triggering sourcing project
11498    #[serde(default = "default_hhi_threshold")]
11499    pub hhi_threshold: f64,
11500    /// Target spend coverage under contracts
11501    #[serde(default = "default_contract_coverage_target")]
11502    pub contract_coverage_target: f64,
11503}
11504
11505impl Default for SpendAnalysisConfig {
11506    fn default() -> Self {
11507        Self {
11508            hhi_threshold: default_hhi_threshold(),
11509            contract_coverage_target: default_contract_coverage_target(),
11510        }
11511    }
11512}
11513
/// HHI threshold used when none is configured.
fn default_hhi_threshold() -> f64 {
    2_500.0_f64
}

/// Contract coverage target used when none is configured.
fn default_contract_coverage_target() -> f64 {
    0.80_f64
}
11520
11521/// Sourcing project configuration.
11522#[derive(Debug, Clone, Serialize, Deserialize)]
11523pub struct SourcingConfig {
11524    /// Number of sourcing projects per year
11525    #[serde(default = "default_sourcing_projects_per_year")]
11526    pub projects_per_year: u32,
11527    /// Months before expiry to trigger renewal project
11528    #[serde(default = "default_renewal_horizon_months")]
11529    pub renewal_horizon_months: u32,
11530    /// Average project duration in months
11531    #[serde(default = "default_project_duration_months")]
11532    pub project_duration_months: u32,
11533}
11534
11535impl Default for SourcingConfig {
11536    fn default() -> Self {
11537        Self {
11538            projects_per_year: default_sourcing_projects_per_year(),
11539            renewal_horizon_months: default_renewal_horizon_months(),
11540            project_duration_months: default_project_duration_months(),
11541        }
11542    }
11543}
11544
/// Sourcing projects per year when none is configured.
fn default_sourcing_projects_per_year() -> u32 {
    10_u32
}

/// Renewal horizon, in months, when none is configured.
fn default_renewal_horizon_months() -> u32 {
    3_u32
}

/// Average project duration, in months, when none is configured.
fn default_project_duration_months() -> u32 {
    4_u32
}
11554
11555/// Supplier qualification configuration.
11556#[derive(Debug, Clone, Serialize, Deserialize)]
11557pub struct QualificationConfig {
11558    /// Pass rate for qualification
11559    #[serde(default = "default_qualification_pass_rate")]
11560    pub pass_rate: f64,
11561    /// Qualification validity in days
11562    #[serde(default = "default_qualification_validity_days")]
11563    pub validity_days: u32,
11564    /// Financial stability weight
11565    #[serde(default = "default_financial_weight")]
11566    pub financial_weight: f64,
11567    /// Quality management weight
11568    #[serde(default = "default_quality_weight")]
11569    pub quality_weight: f64,
11570    /// Delivery performance weight
11571    #[serde(default = "default_delivery_weight")]
11572    pub delivery_weight: f64,
11573    /// Compliance weight
11574    #[serde(default = "default_compliance_weight")]
11575    pub compliance_weight: f64,
11576}
11577
11578impl Default for QualificationConfig {
11579    fn default() -> Self {
11580        Self {
11581            pass_rate: default_qualification_pass_rate(),
11582            validity_days: default_qualification_validity_days(),
11583            financial_weight: default_financial_weight(),
11584            quality_weight: default_quality_weight(),
11585            delivery_weight: default_delivery_weight(),
11586            compliance_weight: default_compliance_weight(),
11587        }
11588    }
11589}
11590
/// Qualification pass rate when none is configured.
fn default_qualification_pass_rate() -> f64 {
    0.75_f64
}

/// Qualification validity, in days, when none is configured.
fn default_qualification_validity_days() -> u32 {
    365_u32
}

/// Financial stability weight when none is configured.
fn default_financial_weight() -> f64 {
    0.25_f64
}

/// Quality management weight when none is configured.
fn default_quality_weight() -> f64 {
    0.30_f64
}

/// Delivery performance weight when none is configured.
fn default_delivery_weight() -> f64 {
    0.25_f64
}

/// Compliance weight when none is configured.
fn default_compliance_weight() -> f64 {
    0.20_f64
}
11609
11610/// RFx event configuration.
11611#[derive(Debug, Clone, Serialize, Deserialize)]
11612pub struct RfxConfig {
11613    /// Spend threshold above which RFI is required before RFP
11614    #[serde(default = "default_rfi_threshold")]
11615    pub rfi_threshold: f64,
11616    /// Minimum vendors invited per RFx
11617    #[serde(default = "default_min_invited_vendors")]
11618    pub min_invited_vendors: u32,
11619    /// Maximum vendors invited per RFx
11620    #[serde(default = "default_max_invited_vendors")]
11621    pub max_invited_vendors: u32,
11622    /// Response rate (% of invited vendors that submit bids)
11623    #[serde(default = "default_response_rate")]
11624    pub response_rate: f64,
11625    /// Default price weight in evaluation
11626    #[serde(default = "default_price_weight")]
11627    pub default_price_weight: f64,
11628    /// Default quality weight in evaluation
11629    #[serde(default = "default_rfx_quality_weight")]
11630    pub default_quality_weight: f64,
11631    /// Default delivery weight in evaluation
11632    #[serde(default = "default_rfx_delivery_weight")]
11633    pub default_delivery_weight: f64,
11634}
11635
11636impl Default for RfxConfig {
11637    fn default() -> Self {
11638        Self {
11639            rfi_threshold: default_rfi_threshold(),
11640            min_invited_vendors: default_min_invited_vendors(),
11641            max_invited_vendors: default_max_invited_vendors(),
11642            response_rate: default_response_rate(),
11643            default_price_weight: default_price_weight(),
11644            default_quality_weight: default_rfx_quality_weight(),
11645            default_delivery_weight: default_rfx_delivery_weight(),
11646        }
11647    }
11648}
11649
/// RFI spend threshold when none is configured.
fn default_rfi_threshold() -> f64 {
    100_000.0_f64
}

/// Minimum invited vendors per RFx when none is configured.
fn default_min_invited_vendors() -> u32 {
    3_u32
}

/// Maximum invited vendors per RFx when none is configured.
fn default_max_invited_vendors() -> u32 {
    8_u32
}

/// Vendor response rate when none is configured.
fn default_response_rate() -> f64 {
    0.70_f64
}

/// Price weight in RFx evaluation when none is configured.
fn default_price_weight() -> f64 {
    0.40_f64
}

/// Quality weight in RFx evaluation when none is configured.
fn default_rfx_quality_weight() -> f64 {
    0.35_f64
}

/// Delivery weight in RFx evaluation when none is configured.
fn default_rfx_delivery_weight() -> f64 {
    0.25_f64
}
11671
11672/// Contract configuration.
11673#[derive(Debug, Clone, Serialize, Deserialize)]
11674pub struct ContractConfig {
11675    /// Minimum contract duration in months
11676    #[serde(default = "default_min_contract_months")]
11677    pub min_duration_months: u32,
11678    /// Maximum contract duration in months
11679    #[serde(default = "default_max_contract_months")]
11680    pub max_duration_months: u32,
11681    /// Auto-renewal rate
11682    #[serde(default = "default_auto_renewal_rate")]
11683    pub auto_renewal_rate: f64,
11684    /// Amendment rate (% of contracts with at least one amendment)
11685    #[serde(default = "default_amendment_rate")]
11686    pub amendment_rate: f64,
11687    /// Distribution of contract types
11688    #[serde(default)]
11689    pub type_distribution: ContractTypeDistribution,
11690}
11691
11692impl Default for ContractConfig {
11693    fn default() -> Self {
11694        Self {
11695            min_duration_months: default_min_contract_months(),
11696            max_duration_months: default_max_contract_months(),
11697            auto_renewal_rate: default_auto_renewal_rate(),
11698            amendment_rate: default_amendment_rate(),
11699            type_distribution: ContractTypeDistribution::default(),
11700        }
11701    }
11702}
11703
/// Minimum contract duration, in months, when none is configured.
fn default_min_contract_months() -> u32 {
    12_u32
}

/// Maximum contract duration, in months, when none is configured.
fn default_max_contract_months() -> u32 {
    36_u32
}

/// Auto-renewal rate when none is configured.
fn default_auto_renewal_rate() -> f64 {
    0.40_f64
}

/// Amendment rate when none is configured.
fn default_amendment_rate() -> f64 {
    0.20_f64
}
11716
11717/// Distribution of contract types.
11718#[derive(Debug, Clone, Serialize, Deserialize)]
11719pub struct ContractTypeDistribution {
11720    /// Fixed price percentage
11721    #[serde(default = "default_fixed_price_pct")]
11722    pub fixed_price: f64,
11723    /// Blanket/framework percentage
11724    #[serde(default = "default_blanket_pct")]
11725    pub blanket: f64,
11726    /// Time and materials percentage
11727    #[serde(default = "default_time_materials_pct")]
11728    pub time_and_materials: f64,
11729    /// Service agreement percentage
11730    #[serde(default = "default_service_agreement_pct")]
11731    pub service_agreement: f64,
11732}
11733
11734impl Default for ContractTypeDistribution {
11735    fn default() -> Self {
11736        Self {
11737            fixed_price: default_fixed_price_pct(),
11738            blanket: default_blanket_pct(),
11739            time_and_materials: default_time_materials_pct(),
11740            service_agreement: default_service_agreement_pct(),
11741        }
11742    }
11743}
11744
/// Fixed-price share when none is configured.
fn default_fixed_price_pct() -> f64 {
    0.40_f64
}

/// Blanket/framework share when none is configured.
fn default_blanket_pct() -> f64 {
    0.30_f64
}

/// Time-and-materials share when none is configured.
fn default_time_materials_pct() -> f64 {
    0.15_f64
}

/// Service-agreement share when none is configured.
fn default_service_agreement_pct() -> f64 {
    0.15_f64
}
11757
11758/// Catalog configuration.
11759#[derive(Debug, Clone, Serialize, Deserialize)]
11760pub struct CatalogConfig {
11761    /// Percentage of catalog items marked as preferred
11762    #[serde(default = "default_preferred_vendor_flag_rate")]
11763    pub preferred_vendor_flag_rate: f64,
11764    /// Rate of materials with multiple sources in catalog
11765    #[serde(default = "default_multi_source_rate")]
11766    pub multi_source_rate: f64,
11767}
11768
11769impl Default for CatalogConfig {
11770    fn default() -> Self {
11771        Self {
11772            preferred_vendor_flag_rate: default_preferred_vendor_flag_rate(),
11773            multi_source_rate: default_multi_source_rate(),
11774        }
11775    }
11776}
11777
/// Preferred-vendor flag rate when none is configured.
fn default_preferred_vendor_flag_rate() -> f64 {
    0.70_f64
}

/// Multi-source rate when none is configured.
fn default_multi_source_rate() -> f64 {
    0.25_f64
}
11784
11785/// Scorecard configuration.
11786#[derive(Debug, Clone, Serialize, Deserialize)]
11787pub struct ScorecardConfig {
11788    /// Scorecard review frequency (quarterly, monthly)
11789    #[serde(default = "default_scorecard_frequency")]
11790    pub frequency: String,
11791    /// On-time delivery weight in overall score
11792    #[serde(default = "default_otd_weight")]
11793    pub on_time_delivery_weight: f64,
11794    /// Quality weight in overall score
11795    #[serde(default = "default_quality_score_weight")]
11796    pub quality_weight: f64,
11797    /// Price competitiveness weight
11798    #[serde(default = "default_price_score_weight")]
11799    pub price_weight: f64,
11800    /// Responsiveness weight
11801    #[serde(default = "default_responsiveness_weight")]
11802    pub responsiveness_weight: f64,
11803    /// Grade A threshold (score >= this)
11804    #[serde(default = "default_grade_a_threshold")]
11805    pub grade_a_threshold: f64,
11806    /// Grade B threshold
11807    #[serde(default = "default_grade_b_threshold")]
11808    pub grade_b_threshold: f64,
11809    /// Grade C threshold
11810    #[serde(default = "default_grade_c_threshold")]
11811    pub grade_c_threshold: f64,
11812}
11813
11814impl Default for ScorecardConfig {
11815    fn default() -> Self {
11816        Self {
11817            frequency: default_scorecard_frequency(),
11818            on_time_delivery_weight: default_otd_weight(),
11819            quality_weight: default_quality_score_weight(),
11820            price_weight: default_price_score_weight(),
11821            responsiveness_weight: default_responsiveness_weight(),
11822            grade_a_threshold: default_grade_a_threshold(),
11823            grade_b_threshold: default_grade_b_threshold(),
11824            grade_c_threshold: default_grade_c_threshold(),
11825        }
11826    }
11827}
11828
/// Scorecard review frequency when none is configured.
fn default_scorecard_frequency() -> String {
    String::from("quarterly")
}

/// On-time delivery weight when none is configured.
fn default_otd_weight() -> f64 {
    0.30_f64
}

/// Quality weight in the overall score when none is configured.
fn default_quality_score_weight() -> f64 {
    0.30_f64
}

/// Price competitiveness weight when none is configured.
fn default_price_score_weight() -> f64 {
    0.25_f64
}

/// Responsiveness weight when none is configured.
fn default_responsiveness_weight() -> f64 {
    0.15_f64
}

/// Grade A score threshold when none is configured.
fn default_grade_a_threshold() -> f64 {
    90.0_f64
}

/// Grade B score threshold when none is configured.
fn default_grade_b_threshold() -> f64 {
    75.0_f64
}

/// Grade C score threshold when none is configured.
fn default_grade_c_threshold() -> f64 {
    60.0_f64
}
11853
11854/// P2P integration settings for contract enforcement.
11855#[derive(Debug, Clone, Serialize, Deserialize)]
11856pub struct P2PIntegrationConfig {
11857    /// Rate of off-contract (maverick) purchases
11858    #[serde(default = "default_off_contract_rate")]
11859    pub off_contract_rate: f64,
11860    /// Price tolerance for contract price validation
11861    #[serde(default = "default_price_tolerance")]
11862    pub price_tolerance: f64,
11863    /// Whether to enforce catalog ordering
11864    #[serde(default)]
11865    pub catalog_enforcement: bool,
11866}
11867
11868impl Default for P2PIntegrationConfig {
11869    fn default() -> Self {
11870        Self {
11871            off_contract_rate: default_off_contract_rate(),
11872            price_tolerance: default_price_tolerance(),
11873            catalog_enforcement: false,
11874        }
11875    }
11876}
11877
/// Off-contract purchase rate when none is configured.
fn default_off_contract_rate() -> f64 {
    0.15_f64
}

/// Contract price tolerance when none is configured.
fn default_price_tolerance() -> f64 {
    0.02_f64
}
11884
11885// ----- Financial Reporting -----
11886
11887/// Financial reporting configuration.
11888#[derive(Debug, Clone, Serialize, Deserialize)]
11889pub struct FinancialReportingConfig {
11890    /// Enable financial reporting generation
11891    #[serde(default)]
11892    pub enabled: bool,
11893    /// Generate balance sheet
11894    #[serde(default = "default_true")]
11895    pub generate_balance_sheet: bool,
11896    /// Generate income statement
11897    #[serde(default = "default_true")]
11898    pub generate_income_statement: bool,
11899    /// Generate cash flow statement
11900    #[serde(default = "default_true")]
11901    pub generate_cash_flow: bool,
11902    /// Generate changes in equity statement
11903    #[serde(default = "default_true")]
11904    pub generate_changes_in_equity: bool,
11905    /// Number of comparative periods
11906    #[serde(default = "default_comparative_periods")]
11907    pub comparative_periods: u32,
11908    /// Management KPIs configuration
11909    #[serde(default)]
11910    pub management_kpis: ManagementKpisConfig,
11911    /// Budget configuration
11912    #[serde(default)]
11913    pub budgets: BudgetConfig,
11914}
11915
11916impl Default for FinancialReportingConfig {
11917    fn default() -> Self {
11918        Self {
11919            enabled: false,
11920            generate_balance_sheet: true,
11921            generate_income_statement: true,
11922            generate_cash_flow: true,
11923            generate_changes_in_equity: true,
11924            comparative_periods: default_comparative_periods(),
11925            management_kpis: ManagementKpisConfig::default(),
11926            budgets: BudgetConfig::default(),
11927        }
11928    }
11929}
11930
/// Number of comparative periods when none is configured.
fn default_comparative_periods() -> u32 {
    1_u32
}
11934
11935/// Management KPIs configuration.
11936#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11937pub struct ManagementKpisConfig {
11938    /// Enable KPI generation
11939    #[serde(default)]
11940    pub enabled: bool,
11941    /// KPI calculation frequency (monthly, quarterly)
11942    #[serde(default = "default_kpi_frequency")]
11943    pub frequency: String,
11944}
11945
/// KPI calculation frequency when none is configured.
fn default_kpi_frequency() -> String {
    String::from("monthly")
}
11949
11950/// Budget configuration.
11951#[derive(Debug, Clone, Serialize, Deserialize)]
11952pub struct BudgetConfig {
11953    /// Enable budget generation
11954    #[serde(default)]
11955    pub enabled: bool,
11956    /// Expected revenue growth rate for budgeting
11957    #[serde(default = "default_revenue_growth_rate")]
11958    pub revenue_growth_rate: f64,
11959    /// Expected expense inflation rate
11960    #[serde(default = "default_expense_inflation_rate")]
11961    pub expense_inflation_rate: f64,
11962    /// Random noise to add to budget vs actual
11963    #[serde(default = "default_variance_noise")]
11964    pub variance_noise: f64,
11965}
11966
11967impl Default for BudgetConfig {
11968    fn default() -> Self {
11969        Self {
11970            enabled: false,
11971            revenue_growth_rate: default_revenue_growth_rate(),
11972            expense_inflation_rate: default_expense_inflation_rate(),
11973            variance_noise: default_variance_noise(),
11974        }
11975    }
11976}
11977
/// Budgeted revenue growth rate when none is configured.
fn default_revenue_growth_rate() -> f64 {
    0.05_f64
}

/// Budgeted expense inflation rate when none is configured.
fn default_expense_inflation_rate() -> f64 {
    0.03_f64
}

/// Budget-vs-actual variance noise when none is configured.
fn default_variance_noise() -> f64 {
    0.10_f64
}
11987
11988// ----- HR Configuration -----
11989
11990/// HR (Hire-to-Retire) process configuration.
11991#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11992pub struct HrConfig {
11993    /// Enable HR generation
11994    #[serde(default)]
11995    pub enabled: bool,
11996    /// Payroll configuration
11997    #[serde(default)]
11998    pub payroll: PayrollConfig,
11999    /// Time and attendance configuration
12000    #[serde(default)]
12001    pub time_attendance: TimeAttendanceConfig,
12002    /// Expense management configuration
12003    #[serde(default)]
12004    pub expenses: ExpenseConfig,
12005}
12006
12007/// Payroll configuration.
12008#[derive(Debug, Clone, Serialize, Deserialize)]
12009pub struct PayrollConfig {
12010    /// Enable payroll generation
12011    #[serde(default = "default_true")]
12012    pub enabled: bool,
12013    /// Pay frequency (monthly, biweekly, weekly)
12014    #[serde(default = "default_pay_frequency")]
12015    pub pay_frequency: String,
12016    /// Salary ranges by job level
12017    #[serde(default)]
12018    pub salary_ranges: PayrollSalaryRanges,
12019    /// Effective tax rates
12020    #[serde(default)]
12021    pub tax_rates: PayrollTaxRates,
12022    /// Benefits enrollment rate
12023    #[serde(default = "default_benefits_enrollment_rate")]
12024    pub benefits_enrollment_rate: f64,
12025    /// Retirement plan participation rate
12026    #[serde(default = "default_retirement_participation_rate")]
12027    pub retirement_participation_rate: f64,
12028}
12029
12030impl Default for PayrollConfig {
12031    fn default() -> Self {
12032        Self {
12033            enabled: true,
12034            pay_frequency: default_pay_frequency(),
12035            salary_ranges: PayrollSalaryRanges::default(),
12036            tax_rates: PayrollTaxRates::default(),
12037            benefits_enrollment_rate: default_benefits_enrollment_rate(),
12038            retirement_participation_rate: default_retirement_participation_rate(),
12039        }
12040    }
12041}
12042
/// Pay frequency when none is configured.
fn default_pay_frequency() -> String {
    String::from("monthly")
}

/// Benefits enrollment rate when none is configured.
fn default_benefits_enrollment_rate() -> f64 {
    0.60_f64
}

/// Retirement plan participation rate when none is configured.
fn default_retirement_participation_rate() -> f64 {
    0.45_f64
}
12052
12053/// Salary ranges by job level.
12054#[derive(Debug, Clone, Serialize, Deserialize)]
12055pub struct PayrollSalaryRanges {
12056    /// Staff level min/max
12057    #[serde(default = "default_staff_min")]
12058    pub staff_min: f64,
12059    #[serde(default = "default_staff_max")]
12060    pub staff_max: f64,
12061    /// Manager level min/max
12062    #[serde(default = "default_manager_min")]
12063    pub manager_min: f64,
12064    #[serde(default = "default_manager_max")]
12065    pub manager_max: f64,
12066    /// Director level min/max
12067    #[serde(default = "default_director_min")]
12068    pub director_min: f64,
12069    #[serde(default = "default_director_max")]
12070    pub director_max: f64,
12071    /// Executive level min/max
12072    #[serde(default = "default_executive_min")]
12073    pub executive_min: f64,
12074    #[serde(default = "default_executive_max")]
12075    pub executive_max: f64,
12076}
12077
12078impl Default for PayrollSalaryRanges {
12079    fn default() -> Self {
12080        Self {
12081            staff_min: default_staff_min(),
12082            staff_max: default_staff_max(),
12083            manager_min: default_manager_min(),
12084            manager_max: default_manager_max(),
12085            director_min: default_director_min(),
12086            director_max: default_director_max(),
12087            executive_min: default_executive_min(),
12088            executive_max: default_executive_max(),
12089        }
12090    }
12091}
12092
/// Staff salary floor when none is configured.
fn default_staff_min() -> f64 {
    50_000.0_f64
}

/// Staff salary ceiling when none is configured.
fn default_staff_max() -> f64 {
    70_000.0_f64
}

/// Manager salary floor when none is configured.
fn default_manager_min() -> f64 {
    80_000.0_f64
}

/// Manager salary ceiling when none is configured.
fn default_manager_max() -> f64 {
    120_000.0_f64
}

/// Director salary floor when none is configured.
fn default_director_min() -> f64 {
    120_000.0_f64
}

/// Director salary ceiling when none is configured.
fn default_director_max() -> f64 {
    180_000.0_f64
}

/// Executive salary floor when none is configured.
fn default_executive_min() -> f64 {
    180_000.0_f64
}

/// Executive salary ceiling when none is configured.
fn default_executive_max() -> f64 {
    350_000.0_f64
}
12117
12118/// Effective tax rates for payroll.
12119#[derive(Debug, Clone, Serialize, Deserialize)]
12120pub struct PayrollTaxRates {
12121    /// Federal effective tax rate
12122    #[serde(default = "default_federal_rate")]
12123    pub federal_effective: f64,
12124    /// State effective tax rate
12125    #[serde(default = "default_state_rate")]
12126    pub state_effective: f64,
12127    /// FICA/social security rate
12128    #[serde(default = "default_fica_rate")]
12129    pub fica: f64,
12130}
12131
12132impl Default for PayrollTaxRates {
12133    fn default() -> Self {
12134        Self {
12135            federal_effective: default_federal_rate(),
12136            state_effective: default_state_rate(),
12137            fica: default_fica_rate(),
12138        }
12139    }
12140}
12141
/// Federal effective tax rate when none is configured.
fn default_federal_rate() -> f64 {
    0.22_f64
}

/// State effective tax rate when none is configured.
fn default_state_rate() -> f64 {
    0.05_f64
}

/// FICA rate when none is configured.
fn default_fica_rate() -> f64 {
    0.0765_f64
}
12151
12152/// Time and attendance configuration.
12153#[derive(Debug, Clone, Serialize, Deserialize)]
12154pub struct TimeAttendanceConfig {
12155    /// Enable time tracking
12156    #[serde(default = "default_true")]
12157    pub enabled: bool,
12158    /// Overtime rate (% of employees with overtime in a period)
12159    #[serde(default = "default_overtime_rate")]
12160    pub overtime_rate: f64,
12161}
12162
12163impl Default for TimeAttendanceConfig {
12164    fn default() -> Self {
12165        Self {
12166            enabled: true,
12167            overtime_rate: default_overtime_rate(),
12168        }
12169    }
12170}
12171
/// Overtime rate when none is configured.
fn default_overtime_rate() -> f64 {
    0.10_f64
}
12175
12176/// Expense management configuration.
12177#[derive(Debug, Clone, Serialize, Deserialize)]
12178pub struct ExpenseConfig {
12179    /// Enable expense report generation
12180    #[serde(default = "default_true")]
12181    pub enabled: bool,
12182    /// Rate of employees submitting expenses per month
12183    #[serde(default = "default_expense_submission_rate")]
12184    pub submission_rate: f64,
12185    /// Rate of policy violations
12186    #[serde(default = "default_policy_violation_rate")]
12187    pub policy_violation_rate: f64,
12188}
12189
12190impl Default for ExpenseConfig {
12191    fn default() -> Self {
12192        Self {
12193            enabled: true,
12194            submission_rate: default_expense_submission_rate(),
12195            policy_violation_rate: default_policy_violation_rate(),
12196        }
12197    }
12198}
12199
/// Monthly expense submission rate when none is configured.
fn default_expense_submission_rate() -> f64 {
    0.30_f64
}

/// Expense policy violation rate when none is configured.
fn default_policy_violation_rate() -> f64 {
    0.08_f64
}
12206
12207// ----- Manufacturing Configuration -----
12208
12209/// Manufacturing process configuration (production orders, WIP, routing).
12210#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12211pub struct ManufacturingProcessConfig {
12212    /// Enable manufacturing generation
12213    #[serde(default)]
12214    pub enabled: bool,
12215    /// Production order configuration
12216    #[serde(default)]
12217    pub production_orders: ProductionOrderConfig,
12218    /// Costing configuration
12219    #[serde(default)]
12220    pub costing: ManufacturingCostingConfig,
12221    /// Routing configuration
12222    #[serde(default)]
12223    pub routing: RoutingConfig,
12224}
12225
12226/// Production order configuration.
12227#[derive(Debug, Clone, Serialize, Deserialize)]
12228pub struct ProductionOrderConfig {
12229    /// Orders per month
12230    #[serde(default = "default_prod_orders_per_month")]
12231    pub orders_per_month: u32,
12232    /// Average batch size
12233    #[serde(default = "default_prod_avg_batch_size")]
12234    pub avg_batch_size: u32,
12235    /// Yield rate
12236    #[serde(default = "default_prod_yield_rate")]
12237    pub yield_rate: f64,
12238    /// Make-to-order rate (vs make-to-stock)
12239    #[serde(default = "default_prod_make_to_order_rate")]
12240    pub make_to_order_rate: f64,
12241    /// Rework rate
12242    #[serde(default = "default_prod_rework_rate")]
12243    pub rework_rate: f64,
12244}
12245
12246impl Default for ProductionOrderConfig {
12247    fn default() -> Self {
12248        Self {
12249            orders_per_month: default_prod_orders_per_month(),
12250            avg_batch_size: default_prod_avg_batch_size(),
12251            yield_rate: default_prod_yield_rate(),
12252            make_to_order_rate: default_prod_make_to_order_rate(),
12253            rework_rate: default_prod_rework_rate(),
12254        }
12255    }
12256}
12257
/// Serde default for `ProductionOrderConfig::orders_per_month`.
fn default_prod_orders_per_month() -> u32 {
    const ORDERS_PER_MONTH: u32 = 50;
    ORDERS_PER_MONTH
}

/// Serde default for `ProductionOrderConfig::avg_batch_size`.
fn default_prod_avg_batch_size() -> u32 {
    const AVG_BATCH_SIZE: u32 = 100;
    AVG_BATCH_SIZE
}

/// Serde default for `ProductionOrderConfig::yield_rate`.
fn default_prod_yield_rate() -> f64 {
    const YIELD_RATE: f64 = 0.97;
    YIELD_RATE
}

/// Serde default for `ProductionOrderConfig::make_to_order_rate`.
fn default_prod_make_to_order_rate() -> f64 {
    const MAKE_TO_ORDER_RATE: f64 = 0.20;
    MAKE_TO_ORDER_RATE
}

/// Serde default for `ProductionOrderConfig::rework_rate`.
fn default_prod_rework_rate() -> f64 {
    const REWORK_RATE: f64 = 0.03;
    REWORK_RATE
}
12273
12274/// Manufacturing costing configuration.
12275#[derive(Debug, Clone, Serialize, Deserialize)]
12276pub struct ManufacturingCostingConfig {
12277    /// Labor rate per hour
12278    #[serde(default = "default_labor_rate")]
12279    pub labor_rate_per_hour: f64,
12280    /// Overhead application rate (multiplier on direct labor)
12281    #[serde(default = "default_overhead_rate")]
12282    pub overhead_rate: f64,
12283    /// Standard cost update frequency
12284    #[serde(default = "default_cost_update_frequency")]
12285    pub standard_cost_update_frequency: String,
12286}
12287
12288impl Default for ManufacturingCostingConfig {
12289    fn default() -> Self {
12290        Self {
12291            labor_rate_per_hour: default_labor_rate(),
12292            overhead_rate: default_overhead_rate(),
12293            standard_cost_update_frequency: default_cost_update_frequency(),
12294        }
12295    }
12296}
12297
/// Serde default for `ManufacturingCostingConfig::labor_rate_per_hour`.
fn default_labor_rate() -> f64 {
    const LABOR_RATE: f64 = 35.0;
    LABOR_RATE
}

/// Serde default for `ManufacturingCostingConfig::overhead_rate`.
fn default_overhead_rate() -> f64 {
    const OVERHEAD_RATE: f64 = 1.50;
    OVERHEAD_RATE
}

/// Serde default for `ManufacturingCostingConfig::standard_cost_update_frequency`.
fn default_cost_update_frequency() -> String {
    String::from("quarterly")
}
12307
12308/// Routing configuration for production operations.
12309#[derive(Debug, Clone, Serialize, Deserialize)]
12310pub struct RoutingConfig {
12311    /// Average number of operations per routing
12312    #[serde(default = "default_avg_operations")]
12313    pub avg_operations: u32,
12314    /// Average setup time in hours
12315    #[serde(default = "default_setup_time")]
12316    pub setup_time_hours: f64,
12317    /// Run time variation coefficient
12318    #[serde(default = "default_run_time_variation")]
12319    pub run_time_variation: f64,
12320}
12321
12322impl Default for RoutingConfig {
12323    fn default() -> Self {
12324        Self {
12325            avg_operations: default_avg_operations(),
12326            setup_time_hours: default_setup_time(),
12327            run_time_variation: default_run_time_variation(),
12328        }
12329    }
12330}
12331
/// Serde default for `RoutingConfig::avg_operations`.
fn default_avg_operations() -> u32 {
    const AVG_OPERATIONS: u32 = 4;
    AVG_OPERATIONS
}

/// Serde default for `RoutingConfig::setup_time_hours`.
fn default_setup_time() -> f64 {
    const SETUP_TIME_HOURS: f64 = 1.5;
    SETUP_TIME_HOURS
}

/// Serde default for `RoutingConfig::run_time_variation`.
fn default_run_time_variation() -> f64 {
    const RUN_TIME_VARIATION: f64 = 0.15;
    RUN_TIME_VARIATION
}
12341
12342// ----- Sales Quote Configuration -----
12343
12344/// Sales quote (quote-to-order) pipeline configuration.
12345#[derive(Debug, Clone, Serialize, Deserialize)]
12346pub struct SalesQuoteConfig {
12347    /// Enable sales quote generation
12348    #[serde(default)]
12349    pub enabled: bool,
12350    /// Quotes per month
12351    #[serde(default = "default_quotes_per_month")]
12352    pub quotes_per_month: u32,
12353    /// Win rate (fraction of quotes that convert to orders)
12354    #[serde(default = "default_quote_win_rate")]
12355    pub win_rate: f64,
12356    /// Average quote validity in days
12357    #[serde(default = "default_quote_validity_days")]
12358    pub validity_days: u32,
12359}
12360
12361impl Default for SalesQuoteConfig {
12362    fn default() -> Self {
12363        Self {
12364            enabled: false,
12365            quotes_per_month: default_quotes_per_month(),
12366            win_rate: default_quote_win_rate(),
12367            validity_days: default_quote_validity_days(),
12368        }
12369    }
12370}
12371
/// Serde default for `SalesQuoteConfig::quotes_per_month`.
fn default_quotes_per_month() -> u32 {
    const QUOTES_PER_MONTH: u32 = 30;
    QUOTES_PER_MONTH
}

/// Serde default for `SalesQuoteConfig::win_rate`.
fn default_quote_win_rate() -> f64 {
    const WIN_RATE: f64 = 0.35;
    WIN_RATE
}

/// Serde default for `SalesQuoteConfig::validity_days`.
fn default_quote_validity_days() -> u32 {
    const VALIDITY_DAYS: u32 = 30;
    VALIDITY_DAYS
}
12381
12382// =============================================================================
12383// Tax Accounting Configuration
12384// =============================================================================
12385
12386/// Tax accounting configuration.
12387///
12388/// Controls generation of tax-related data including VAT/GST, sales tax,
12389/// withholding tax, tax provisions, and payroll tax across multiple jurisdictions.
12390#[derive(Debug, Clone, Serialize, Deserialize)]
12391pub struct TaxConfig {
12392    /// Whether tax generation is enabled.
12393    #[serde(default)]
12394    pub enabled: bool,
12395    /// Tax jurisdiction configuration.
12396    #[serde(default)]
12397    pub jurisdictions: TaxJurisdictionConfig,
12398    /// VAT/GST configuration.
12399    #[serde(default)]
12400    pub vat_gst: VatGstConfig,
12401    /// Sales tax configuration.
12402    #[serde(default)]
12403    pub sales_tax: SalesTaxConfig,
12404    /// Withholding tax configuration.
12405    #[serde(default)]
12406    pub withholding: WithholdingTaxSchemaConfig,
12407    /// Tax provision configuration.
12408    #[serde(default)]
12409    pub provisions: TaxProvisionSchemaConfig,
12410    /// Payroll tax configuration.
12411    #[serde(default)]
12412    pub payroll_tax: PayrollTaxSchemaConfig,
12413    /// Anomaly injection rate for tax data (0.0 to 1.0).
12414    #[serde(default = "default_tax_anomaly_rate")]
12415    pub anomaly_rate: f64,
12416}
12417
/// Serde default for `TaxConfig::anomaly_rate`.
fn default_tax_anomaly_rate() -> f64 {
    const TAX_ANOMALY_RATE: f64 = 0.03;
    TAX_ANOMALY_RATE
}
12421
12422impl Default for TaxConfig {
12423    fn default() -> Self {
12424        Self {
12425            enabled: false,
12426            jurisdictions: TaxJurisdictionConfig::default(),
12427            vat_gst: VatGstConfig::default(),
12428            sales_tax: SalesTaxConfig::default(),
12429            withholding: WithholdingTaxSchemaConfig::default(),
12430            provisions: TaxProvisionSchemaConfig::default(),
12431            payroll_tax: PayrollTaxSchemaConfig::default(),
12432            anomaly_rate: default_tax_anomaly_rate(),
12433        }
12434    }
12435}
12436
12437/// Tax jurisdiction configuration.
12438///
12439/// Specifies which countries and subnational jurisdictions to include
12440/// when generating tax data.
12441#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12442pub struct TaxJurisdictionConfig {
12443    /// List of country codes to include (e.g., ["US", "DE", "GB"]).
12444    #[serde(default)]
12445    pub countries: Vec<String>,
12446    /// Whether to include subnational jurisdictions (e.g., US states, Canadian provinces).
12447    #[serde(default)]
12448    pub include_subnational: bool,
12449}
12450
12451/// VAT/GST configuration.
12452///
12453/// Controls generation of Value Added Tax / Goods and Services Tax data,
12454/// including standard and reduced rates, exempt categories, and reverse charge.
12455#[derive(Debug, Clone, Serialize, Deserialize)]
12456pub struct VatGstConfig {
12457    /// Whether VAT/GST generation is enabled.
12458    #[serde(default)]
12459    pub enabled: bool,
12460    /// Standard VAT/GST rates by country code (e.g., {"DE": 0.19, "GB": 0.20}).
12461    #[serde(default)]
12462    pub standard_rates: std::collections::HashMap<String, f64>,
12463    /// Reduced VAT/GST rates by country code (e.g., {"DE": 0.07, "GB": 0.05}).
12464    #[serde(default)]
12465    pub reduced_rates: std::collections::HashMap<String, f64>,
12466    /// Categories exempt from VAT/GST (e.g., ["financial_services", "healthcare"]).
12467    #[serde(default)]
12468    pub exempt_categories: Vec<String>,
12469    /// Whether to apply reverse charge mechanism for cross-border B2B transactions.
12470    #[serde(default = "default_true")]
12471    pub reverse_charge: bool,
12472}
12473
12474impl Default for VatGstConfig {
12475    fn default() -> Self {
12476        Self {
12477            enabled: false,
12478            standard_rates: std::collections::HashMap::new(),
12479            reduced_rates: std::collections::HashMap::new(),
12480            exempt_categories: Vec::new(),
12481            reverse_charge: true,
12482        }
12483    }
12484}
12485
12486/// Sales tax configuration.
12487///
12488/// Controls generation of US-style sales tax data including nexus determination.
12489#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12490pub struct SalesTaxConfig {
12491    /// Whether sales tax generation is enabled.
12492    #[serde(default)]
12493    pub enabled: bool,
12494    /// US states where the company has nexus (e.g., ["CA", "NY", "TX"]).
12495    #[serde(default)]
12496    pub nexus_states: Vec<String>,
12497}
12498
12499/// Withholding tax configuration.
12500///
12501/// Controls generation of withholding tax data for cross-border payments,
12502/// including treaty network and rate overrides.
12503#[derive(Debug, Clone, Serialize, Deserialize)]
12504pub struct WithholdingTaxSchemaConfig {
12505    /// Whether withholding tax generation is enabled.
12506    #[serde(default)]
12507    pub enabled: bool,
12508    /// Whether to simulate a treaty network with reduced rates.
12509    #[serde(default = "default_true")]
12510    pub treaty_network: bool,
12511    /// Default withholding tax rate for non-treaty countries (0.0 to 1.0).
12512    #[serde(default = "default_withholding_rate")]
12513    pub default_rate: f64,
12514    /// Reduced withholding tax rate for treaty countries (0.0 to 1.0).
12515    #[serde(default = "default_treaty_reduced_rate")]
12516    pub treaty_reduced_rate: f64,
12517}
12518
/// Serde default for `WithholdingTaxSchemaConfig::default_rate`.
fn default_withholding_rate() -> f64 {
    const NON_TREATY_RATE: f64 = 0.30;
    NON_TREATY_RATE
}

/// Serde default for `WithholdingTaxSchemaConfig::treaty_reduced_rate`.
fn default_treaty_reduced_rate() -> f64 {
    const TREATY_RATE: f64 = 0.15;
    TREATY_RATE
}
12526
12527impl Default for WithholdingTaxSchemaConfig {
12528    fn default() -> Self {
12529        Self {
12530            enabled: false,
12531            treaty_network: true,
12532            default_rate: default_withholding_rate(),
12533            treaty_reduced_rate: default_treaty_reduced_rate(),
12534        }
12535    }
12536}
12537
12538/// Tax provision configuration.
12539///
12540/// Controls generation of tax provision data including statutory rates
12541/// and uncertain tax positions (ASC 740 / IAS 12).
12542#[derive(Debug, Clone, Serialize, Deserialize)]
12543pub struct TaxProvisionSchemaConfig {
12544    /// Whether tax provision generation is enabled.
12545    /// Defaults to true when tax is enabled, as provisions are typically required.
12546    #[serde(default = "default_true")]
12547    pub enabled: bool,
12548    /// Statutory corporate tax rate (0.0 to 1.0).
12549    #[serde(default = "default_statutory_rate")]
12550    pub statutory_rate: f64,
12551    /// Whether to generate uncertain tax positions (FIN 48 / IFRIC 23).
12552    #[serde(default = "default_true")]
12553    pub uncertain_positions: bool,
12554}
12555
/// Serde default for `TaxProvisionSchemaConfig::statutory_rate`.
fn default_statutory_rate() -> f64 {
    const STATUTORY_RATE: f64 = 0.21;
    STATUTORY_RATE
}
12559
12560impl Default for TaxProvisionSchemaConfig {
12561    fn default() -> Self {
12562        Self {
12563            enabled: true,
12564            statutory_rate: default_statutory_rate(),
12565            uncertain_positions: true,
12566        }
12567    }
12568}
12569
12570/// Payroll tax configuration.
12571///
12572/// Controls generation of payroll tax data (employer/employee contributions,
12573/// social security, Medicare, etc.).
12574#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12575pub struct PayrollTaxSchemaConfig {
12576    /// Whether payroll tax generation is enabled.
12577    #[serde(default)]
12578    pub enabled: bool,
12579}
12580
12581// ---------------------------------------------------------------------------
12582// Treasury & Cash Management Configuration
12583// ---------------------------------------------------------------------------
12584
12585/// Treasury and cash management configuration.
12586///
12587/// Controls generation of cash positions, forecasts, pooling, hedging
12588/// instruments (ASC 815 / IFRS 9), debt instruments with covenants,
12589/// bank guarantees, and intercompany netting runs.
12590#[derive(Debug, Clone, Serialize, Deserialize)]
12591pub struct TreasuryConfig {
12592    /// Whether treasury generation is enabled.
12593    #[serde(default)]
12594    pub enabled: bool,
12595    /// Cash positioning configuration.
12596    #[serde(default)]
12597    pub cash_positioning: CashPositioningConfig,
12598    /// Cash forecasting configuration.
12599    #[serde(default)]
12600    pub cash_forecasting: CashForecastingConfig,
12601    /// Cash pooling configuration.
12602    #[serde(default)]
12603    pub cash_pooling: CashPoolingConfig,
12604    /// Hedging configuration (FX forwards, IR swaps, etc.).
12605    #[serde(default)]
12606    pub hedging: HedgingSchemaConfig,
12607    /// Debt instrument and covenant configuration.
12608    #[serde(default)]
12609    pub debt: DebtSchemaConfig,
12610    /// Intercompany netting configuration.
12611    #[serde(default)]
12612    pub netting: NettingSchemaConfig,
12613    /// Bank guarantee / letter of credit configuration.
12614    #[serde(default)]
12615    pub bank_guarantees: BankGuaranteeSchemaConfig,
12616    /// Anomaly injection rate for treasury data (0.0 to 1.0).
12617    #[serde(default = "default_treasury_anomaly_rate")]
12618    pub anomaly_rate: f64,
12619}
12620
/// Serde default for `TreasuryConfig::anomaly_rate`.
fn default_treasury_anomaly_rate() -> f64 {
    const TREASURY_ANOMALY_RATE: f64 = 0.02;
    TREASURY_ANOMALY_RATE
}
12624
12625impl Default for TreasuryConfig {
12626    fn default() -> Self {
12627        Self {
12628            enabled: false,
12629            cash_positioning: CashPositioningConfig::default(),
12630            cash_forecasting: CashForecastingConfig::default(),
12631            cash_pooling: CashPoolingConfig::default(),
12632            hedging: HedgingSchemaConfig::default(),
12633            debt: DebtSchemaConfig::default(),
12634            netting: NettingSchemaConfig::default(),
12635            bank_guarantees: BankGuaranteeSchemaConfig::default(),
12636            anomaly_rate: default_treasury_anomaly_rate(),
12637        }
12638    }
12639}
12640
12641/// Cash positioning configuration.
12642///
12643/// Controls daily cash position generation per entity/bank account.
12644#[derive(Debug, Clone, Serialize, Deserialize)]
12645pub struct CashPositioningConfig {
12646    /// Whether cash positioning is enabled.
12647    #[serde(default = "default_true")]
12648    pub enabled: bool,
12649    /// Position generation frequency.
12650    #[serde(default = "default_cash_frequency")]
12651    pub frequency: String,
12652    /// Minimum cash balance policy threshold.
12653    #[serde(default = "default_minimum_balance_policy")]
12654    pub minimum_balance_policy: f64,
12655}
12656
/// Serde default for `CashPositioningConfig::frequency`.
fn default_cash_frequency() -> String {
    String::from("daily")
}

/// Serde default for `CashPositioningConfig::minimum_balance_policy`.
fn default_minimum_balance_policy() -> f64 {
    const MINIMUM_BALANCE: f64 = 100_000.0;
    MINIMUM_BALANCE
}
12664
12665impl Default for CashPositioningConfig {
12666    fn default() -> Self {
12667        Self {
12668            enabled: true,
12669            frequency: default_cash_frequency(),
12670            minimum_balance_policy: default_minimum_balance_policy(),
12671        }
12672    }
12673}
12674
12675/// Cash forecasting configuration.
12676///
12677/// Controls forward-looking cash forecast generation with probability-weighted items.
12678#[derive(Debug, Clone, Serialize, Deserialize)]
12679pub struct CashForecastingConfig {
12680    /// Whether cash forecasting is enabled.
12681    #[serde(default = "default_true")]
12682    pub enabled: bool,
12683    /// Number of days to forecast into the future.
12684    #[serde(default = "default_horizon_days")]
12685    pub horizon_days: u32,
12686    /// AR collection probability curve type ("aging" or "flat").
12687    #[serde(default = "default_ar_probability_curve")]
12688    pub ar_collection_probability_curve: String,
12689    /// Confidence interval for the forecast (0.0 to 1.0).
12690    #[serde(default = "default_confidence_interval")]
12691    pub confidence_interval: f64,
12692}
12693
/// Serde default for `CashForecastingConfig::horizon_days`.
fn default_horizon_days() -> u32 {
    const HORIZON_DAYS: u32 = 90;
    HORIZON_DAYS
}

/// Serde default for `CashForecastingConfig::ar_collection_probability_curve`.
fn default_ar_probability_curve() -> String {
    String::from("aging")
}

/// Serde default for `CashForecastingConfig::confidence_interval`.
fn default_confidence_interval() -> f64 {
    const CONFIDENCE_INTERVAL: f64 = 0.90;
    CONFIDENCE_INTERVAL
}
12705
12706impl Default for CashForecastingConfig {
12707    fn default() -> Self {
12708        Self {
12709            enabled: true,
12710            horizon_days: default_horizon_days(),
12711            ar_collection_probability_curve: default_ar_probability_curve(),
12712            confidence_interval: default_confidence_interval(),
12713        }
12714    }
12715}
12716
12717/// Cash pooling configuration.
12718///
12719/// Controls cash pool structure generation (physical, notional, zero-balancing).
12720#[derive(Debug, Clone, Serialize, Deserialize)]
12721pub struct CashPoolingConfig {
12722    /// Whether cash pooling is enabled.
12723    #[serde(default)]
12724    pub enabled: bool,
12725    /// Pool type: "physical_pooling", "notional_pooling", or "zero_balancing".
12726    #[serde(default = "default_pool_type")]
12727    pub pool_type: String,
12728    /// Time of day when sweeps occur (HH:MM format).
12729    #[serde(default = "default_sweep_time")]
12730    pub sweep_time: String,
12731}
12732
/// Serde default for `CashPoolingConfig::pool_type`.
fn default_pool_type() -> String {
    String::from("zero_balancing")
}

/// Serde default for `CashPoolingConfig::sweep_time`.
fn default_sweep_time() -> String {
    String::from("16:00")
}
12740
12741impl Default for CashPoolingConfig {
12742    fn default() -> Self {
12743        Self {
12744            enabled: false,
12745            pool_type: default_pool_type(),
12746            sweep_time: default_sweep_time(),
12747        }
12748    }
12749}
12750
12751/// Hedging configuration.
12752///
12753/// Controls generation of hedging instruments and hedge relationship designations
12754/// under ASC 815 / IFRS 9.
12755#[derive(Debug, Clone, Serialize, Deserialize)]
12756pub struct HedgingSchemaConfig {
12757    /// Whether hedging generation is enabled.
12758    #[serde(default)]
12759    pub enabled: bool,
12760    /// Target hedge ratio (0.0 to 1.0). Proportion of FX exposure to hedge.
12761    #[serde(default = "default_hedge_ratio")]
12762    pub hedge_ratio: f64,
12763    /// Types of instruments to generate (e.g., ["fx_forward", "interest_rate_swap"]).
12764    #[serde(default = "default_hedge_instruments")]
12765    pub instruments: Vec<String>,
12766    /// Whether to designate formal hedge accounting relationships.
12767    #[serde(default = "default_true")]
12768    pub hedge_accounting: bool,
12769    /// Effectiveness testing method: "dollar_offset", "regression", or "critical_terms".
12770    #[serde(default = "default_effectiveness_method")]
12771    pub effectiveness_method: String,
12772}
12773
/// Serde default for `HedgingSchemaConfig::hedge_ratio`.
fn default_hedge_ratio() -> f64 {
    const HEDGE_RATIO: f64 = 0.75;
    HEDGE_RATIO
}

/// Serde default for `HedgingSchemaConfig::instruments`.
fn default_hedge_instruments() -> Vec<String> {
    ["fx_forward", "interest_rate_swap"]
        .iter()
        .map(|name| name.to_string())
        .collect()
}

/// Serde default for `HedgingSchemaConfig::effectiveness_method`.
fn default_effectiveness_method() -> String {
    String::from("regression")
}
12785
12786impl Default for HedgingSchemaConfig {
12787    fn default() -> Self {
12788        Self {
12789            enabled: false,
12790            hedge_ratio: default_hedge_ratio(),
12791            instruments: default_hedge_instruments(),
12792            hedge_accounting: true,
12793            effectiveness_method: default_effectiveness_method(),
12794        }
12795    }
12796}
12797
12798/// Debt instrument configuration.
12799///
12800/// Controls generation of debt instruments (term loans, revolving credit, bonds)
12801/// with amortization schedules and financial covenants.
12802#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12803pub struct DebtSchemaConfig {
12804    /// Whether debt instrument generation is enabled.
12805    #[serde(default)]
12806    pub enabled: bool,
12807    /// Debt instrument definitions.
12808    #[serde(default)]
12809    pub instruments: Vec<DebtInstrumentDef>,
12810    /// Covenant definitions.
12811    #[serde(default)]
12812    pub covenants: Vec<CovenantDef>,
12813}
12814
12815/// Definition of a debt instrument in configuration.
12816#[derive(Debug, Clone, Serialize, Deserialize)]
12817pub struct DebtInstrumentDef {
12818    /// Instrument type: "term_loan", "revolving_credit", "bond", "commercial_paper", "bridge_loan".
12819    #[serde(rename = "type")]
12820    pub instrument_type: String,
12821    /// Principal amount (for term loans, bonds).
12822    #[serde(default)]
12823    pub principal: Option<f64>,
12824    /// Interest rate (annual, as decimal fraction).
12825    #[serde(default)]
12826    pub rate: Option<f64>,
12827    /// Maturity in months.
12828    #[serde(default)]
12829    pub maturity_months: Option<u32>,
12830    /// Facility limit (for revolving credit).
12831    #[serde(default)]
12832    pub facility: Option<f64>,
12833}
12834
12835/// Definition of a debt covenant in configuration.
12836#[derive(Debug, Clone, Serialize, Deserialize)]
12837pub struct CovenantDef {
12838    /// Covenant type: "debt_to_equity", "interest_coverage", "current_ratio",
12839    /// "net_worth", "debt_to_ebitda", "fixed_charge_coverage".
12840    #[serde(rename = "type")]
12841    pub covenant_type: String,
12842    /// Covenant threshold value.
12843    pub threshold: f64,
12844}
12845
12846/// Intercompany netting configuration.
12847///
12848/// Controls generation of multilateral netting runs.
12849#[derive(Debug, Clone, Serialize, Deserialize)]
12850pub struct NettingSchemaConfig {
12851    /// Whether netting generation is enabled.
12852    #[serde(default)]
12853    pub enabled: bool,
12854    /// Netting cycle: "daily", "weekly", or "monthly".
12855    #[serde(default = "default_netting_cycle")]
12856    pub cycle: String,
12857}
12858
/// Serde default for `NettingSchemaConfig::cycle`.
fn default_netting_cycle() -> String {
    String::from("monthly")
}
12862
12863impl Default for NettingSchemaConfig {
12864    fn default() -> Self {
12865        Self {
12866            enabled: false,
12867            cycle: default_netting_cycle(),
12868        }
12869    }
12870}
12871
12872/// Bank guarantee and letter of credit configuration.
12873///
12874/// Controls generation of bank guarantees, standby LCs, and performance bonds.
12875#[derive(Debug, Clone, Serialize, Deserialize)]
12876pub struct BankGuaranteeSchemaConfig {
12877    /// Whether bank guarantee generation is enabled.
12878    #[serde(default)]
12879    pub enabled: bool,
12880    /// Number of guarantees to generate.
12881    #[serde(default = "default_guarantee_count")]
12882    pub count: u32,
12883}
12884
/// Serde default for `BankGuaranteeSchemaConfig::count`.
fn default_guarantee_count() -> u32 {
    const GUARANTEE_COUNT: u32 = 5;
    GUARANTEE_COUNT
}
12888
12889impl Default for BankGuaranteeSchemaConfig {
12890    fn default() -> Self {
12891        Self {
12892            enabled: false,
12893            count: default_guarantee_count(),
12894        }
12895    }
12896}
12897
12898// ===========================================================================
12899// Project Accounting Configuration
12900// ===========================================================================
12901
12902/// Project accounting configuration.
12903///
12904/// Controls generation of project cost lines, revenue recognition,
12905/// milestones, change orders, retainage, and earned value metrics.
12906#[derive(Debug, Clone, Serialize, Deserialize)]
12907pub struct ProjectAccountingConfig {
12908    /// Whether project accounting is enabled.
12909    #[serde(default)]
12910    pub enabled: bool,
12911    /// Number of projects to generate.
12912    #[serde(default = "default_project_count")]
12913    pub project_count: u32,
12914    /// Distribution of project types (capital, internal, customer, r_and_d, maintenance, technology).
12915    #[serde(default)]
12916    pub project_types: ProjectTypeDistribution,
12917    /// WBS structure configuration.
12918    #[serde(default)]
12919    pub wbs: WbsSchemaConfig,
12920    /// Cost allocation rates (what % of source documents get project-tagged).
12921    #[serde(default)]
12922    pub cost_allocation: CostAllocationConfig,
12923    /// Revenue recognition configuration for project accounting.
12924    #[serde(default)]
12925    pub revenue_recognition: ProjectRevenueRecognitionConfig,
12926    /// Milestone configuration.
12927    #[serde(default)]
12928    pub milestones: MilestoneSchemaConfig,
12929    /// Change order configuration.
12930    #[serde(default)]
12931    pub change_orders: ChangeOrderSchemaConfig,
12932    /// Retainage configuration.
12933    #[serde(default)]
12934    pub retainage: RetainageSchemaConfig,
12935    /// Earned value management configuration.
12936    #[serde(default)]
12937    pub earned_value: EarnedValueSchemaConfig,
12938    /// Anomaly injection rate for project accounting data (0.0 to 1.0).
12939    #[serde(default = "default_project_anomaly_rate")]
12940    pub anomaly_rate: f64,
12941}
12942
/// Serde default for `ProjectAccountingConfig::project_count`.
fn default_project_count() -> u32 {
    const PROJECT_COUNT: u32 = 10;
    PROJECT_COUNT
}

/// Serde default for `ProjectAccountingConfig::anomaly_rate`.
fn default_project_anomaly_rate() -> f64 {
    const PROJECT_ANOMALY_RATE: f64 = 0.03;
    PROJECT_ANOMALY_RATE
}
12950
12951impl Default for ProjectAccountingConfig {
12952    fn default() -> Self {
12953        Self {
12954            enabled: false,
12955            project_count: default_project_count(),
12956            project_types: ProjectTypeDistribution::default(),
12957            wbs: WbsSchemaConfig::default(),
12958            cost_allocation: CostAllocationConfig::default(),
12959            revenue_recognition: ProjectRevenueRecognitionConfig::default(),
12960            milestones: MilestoneSchemaConfig::default(),
12961            change_orders: ChangeOrderSchemaConfig::default(),
12962            retainage: RetainageSchemaConfig::default(),
12963            earned_value: EarnedValueSchemaConfig::default(),
12964            anomaly_rate: default_project_anomaly_rate(),
12965        }
12966    }
12967}
12968
12969/// Distribution of project types by weight.
12970#[derive(Debug, Clone, Serialize, Deserialize)]
12971pub struct ProjectTypeDistribution {
12972    /// Weight for capital projects (default 0.25).
12973    #[serde(default = "default_capital_weight")]
12974    pub capital: f64,
12975    /// Weight for internal projects (default 0.20).
12976    #[serde(default = "default_internal_weight")]
12977    pub internal: f64,
12978    /// Weight for customer projects (default 0.30).
12979    #[serde(default = "default_customer_weight")]
12980    pub customer: f64,
12981    /// Weight for R&D projects (default 0.10).
12982    #[serde(default = "default_rnd_weight")]
12983    pub r_and_d: f64,
12984    /// Weight for maintenance projects (default 0.10).
12985    #[serde(default = "default_maintenance_weight")]
12986    pub maintenance: f64,
12987    /// Weight for technology projects (default 0.05).
12988    #[serde(default = "default_technology_weight")]
12989    pub technology: f64,
12990}
12991
/// Serde default for `ProjectTypeDistribution::capital`.
fn default_capital_weight() -> f64 {
    const CAPITAL: f64 = 0.25;
    CAPITAL
}

/// Serde default for `ProjectTypeDistribution::internal`.
fn default_internal_weight() -> f64 {
    const INTERNAL: f64 = 0.20;
    INTERNAL
}

/// Serde default for `ProjectTypeDistribution::customer`.
fn default_customer_weight() -> f64 {
    const CUSTOMER: f64 = 0.30;
    CUSTOMER
}

/// Serde default for `ProjectTypeDistribution::r_and_d`.
fn default_rnd_weight() -> f64 {
    const R_AND_D: f64 = 0.10;
    R_AND_D
}

/// Serde default for `ProjectTypeDistribution::maintenance`.
fn default_maintenance_weight() -> f64 {
    const MAINTENANCE: f64 = 0.10;
    MAINTENANCE
}

/// Serde default for `ProjectTypeDistribution::technology`.
fn default_technology_weight() -> f64 {
    const TECHNOLOGY: f64 = 0.05;
    TECHNOLOGY
}
13010
13011impl Default for ProjectTypeDistribution {
13012    fn default() -> Self {
13013        Self {
13014            capital: default_capital_weight(),
13015            internal: default_internal_weight(),
13016            customer: default_customer_weight(),
13017            r_and_d: default_rnd_weight(),
13018            maintenance: default_maintenance_weight(),
13019            technology: default_technology_weight(),
13020        }
13021    }
13022}
13023
13024/// WBS structure configuration.
13025#[derive(Debug, Clone, Serialize, Deserialize)]
13026pub struct WbsSchemaConfig {
13027    /// Maximum depth of WBS hierarchy (default 3).
13028    #[serde(default = "default_wbs_max_depth")]
13029    pub max_depth: u32,
13030    /// Minimum elements per level-1 WBS (default 2).
13031    #[serde(default = "default_wbs_min_elements")]
13032    pub min_elements_per_level: u32,
13033    /// Maximum elements per level-1 WBS (default 6).
13034    #[serde(default = "default_wbs_max_elements")]
13035    pub max_elements_per_level: u32,
13036}
13037
/// Serde default for `WbsSchemaConfig::max_depth`.
fn default_wbs_max_depth() -> u32 {
    const MAX_DEPTH: u32 = 3;
    MAX_DEPTH
}

/// Serde default for `WbsSchemaConfig::min_elements_per_level`.
fn default_wbs_min_elements() -> u32 {
    const MIN_ELEMENTS: u32 = 2;
    MIN_ELEMENTS
}

/// Serde default for `WbsSchemaConfig::max_elements_per_level`.
fn default_wbs_max_elements() -> u32 {
    const MAX_ELEMENTS: u32 = 6;
    MAX_ELEMENTS
}
13047
13048impl Default for WbsSchemaConfig {
13049    fn default() -> Self {
13050        Self {
13051            max_depth: default_wbs_max_depth(),
13052            min_elements_per_level: default_wbs_min_elements(),
13053            max_elements_per_level: default_wbs_max_elements(),
13054        }
13055    }
13056}
13057
13058/// Cost allocation rates — what fraction of each document type gets linked to a project.
13059#[derive(Debug, Clone, Serialize, Deserialize)]
13060pub struct CostAllocationConfig {
13061    /// Fraction of time entries assigned to projects (0.0 to 1.0).
13062    #[serde(default = "default_time_entry_rate")]
13063    pub time_entry_project_rate: f64,
13064    /// Fraction of expense reports assigned to projects (0.0 to 1.0).
13065    #[serde(default = "default_expense_rate")]
13066    pub expense_project_rate: f64,
13067    /// Fraction of purchase orders assigned to projects (0.0 to 1.0).
13068    #[serde(default = "default_po_rate")]
13069    pub purchase_order_project_rate: f64,
13070    /// Fraction of vendor invoices assigned to projects (0.0 to 1.0).
13071    #[serde(default = "default_vi_rate")]
13072    pub vendor_invoice_project_rate: f64,
13073}
13074
/// Serde default for `CostAllocationConfig::time_entry_project_rate`.
fn default_time_entry_rate() -> f64 {
    const TIME_ENTRY_RATE: f64 = 0.60;
    TIME_ENTRY_RATE
}

/// Serde default for `CostAllocationConfig::expense_project_rate`.
fn default_expense_rate() -> f64 {
    const EXPENSE_RATE: f64 = 0.30;
    EXPENSE_RATE
}

/// Serde default for `CostAllocationConfig::purchase_order_project_rate`.
fn default_po_rate() -> f64 {
    const PURCHASE_ORDER_RATE: f64 = 0.40;
    PURCHASE_ORDER_RATE
}

/// Serde default for `CostAllocationConfig::vendor_invoice_project_rate`.
fn default_vi_rate() -> f64 {
    const VENDOR_INVOICE_RATE: f64 = 0.35;
    VENDOR_INVOICE_RATE
}
13087
13088impl Default for CostAllocationConfig {
13089    fn default() -> Self {
13090        Self {
13091            time_entry_project_rate: default_time_entry_rate(),
13092            expense_project_rate: default_expense_rate(),
13093            purchase_order_project_rate: default_po_rate(),
13094            vendor_invoice_project_rate: default_vi_rate(),
13095        }
13096    }
13097}
13098
13099/// Revenue recognition configuration for project accounting.
13100#[derive(Debug, Clone, Serialize, Deserialize)]
13101pub struct ProjectRevenueRecognitionConfig {
13102    /// Whether revenue recognition is enabled for customer projects.
13103    #[serde(default = "default_true")]
13104    pub enabled: bool,
13105    /// Default method: "percentage_of_completion", "completed_contract", "milestone_based".
13106    #[serde(default = "default_revenue_method")]
13107    pub method: String,
13108    /// Default completion measure: "cost_to_cost", "labor_hours", "physical_completion".
13109    #[serde(default = "default_completion_measure")]
13110    pub completion_measure: String,
13111    /// Average contract value for customer projects.
13112    #[serde(default = "default_avg_contract_value")]
13113    pub avg_contract_value: f64,
13114}
13115
/// Serde default for `ProjectRevenueRecognitionConfig::method`.
fn default_revenue_method() -> String {
    String::from("percentage_of_completion")
}

/// Serde default for `ProjectRevenueRecognitionConfig::completion_measure`.
fn default_completion_measure() -> String {
    String::from("cost_to_cost")
}

/// Serde default for `ProjectRevenueRecognitionConfig::avg_contract_value`.
fn default_avg_contract_value() -> f64 {
    const AVG_CONTRACT_VALUE: f64 = 500_000.0;
    AVG_CONTRACT_VALUE
}
13125
13126impl Default for ProjectRevenueRecognitionConfig {
13127    fn default() -> Self {
13128        Self {
13129            enabled: true,
13130            method: default_revenue_method(),
13131            completion_measure: default_completion_measure(),
13132            avg_contract_value: default_avg_contract_value(),
13133        }
13134    }
13135}
13136
13137/// Milestone configuration.
13138#[derive(Debug, Clone, Serialize, Deserialize)]
13139pub struct MilestoneSchemaConfig {
13140    /// Whether milestone generation is enabled.
13141    #[serde(default = "default_true")]
13142    pub enabled: bool,
13143    /// Average number of milestones per project.
13144    #[serde(default = "default_milestones_per_project")]
13145    pub avg_per_project: u32,
13146    /// Fraction of milestones that are payment milestones (0.0 to 1.0).
13147    #[serde(default = "default_payment_milestone_rate")]
13148    pub payment_milestone_rate: f64,
13149}
13150
/// Serde default for `MilestoneSchemaConfig::avg_per_project`.
fn default_milestones_per_project() -> u32 {
    4_u32
}

/// Serde default for `MilestoneSchemaConfig::payment_milestone_rate`.
fn default_payment_milestone_rate() -> f64 {
    0.5
}
13157
13158impl Default for MilestoneSchemaConfig {
13159    fn default() -> Self {
13160        Self {
13161            enabled: true,
13162            avg_per_project: default_milestones_per_project(),
13163            payment_milestone_rate: default_payment_milestone_rate(),
13164        }
13165    }
13166}
13167
13168/// Change order configuration.
13169#[derive(Debug, Clone, Serialize, Deserialize)]
13170pub struct ChangeOrderSchemaConfig {
13171    /// Whether change order generation is enabled.
13172    #[serde(default = "default_true")]
13173    pub enabled: bool,
13174    /// Probability that a project will have at least one change order (0.0 to 1.0).
13175    #[serde(default = "default_change_order_probability")]
13176    pub probability: f64,
13177    /// Maximum change orders per project.
13178    #[serde(default = "default_max_change_orders")]
13179    pub max_per_project: u32,
13180    /// Approval rate for change orders (0.0 to 1.0).
13181    #[serde(default = "default_change_order_approval_rate")]
13182    pub approval_rate: f64,
13183}
13184
/// Serde default for `ChangeOrderSchemaConfig::probability`.
fn default_change_order_probability() -> f64 {
    0.4
}

/// Serde default for `ChangeOrderSchemaConfig::max_per_project`.
fn default_max_change_orders() -> u32 {
    3_u32
}

/// Serde default for `ChangeOrderSchemaConfig::approval_rate`.
fn default_change_order_approval_rate() -> f64 {
    0.75_f64
}
13194
13195impl Default for ChangeOrderSchemaConfig {
13196    fn default() -> Self {
13197        Self {
13198            enabled: true,
13199            probability: default_change_order_probability(),
13200            max_per_project: default_max_change_orders(),
13201            approval_rate: default_change_order_approval_rate(),
13202        }
13203    }
13204}
13205
13206/// Retainage configuration.
13207#[derive(Debug, Clone, Serialize, Deserialize)]
13208pub struct RetainageSchemaConfig {
13209    /// Whether retainage is enabled.
13210    #[serde(default)]
13211    pub enabled: bool,
13212    /// Default retainage percentage (0.0 to 1.0, e.g., 0.10 for 10%).
13213    #[serde(default = "default_retainage_pct")]
13214    pub default_percentage: f64,
13215}
13216
/// Serde default for `RetainageSchemaConfig::default_percentage` (10%).
fn default_retainage_pct() -> f64 {
    0.1
}
13220
13221impl Default for RetainageSchemaConfig {
13222    fn default() -> Self {
13223        Self {
13224            enabled: false,
13225            default_percentage: default_retainage_pct(),
13226        }
13227    }
13228}
13229
13230/// Earned value management (EVM) configuration.
13231#[derive(Debug, Clone, Serialize, Deserialize)]
13232pub struct EarnedValueSchemaConfig {
13233    /// Whether EVM metrics are generated.
13234    #[serde(default = "default_true")]
13235    pub enabled: bool,
13236    /// Measurement frequency: "weekly", "biweekly", "monthly".
13237    #[serde(default = "default_evm_frequency")]
13238    pub frequency: String,
13239}
13240
/// Serde default for `EarnedValueSchemaConfig::frequency`.
fn default_evm_frequency() -> String {
    String::from("monthly")
}
13244
13245impl Default for EarnedValueSchemaConfig {
13246    fn default() -> Self {
13247        Self {
13248            enabled: true,
13249            frequency: default_evm_frequency(),
13250        }
13251    }
13252}
13253
13254// =============================================================================
13255// ESG / Sustainability Configuration
13256// =============================================================================
13257
13258/// Top-level ESG / sustainability reporting configuration.
13259#[derive(Debug, Clone, Serialize, Deserialize)]
13260pub struct EsgConfig {
13261    /// Whether ESG generation is enabled.
13262    #[serde(default)]
13263    pub enabled: bool,
13264    /// Environmental metrics (emissions, energy, water, waste).
13265    #[serde(default)]
13266    pub environmental: EnvironmentalConfig,
13267    /// Social metrics (diversity, pay equity, safety).
13268    #[serde(default)]
13269    pub social: SocialConfig,
13270    /// Governance metrics (board composition, ethics, compliance).
13271    #[serde(default)]
13272    pub governance: GovernanceSchemaConfig,
13273    /// Supply-chain ESG assessment settings.
13274    #[serde(default)]
13275    pub supply_chain_esg: SupplyChainEsgConfig,
13276    /// ESG reporting / disclosure framework settings.
13277    #[serde(default)]
13278    pub reporting: EsgReportingConfig,
13279    /// Climate scenario analysis settings.
13280    #[serde(default)]
13281    pub climate_scenarios: ClimateScenarioConfig,
13282    /// Anomaly injection rate for ESG data (0.0 to 1.0).
13283    #[serde(default = "default_esg_anomaly_rate")]
13284    pub anomaly_rate: f64,
13285}
13286
/// Serde default for `EsgConfig::anomaly_rate` (2%).
fn default_esg_anomaly_rate() -> f64 {
    0.02_f64
}
13290
13291impl Default for EsgConfig {
13292    fn default() -> Self {
13293        Self {
13294            enabled: false,
13295            environmental: EnvironmentalConfig::default(),
13296            social: SocialConfig::default(),
13297            governance: GovernanceSchemaConfig::default(),
13298            supply_chain_esg: SupplyChainEsgConfig::default(),
13299            reporting: EsgReportingConfig::default(),
13300            climate_scenarios: ClimateScenarioConfig::default(),
13301            anomaly_rate: default_esg_anomaly_rate(),
13302        }
13303    }
13304}
13305
13306/// Country pack configuration.
13307///
13308/// Controls where to load additional country packs and per-country overrides.
13309/// When omitted, only the built-in packs (_default, US, DE, GB) are used.
13310#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13311pub struct CountryPacksSchemaConfig {
13312    /// Optional directory containing additional `*.json` country packs.
13313    #[serde(default)]
13314    pub external_dir: Option<PathBuf>,
13315    /// Per-country overrides applied after loading.
13316    /// Keys are ISO 3166-1 alpha-2 codes; values are partial JSON objects
13317    /// that are deep-merged on top of the loaded pack.
13318    #[serde(default)]
13319    pub overrides: std::collections::HashMap<String, serde_json::Value>,
13320}
13321
13322/// Environmental metrics configuration.
13323#[derive(Debug, Clone, Serialize, Deserialize)]
13324pub struct EnvironmentalConfig {
13325    /// Whether environmental metrics are generated.
13326    #[serde(default = "default_true")]
13327    pub enabled: bool,
13328    /// Scope 1 (direct) emission generation settings.
13329    #[serde(default)]
13330    pub scope1: EmissionScopeConfig,
13331    /// Scope 2 (purchased energy) emission generation settings.
13332    #[serde(default)]
13333    pub scope2: EmissionScopeConfig,
13334    /// Scope 3 (value chain) emission generation settings.
13335    #[serde(default)]
13336    pub scope3: Scope3Config,
13337    /// Energy consumption tracking settings.
13338    #[serde(default)]
13339    pub energy: EnergySchemaConfig,
13340    /// Water usage tracking settings.
13341    #[serde(default)]
13342    pub water: WaterSchemaConfig,
13343    /// Waste management tracking settings.
13344    #[serde(default)]
13345    pub waste: WasteSchemaConfig,
13346}
13347
13348impl Default for EnvironmentalConfig {
13349    fn default() -> Self {
13350        Self {
13351            enabled: true,
13352            scope1: EmissionScopeConfig::default(),
13353            scope2: EmissionScopeConfig::default(),
13354            scope3: Scope3Config::default(),
13355            energy: EnergySchemaConfig::default(),
13356            water: WaterSchemaConfig::default(),
13357            waste: WasteSchemaConfig::default(),
13358        }
13359    }
13360}
13361
13362/// Configuration for a single emission scope (Scope 1 or 2).
13363#[derive(Debug, Clone, Serialize, Deserialize)]
13364pub struct EmissionScopeConfig {
13365    /// Whether this scope is enabled.
13366    #[serde(default = "default_true")]
13367    pub enabled: bool,
13368    /// Emission factor region (e.g., "US", "EU", "global").
13369    #[serde(default = "default_emission_region")]
13370    pub factor_region: String,
13371}
13372
/// Serde default for `EmissionScopeConfig::factor_region`.
fn default_emission_region() -> String {
    String::from("US")
}
13376
13377impl Default for EmissionScopeConfig {
13378    fn default() -> Self {
13379        Self {
13380            enabled: true,
13381            factor_region: default_emission_region(),
13382        }
13383    }
13384}
13385
13386/// Scope 3 (value chain) emission configuration.
13387#[derive(Debug, Clone, Serialize, Deserialize)]
13388pub struct Scope3Config {
13389    /// Whether Scope 3 emissions are generated.
13390    #[serde(default = "default_true")]
13391    pub enabled: bool,
13392    /// Categories to include (e.g., "purchased_goods", "business_travel", "commuting").
13393    #[serde(default = "default_scope3_categories")]
13394    pub categories: Vec<String>,
13395    /// Spend-based emission intensity (kg CO2e per USD).
13396    #[serde(default = "default_spend_intensity")]
13397    pub default_spend_intensity_kg_per_usd: f64,
13398}
13399
/// Serde default for `Scope3Config::categories`.
fn default_scope3_categories() -> Vec<String> {
    ["purchased_goods", "business_travel", "employee_commuting"]
        .iter()
        .map(|category| category.to_string())
        .collect()
}

/// Serde default for `Scope3Config::default_spend_intensity_kg_per_usd`.
fn default_spend_intensity() -> f64 {
    0.5_f64
}
13411
13412impl Default for Scope3Config {
13413    fn default() -> Self {
13414        Self {
13415            enabled: true,
13416            categories: default_scope3_categories(),
13417            default_spend_intensity_kg_per_usd: default_spend_intensity(),
13418        }
13419    }
13420}
13421
13422/// Energy consumption configuration.
13423#[derive(Debug, Clone, Serialize, Deserialize)]
13424pub struct EnergySchemaConfig {
13425    /// Whether energy consumption tracking is enabled.
13426    #[serde(default = "default_true")]
13427    pub enabled: bool,
13428    /// Number of facilities to generate.
13429    #[serde(default = "default_facility_count")]
13430    pub facility_count: u32,
13431    /// Target percentage of energy from renewable sources (0.0 to 1.0).
13432    #[serde(default = "default_renewable_target")]
13433    pub renewable_target: f64,
13434}
13435
/// Serde default for `EnergySchemaConfig::facility_count`.
fn default_facility_count() -> u32 {
    5_u32
}

/// Serde default for `EnergySchemaConfig::renewable_target`.
fn default_renewable_target() -> f64 {
    0.3
}
13443
13444impl Default for EnergySchemaConfig {
13445    fn default() -> Self {
13446        Self {
13447            enabled: true,
13448            facility_count: default_facility_count(),
13449            renewable_target: default_renewable_target(),
13450        }
13451    }
13452}
13453
13454/// Water usage configuration.
13455#[derive(Debug, Clone, Serialize, Deserialize)]
13456pub struct WaterSchemaConfig {
13457    /// Whether water usage tracking is enabled.
13458    #[serde(default = "default_true")]
13459    pub enabled: bool,
13460    /// Number of facilities with water tracking.
13461    #[serde(default = "default_water_facility_count")]
13462    pub facility_count: u32,
13463}
13464
/// Serde default for `WaterSchemaConfig::facility_count`.
fn default_water_facility_count() -> u32 {
    3_u32
}
13468
13469impl Default for WaterSchemaConfig {
13470    fn default() -> Self {
13471        Self {
13472            enabled: true,
13473            facility_count: default_water_facility_count(),
13474        }
13475    }
13476}
13477
13478/// Waste management configuration.
13479#[derive(Debug, Clone, Serialize, Deserialize)]
13480pub struct WasteSchemaConfig {
13481    /// Whether waste tracking is enabled.
13482    #[serde(default = "default_true")]
13483    pub enabled: bool,
13484    /// Target diversion rate (0.0 to 1.0).
13485    #[serde(default = "default_diversion_target")]
13486    pub diversion_target: f64,
13487}
13488
/// Serde default for `WasteSchemaConfig::diversion_target`.
fn default_diversion_target() -> f64 {
    0.5
}
13492
13493impl Default for WasteSchemaConfig {
13494    fn default() -> Self {
13495        Self {
13496            enabled: true,
13497            diversion_target: default_diversion_target(),
13498        }
13499    }
13500}
13501
13502/// Social metrics configuration.
13503#[derive(Debug, Clone, Serialize, Deserialize)]
13504pub struct SocialConfig {
13505    /// Whether social metrics are generated.
13506    #[serde(default = "default_true")]
13507    pub enabled: bool,
13508    /// Workforce diversity tracking settings.
13509    #[serde(default)]
13510    pub diversity: DiversitySchemaConfig,
13511    /// Pay equity analysis settings.
13512    #[serde(default)]
13513    pub pay_equity: PayEquitySchemaConfig,
13514    /// Safety incident and metrics settings.
13515    #[serde(default)]
13516    pub safety: SafetySchemaConfig,
13517}
13518
13519impl Default for SocialConfig {
13520    fn default() -> Self {
13521        Self {
13522            enabled: true,
13523            diversity: DiversitySchemaConfig::default(),
13524            pay_equity: PayEquitySchemaConfig::default(),
13525            safety: SafetySchemaConfig::default(),
13526        }
13527    }
13528}
13529
13530/// Workforce diversity configuration.
13531#[derive(Debug, Clone, Serialize, Deserialize)]
13532pub struct DiversitySchemaConfig {
13533    /// Whether diversity metrics are generated.
13534    #[serde(default = "default_true")]
13535    pub enabled: bool,
13536    /// Dimensions to track (e.g., "gender", "ethnicity", "age_group").
13537    #[serde(default = "default_diversity_dimensions")]
13538    pub dimensions: Vec<String>,
13539}
13540
/// Serde default for `DiversitySchemaConfig::dimensions`.
fn default_diversity_dimensions() -> Vec<String> {
    ["gender", "ethnicity", "age_group"]
        .iter()
        .map(|dimension| dimension.to_string())
        .collect()
}
13548
13549impl Default for DiversitySchemaConfig {
13550    fn default() -> Self {
13551        Self {
13552            enabled: true,
13553            dimensions: default_diversity_dimensions(),
13554        }
13555    }
13556}
13557
13558/// Pay equity analysis configuration.
13559#[derive(Debug, Clone, Serialize, Deserialize)]
13560pub struct PayEquitySchemaConfig {
13561    /// Whether pay equity analysis is generated.
13562    #[serde(default = "default_true")]
13563    pub enabled: bool,
13564    /// Target pay gap threshold for flagging (e.g., 0.05 = 5% gap).
13565    #[serde(default = "default_pay_gap_threshold")]
13566    pub gap_threshold: f64,
13567}
13568
/// Serde default for `PayEquitySchemaConfig::gap_threshold` (5%).
fn default_pay_gap_threshold() -> f64 {
    0.05_f64
}
13572
13573impl Default for PayEquitySchemaConfig {
13574    fn default() -> Self {
13575        Self {
13576            enabled: true,
13577            gap_threshold: default_pay_gap_threshold(),
13578        }
13579    }
13580}
13581
13582/// Safety metrics configuration.
13583#[derive(Debug, Clone, Serialize, Deserialize)]
13584pub struct SafetySchemaConfig {
13585    /// Whether safety metrics are generated.
13586    #[serde(default = "default_true")]
13587    pub enabled: bool,
13588    /// Average annual recordable incidents per 200,000 hours.
13589    #[serde(default = "default_trir_target")]
13590    pub target_trir: f64,
13591    /// Number of safety incidents to generate.
13592    #[serde(default = "default_incident_count")]
13593    pub incident_count: u32,
13594}
13595
/// Serde default for `SafetySchemaConfig::target_trir`.
fn default_trir_target() -> f64 {
    2.5_f64
}

/// Serde default for `SafetySchemaConfig::incident_count`.
fn default_incident_count() -> u32 {
    20_u32
}
13603
13604impl Default for SafetySchemaConfig {
13605    fn default() -> Self {
13606        Self {
13607            enabled: true,
13608            target_trir: default_trir_target(),
13609            incident_count: default_incident_count(),
13610        }
13611    }
13612}
13613
13614/// Governance metrics configuration.
13615#[derive(Debug, Clone, Serialize, Deserialize)]
13616pub struct GovernanceSchemaConfig {
13617    /// Whether governance metrics are generated.
13618    #[serde(default = "default_true")]
13619    pub enabled: bool,
13620    /// Number of board members.
13621    #[serde(default = "default_board_size")]
13622    pub board_size: u32,
13623    /// Target independent director ratio (0.0 to 1.0).
13624    #[serde(default = "default_independence_target")]
13625    pub independence_target: f64,
13626}
13627
/// Serde default for `GovernanceSchemaConfig::board_size`.
fn default_board_size() -> u32 {
    11_u32
}

/// Serde default for `GovernanceSchemaConfig::independence_target`.
fn default_independence_target() -> f64 {
    0.67_f64
}
13635
13636impl Default for GovernanceSchemaConfig {
13637    fn default() -> Self {
13638        Self {
13639            enabled: true,
13640            board_size: default_board_size(),
13641            independence_target: default_independence_target(),
13642        }
13643    }
13644}
13645
13646/// Supply-chain ESG assessment configuration.
13647#[derive(Debug, Clone, Serialize, Deserialize)]
13648pub struct SupplyChainEsgConfig {
13649    /// Whether supply chain ESG assessments are generated.
13650    #[serde(default = "default_true")]
13651    pub enabled: bool,
13652    /// Proportion of vendors to assess (0.0 to 1.0).
13653    #[serde(default = "default_assessment_coverage")]
13654    pub assessment_coverage: f64,
13655    /// High-risk country codes for automatic flagging.
13656    #[serde(default = "default_high_risk_countries")]
13657    pub high_risk_countries: Vec<String>,
13658}
13659
/// Serde default for `SupplyChainEsgConfig::assessment_coverage`.
fn default_assessment_coverage() -> f64 {
    0.8
}

/// Serde default for `SupplyChainEsgConfig::high_risk_countries`.
fn default_high_risk_countries() -> Vec<String> {
    ["CN", "BD", "MM"]
        .iter()
        .map(|code| code.to_string())
        .collect()
}
13667
13668impl Default for SupplyChainEsgConfig {
13669    fn default() -> Self {
13670        Self {
13671            enabled: true,
13672            assessment_coverage: default_assessment_coverage(),
13673            high_risk_countries: default_high_risk_countries(),
13674        }
13675    }
13676}
13677
13678/// ESG reporting / disclosure framework configuration.
13679#[derive(Debug, Clone, Serialize, Deserialize)]
13680pub struct EsgReportingConfig {
13681    /// Whether ESG disclosures are generated.
13682    #[serde(default = "default_true")]
13683    pub enabled: bool,
13684    /// Frameworks to generate disclosures for.
13685    #[serde(default = "default_esg_frameworks")]
13686    pub frameworks: Vec<String>,
13687    /// Whether materiality assessment is performed.
13688    #[serde(default = "default_true")]
13689    pub materiality_assessment: bool,
13690    /// Materiality threshold for impact dimension (0.0 to 1.0).
13691    #[serde(default = "default_materiality_threshold")]
13692    pub impact_threshold: f64,
13693    /// Materiality threshold for financial dimension (0.0 to 1.0).
13694    #[serde(default = "default_materiality_threshold")]
13695    pub financial_threshold: f64,
13696}
13697
/// Serde default for `EsgReportingConfig::frameworks`.
fn default_esg_frameworks() -> Vec<String> {
    ["GRI", "ESRS"]
        .iter()
        .map(|framework| framework.to_string())
        .collect()
}

/// Serde default shared by `impact_threshold` and `financial_threshold`.
fn default_materiality_threshold() -> f64 {
    0.6_f64
}
13705
13706impl Default for EsgReportingConfig {
13707    fn default() -> Self {
13708        Self {
13709            enabled: true,
13710            frameworks: default_esg_frameworks(),
13711            materiality_assessment: true,
13712            impact_threshold: default_materiality_threshold(),
13713            financial_threshold: default_materiality_threshold(),
13714        }
13715    }
13716}
13717
13718/// Climate scenario analysis configuration.
13719#[derive(Debug, Clone, Serialize, Deserialize)]
13720pub struct ClimateScenarioConfig {
13721    /// Whether climate scenario analysis is generated.
13722    #[serde(default)]
13723    pub enabled: bool,
13724    /// Scenarios to model (e.g., "net_zero_2050", "stated_policies", "current_trajectory").
13725    #[serde(default = "default_climate_scenarios")]
13726    pub scenarios: Vec<String>,
13727    /// Time horizons in years to project.
13728    #[serde(default = "default_time_horizons")]
13729    pub time_horizons: Vec<u32>,
13730}
13731
/// Serde default for `ClimateScenarioConfig::scenarios`.
fn default_climate_scenarios() -> Vec<String> {
    ["net_zero_2050", "stated_policies", "current_trajectory"]
        .iter()
        .map(|scenario| scenario.to_string())
        .collect()
}

/// Serde default for `ClimateScenarioConfig::time_horizons` (years).
fn default_time_horizons() -> Vec<u32> {
    [5, 10, 30].to_vec()
}
13743
13744impl Default for ClimateScenarioConfig {
13745    fn default() -> Self {
13746        Self {
13747            enabled: false,
13748            scenarios: default_climate_scenarios(),
13749            time_horizons: default_time_horizons(),
13750        }
13751    }
13752}
13753
13754// ===== Counterfactual Simulation Scenarios =====
13755
13756/// Configuration for counterfactual simulation scenarios.
13757#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13758pub struct ScenariosConfig {
13759    /// Whether scenario generation is enabled.
13760    #[serde(default)]
13761    pub enabled: bool,
13762    /// List of scenario definitions.
13763    #[serde(default)]
13764    pub scenarios: Vec<ScenarioSchemaConfig>,
13765    /// Causal model configuration.
13766    #[serde(default)]
13767    pub causal_model: CausalModelSchemaConfig,
13768    /// Default settings applied to all scenarios.
13769    #[serde(default)]
13770    pub defaults: ScenarioDefaultsConfig,
13771    /// Generate counterfactual (original, mutated) JE pairs for ML training.
13772    /// When true, the orchestrator produces paired clean/anomalous journal entries.
13773    #[serde(default)]
13774    pub generate_counterfactuals: bool,
13775}
13776
13777/// A single scenario definition in the config.
13778#[derive(Debug, Clone, Serialize, Deserialize)]
13779pub struct ScenarioSchemaConfig {
13780    /// Scenario name (must be unique).
13781    pub name: String,
13782    /// Human-readable description.
13783    #[serde(default)]
13784    pub description: String,
13785    /// Tags for categorization.
13786    #[serde(default)]
13787    pub tags: Vec<String>,
13788    /// Base scenario name (None = default config).
13789    pub base: Option<String>,
13790    /// IFRS 9-style probability weight.
13791    pub probability_weight: Option<f64>,
13792    /// List of interventions to apply.
13793    #[serde(default)]
13794    pub interventions: Vec<InterventionSchemaConfig>,
13795    /// Constraint overrides for this scenario.
13796    #[serde(default)]
13797    pub constraints: ScenarioConstraintsSchemaConfig,
13798    /// Output configuration for this scenario.
13799    #[serde(default)]
13800    pub output: ScenarioOutputSchemaConfig,
13801    /// Arbitrary metadata.
13802    #[serde(default)]
13803    pub metadata: std::collections::HashMap<String, String>,
13804}
13805
13806/// An intervention definition in the config.
13807#[derive(Debug, Clone, Serialize, Deserialize)]
13808pub struct InterventionSchemaConfig {
13809    /// Intervention type and parameters (flattened tagged enum).
13810    #[serde(flatten)]
13811    pub intervention_type: serde_json::Value,
13812    /// Timing configuration.
13813    #[serde(default)]
13814    pub timing: InterventionTimingSchemaConfig,
13815    /// Human-readable label.
13816    pub label: Option<String>,
13817    /// Priority for conflict resolution (higher wins).
13818    #[serde(default)]
13819    pub priority: u32,
13820}
13821
13822/// Timing configuration for an intervention.
13823#[derive(Debug, Clone, Serialize, Deserialize)]
13824pub struct InterventionTimingSchemaConfig {
13825    /// Month offset from start (1-indexed).
13826    #[serde(default = "default_start_month")]
13827    pub start_month: u32,
13828    /// Duration in months.
13829    pub duration_months: Option<u32>,
13830    /// Onset type: "sudden", "gradual", "oscillating", "custom".
13831    #[serde(default = "default_onset")]
13832    pub onset: String,
13833    /// Ramp period in months.
13834    pub ramp_months: Option<u32>,
13835}
13836
13837fn default_start_month() -> u32 {
13838    1
13839}
13840
13841fn default_onset() -> String {
13842    "sudden".to_string()
13843}
13844
13845impl Default for InterventionTimingSchemaConfig {
13846    fn default() -> Self {
13847        Self {
13848            start_month: 1,
13849            duration_months: None,
13850            onset: "sudden".to_string(),
13851            ramp_months: None,
13852        }
13853    }
13854}
13855
13856/// Scenario constraint overrides.
13857#[derive(Debug, Clone, Serialize, Deserialize)]
13858pub struct ScenarioConstraintsSchemaConfig {
13859    #[serde(default = "default_true")]
13860    pub preserve_accounting_identity: bool,
13861    #[serde(default = "default_true")]
13862    pub preserve_document_chains: bool,
13863    #[serde(default = "default_true")]
13864    pub preserve_period_close: bool,
13865    #[serde(default = "default_true")]
13866    pub preserve_balance_coherence: bool,
13867    #[serde(default)]
13868    pub custom: Vec<CustomConstraintSchemaConfig>,
13869}
13870
13871impl Default for ScenarioConstraintsSchemaConfig {
13872    fn default() -> Self {
13873        Self {
13874            preserve_accounting_identity: true,
13875            preserve_document_chains: true,
13876            preserve_period_close: true,
13877            preserve_balance_coherence: true,
13878            custom: Vec::new(),
13879        }
13880    }
13881}
13882
13883/// Custom constraint in config.
13884#[derive(Debug, Clone, Serialize, Deserialize)]
13885pub struct CustomConstraintSchemaConfig {
13886    pub config_path: String,
13887    pub min: Option<f64>,
13888    pub max: Option<f64>,
13889    #[serde(default)]
13890    pub description: String,
13891}
13892
13893/// Output configuration for a scenario.
13894#[derive(Debug, Clone, Serialize, Deserialize)]
13895pub struct ScenarioOutputSchemaConfig {
13896    #[serde(default = "default_true")]
13897    pub paired: bool,
13898    #[serde(default = "default_diff_formats_schema")]
13899    pub diff_formats: Vec<String>,
13900    #[serde(default)]
13901    pub diff_scope: Vec<String>,
13902}
13903
/// Serde default for `ScenarioOutputSchemaConfig::diff_formats`.
fn default_diff_formats_schema() -> Vec<String> {
    ["summary", "aggregate"]
        .iter()
        .map(|format| format.to_string())
        .collect()
}
13907
13908impl Default for ScenarioOutputSchemaConfig {
13909    fn default() -> Self {
13910        Self {
13911            paired: true,
13912            diff_formats: default_diff_formats_schema(),
13913            diff_scope: Vec::new(),
13914        }
13915    }
13916}
13917
13918/// Causal model configuration.
13919#[derive(Debug, Clone, Serialize, Deserialize)]
13920pub struct CausalModelSchemaConfig {
13921    /// Preset name: "default", "minimal", or "custom".
13922    #[serde(default = "default_causal_preset")]
13923    pub preset: String,
13924    /// Custom nodes (merged with preset).
13925    #[serde(default)]
13926    pub nodes: Vec<serde_json::Value>,
13927    /// Custom edges (merged with preset).
13928    #[serde(default)]
13929    pub edges: Vec<serde_json::Value>,
13930}
13931
13932fn default_causal_preset() -> String {
13933    "default".to_string()
13934}
13935
13936impl Default for CausalModelSchemaConfig {
13937    fn default() -> Self {
13938        Self {
13939            preset: "default".to_string(),
13940            nodes: Vec::new(),
13941            edges: Vec::new(),
13942        }
13943    }
13944}
13945
13946/// Default settings applied to all scenarios.
13947#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13948pub struct ScenarioDefaultsConfig {
13949    #[serde(default)]
13950    pub constraints: ScenarioConstraintsSchemaConfig,
13951    #[serde(default)]
13952    pub output: ScenarioOutputSchemaConfig,
13953}
13954
13955// =====================================================================
13956// Compliance Regulations Framework Configuration
13957// =====================================================================
13958
13959/// Top-level configuration for the compliance regulations framework.
13960///
13961/// Controls standards registry, jurisdiction profiles, temporal versioning,
13962/// audit procedure templates, compliance graph integration, and output settings.
13963///
13964/// # Example
13965///
13966/// ```yaml
13967/// compliance_regulations:
13968///   enabled: true
13969///   jurisdictions: [US, DE, GB]
13970///   reference_date: "2025-06-30"
13971///   standards_selection:
13972///     categories: [accounting, auditing, regulatory]
13973///     include: ["IFRS-16", "ASC-606"]
13974///   audit_procedures:
13975///     enabled: true
13976///     procedures_per_standard: 3
13977///   findings:
13978///     enabled: true
13979///     finding_rate: 0.05
13980///   filings:
13981///     enabled: true
13982///   graph:
13983///     enabled: true
13984///     include_compliance_nodes: true
13985///     include_compliance_edges: true
13986/// ```
13987#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13988pub struct ComplianceRegulationsConfig {
13989    /// Master switch for the compliance regulations framework.
13990    #[serde(default)]
13991    pub enabled: bool,
13992    /// Jurisdictions to generate compliance data for (ISO 3166-1 alpha-2 codes).
13993    /// If empty, inferred from company countries in the config.
13994    #[serde(default)]
13995    pub jurisdictions: Vec<String>,
13996    /// Reference date for temporal standard resolution (YYYY-MM-DD).
13997    /// Defaults to the global start_date if not set.
13998    #[serde(default)]
13999    pub reference_date: Option<String>,
14000    /// Standards selection filters.
14001    #[serde(default)]
14002    pub standards_selection: StandardsSelectionConfig,
14003    /// Audit procedure generation settings.
14004    #[serde(default)]
14005    pub audit_procedures: AuditProcedureGenConfig,
14006    /// Compliance finding generation settings.
14007    #[serde(default)]
14008    pub findings: ComplianceFindingGenConfig,
14009    /// Regulatory filing generation settings.
14010    #[serde(default)]
14011    pub filings: ComplianceFilingGenConfig,
14012    /// Compliance graph integration settings.
14013    #[serde(default)]
14014    pub graph: ComplianceGraphConfig,
14015    /// Output settings for compliance-specific files.
14016    #[serde(default)]
14017    pub output: ComplianceOutputConfig,
14018    /// v3.3.0: legal-document generation (engagement letters,
14019    /// management reps, legal opinions, regulatory filings, board
14020    /// resolutions). Requires `compliance_regulations.enabled = true`
14021    /// AND `legal_documents.enabled = true` to take effect.
14022    #[serde(default)]
14023    pub legal_documents: LegalDocumentsConfig,
14024}
14025
14026/// Legal-document generation settings (v3.3.0+).
14027///
14028/// Wires `LegalDocumentGenerator` into the orchestrator. Generates one
14029/// batch per audit engagement when enabled.
14030#[derive(Debug, Clone, Serialize, Deserialize)]
14031pub struct LegalDocumentsConfig {
14032    /// Master switch.
14033    #[serde(default)]
14034    pub enabled: bool,
14035    /// Probability of including a legal-opinion document in an engagement.
14036    #[serde(default = "default_legal_opinion_probability")]
14037    pub legal_opinion_probability: f64,
14038}
14039
/// Serde fallback for `LegalDocumentsConfig::legal_opinion_probability`.
fn default_legal_opinion_probability() -> f64 {
    const LEGAL_OPINION_PROBABILITY: f64 = 0.40;
    LEGAL_OPINION_PROBABILITY
}
14043
14044impl Default for LegalDocumentsConfig {
14045    fn default() -> Self {
14046        Self {
14047            enabled: false,
14048            legal_opinion_probability: default_legal_opinion_probability(),
14049        }
14050    }
14051}
14052
14053/// Filters which standards are included in the generation.
14054#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14055pub struct StandardsSelectionConfig {
14056    /// Standard categories to include (accounting, auditing, regulatory, tax, esg).
14057    /// Empty = all categories.
14058    #[serde(default)]
14059    pub categories: Vec<String>,
14060    /// Explicit standard IDs to include (e.g., ["IFRS-16", "ASC-606"]).
14061    /// When non-empty, only these standards (plus mandatory ones for selected jurisdictions) are used.
14062    #[serde(default)]
14063    pub include: Vec<String>,
14064    /// Standard IDs to exclude.
14065    #[serde(default)]
14066    pub exclude: Vec<String>,
14067    /// Include superseded standards in the output (for historical analysis).
14068    #[serde(default)]
14069    pub include_superseded: bool,
14070}
14071
14072/// Configuration for audit procedure template generation.
14073#[derive(Debug, Clone, Serialize, Deserialize)]
14074pub struct AuditProcedureGenConfig {
14075    /// Whether audit procedure generation is enabled.
14076    #[serde(default)]
14077    pub enabled: bool,
14078    /// Number of procedures to generate per applicable standard.
14079    #[serde(default = "default_procedures_per_standard")]
14080    pub procedures_per_standard: usize,
14081    /// Sampling methodology: "statistical", "non_statistical", "mixed".
14082    #[serde(default = "default_sampling_method")]
14083    pub sampling_method: String,
14084    /// Confidence level for statistical sampling (0.0-1.0).
14085    #[serde(default = "default_confidence_level")]
14086    pub confidence_level: f64,
14087    /// Tolerable misstatement rate for sampling (0.0-1.0).
14088    #[serde(default = "default_tolerable_misstatement")]
14089    pub tolerable_misstatement: f64,
14090}
14091
/// Serde fallback for `AuditProcedureGenConfig::procedures_per_standard`.
fn default_procedures_per_standard() -> usize {
    const PROCEDURES_PER_STANDARD: usize = 3;
    PROCEDURES_PER_STANDARD
}
14095
/// Serde fallback for `AuditProcedureGenConfig::sampling_method`.
fn default_sampling_method() -> String {
    String::from("statistical")
}
14099
/// Serde fallback for `AuditProcedureGenConfig::confidence_level`.
fn default_confidence_level() -> f64 {
    const CONFIDENCE_LEVEL: f64 = 0.95;
    CONFIDENCE_LEVEL
}
14103
/// Serde fallback for `AuditProcedureGenConfig::tolerable_misstatement`.
fn default_tolerable_misstatement() -> f64 {
    const TOLERABLE_MISSTATEMENT: f64 = 0.05;
    TOLERABLE_MISSTATEMENT
}
14107
14108impl Default for AuditProcedureGenConfig {
14109    fn default() -> Self {
14110        Self {
14111            enabled: false,
14112            procedures_per_standard: default_procedures_per_standard(),
14113            sampling_method: default_sampling_method(),
14114            confidence_level: default_confidence_level(),
14115            tolerable_misstatement: default_tolerable_misstatement(),
14116        }
14117    }
14118}
14119
14120/// Configuration for compliance finding generation.
14121#[derive(Debug, Clone, Serialize, Deserialize)]
14122pub struct ComplianceFindingGenConfig {
14123    /// Whether finding generation is enabled.
14124    #[serde(default)]
14125    pub enabled: bool,
14126    /// Rate of findings per audit procedure (0.0-1.0).
14127    #[serde(default = "default_finding_rate")]
14128    pub finding_rate: f64,
14129    /// Rate of material weakness findings among all findings (0.0-1.0).
14130    #[serde(default = "default_cr_material_weakness_rate")]
14131    pub material_weakness_rate: f64,
14132    /// Rate of significant deficiency findings among all findings (0.0-1.0).
14133    #[serde(default = "default_cr_significant_deficiency_rate")]
14134    pub significant_deficiency_rate: f64,
14135    /// Whether to generate remediation plans for findings.
14136    #[serde(default = "default_true")]
14137    pub generate_remediation: bool,
14138}
14139
/// Serde fallback for `ComplianceFindingGenConfig::finding_rate`.
fn default_finding_rate() -> f64 {
    const FINDING_RATE: f64 = 0.05;
    FINDING_RATE
}
14143
/// Serde fallback for `ComplianceFindingGenConfig::material_weakness_rate`.
fn default_cr_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}
14147
/// Serde fallback for `ComplianceFindingGenConfig::significant_deficiency_rate`.
fn default_cr_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
14151
14152impl Default for ComplianceFindingGenConfig {
14153    fn default() -> Self {
14154        Self {
14155            enabled: false,
14156            finding_rate: default_finding_rate(),
14157            material_weakness_rate: default_cr_material_weakness_rate(),
14158            significant_deficiency_rate: default_cr_significant_deficiency_rate(),
14159            generate_remediation: true,
14160        }
14161    }
14162}
14163
14164/// Configuration for regulatory filing generation.
14165#[derive(Debug, Clone, Serialize, Deserialize)]
14166pub struct ComplianceFilingGenConfig {
14167    /// Whether filing generation is enabled.
14168    #[serde(default)]
14169    pub enabled: bool,
14170    /// Filing types to include (e.g., ["10-K", "10-Q", "Jahresabschluss"]).
14171    /// Empty = all applicable filings for the selected jurisdictions.
14172    #[serde(default)]
14173    pub filing_types: Vec<String>,
14174    /// Generate filing status progression (draft → filed → accepted).
14175    #[serde(default = "default_true")]
14176    pub generate_status_progression: bool,
14177}
14178
14179impl Default for ComplianceFilingGenConfig {
14180    fn default() -> Self {
14181        Self {
14182            enabled: false,
14183            filing_types: Vec::new(),
14184            generate_status_progression: true,
14185        }
14186    }
14187}
14188
14189/// Configuration for compliance graph integration.
14190#[derive(Debug, Clone, Serialize, Deserialize)]
14191pub struct ComplianceGraphConfig {
14192    /// Whether compliance graph integration is enabled.
14193    #[serde(default)]
14194    pub enabled: bool,
14195    /// Include compliance nodes (Standard, Regulation, Jurisdiction, etc.).
14196    #[serde(default = "default_true")]
14197    pub include_compliance_nodes: bool,
14198    /// Include compliance edges (MapsToStandard, TestsControl, etc.).
14199    #[serde(default = "default_true")]
14200    pub include_compliance_edges: bool,
14201    /// Include cross-reference edges between standards.
14202    #[serde(default = "default_true")]
14203    pub include_cross_references: bool,
14204    /// Include temporal supersession edges.
14205    #[serde(default)]
14206    pub include_supersession_edges: bool,
14207    /// Include edges linking standards to the GL account types they govern.
14208    #[serde(default = "default_true")]
14209    pub include_account_links: bool,
14210    /// Include edges linking standards to the internal controls that implement them.
14211    #[serde(default = "default_true")]
14212    pub include_control_links: bool,
14213    /// Include edges linking filings and jurisdictions to the originating company.
14214    #[serde(default = "default_true")]
14215    pub include_company_links: bool,
14216}
14217
14218impl Default for ComplianceGraphConfig {
14219    fn default() -> Self {
14220        Self {
14221            enabled: false,
14222            include_compliance_nodes: true,
14223            include_compliance_edges: true,
14224            include_cross_references: true,
14225            include_supersession_edges: false,
14226            include_account_links: true,
14227            include_control_links: true,
14228            include_company_links: true,
14229        }
14230    }
14231}
14232
14233/// Output settings for compliance-specific data files.
14234#[derive(Debug, Clone, Serialize, Deserialize)]
14235pub struct ComplianceOutputConfig {
14236    /// Export the standards registry catalog.
14237    #[serde(default = "default_true")]
14238    pub export_registry: bool,
14239    /// Export jurisdiction profiles.
14240    #[serde(default = "default_true")]
14241    pub export_jurisdictions: bool,
14242    /// Export cross-reference map.
14243    #[serde(default = "default_true")]
14244    pub export_cross_references: bool,
14245    /// Export temporal version history.
14246    #[serde(default)]
14247    pub export_version_history: bool,
14248}
14249
14250impl Default for ComplianceOutputConfig {
14251    fn default() -> Self {
14252        Self {
14253            export_registry: true,
14254            export_jurisdictions: true,
14255            export_cross_references: true,
14256            export_version_history: false,
14257        }
14258    }
14259}
14260
14261#[cfg(test)]
14262#[allow(clippy::unwrap_used)]
14263mod tests {
14264    use super::*;
14265    use crate::presets::demo_preset;
14266
14267    // ==========================================================================
14268    // Serialization/Deserialization Tests
14269    // ==========================================================================
14270
14271    #[test]
14272    fn test_config_yaml_roundtrip() {
14273        let config = demo_preset();
14274        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
14275        let deserialized: GeneratorConfig =
14276            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
14277
14278        assert_eq!(
14279            config.global.period_months,
14280            deserialized.global.period_months
14281        );
14282        assert_eq!(config.global.industry, deserialized.global.industry);
14283        assert_eq!(config.companies.len(), deserialized.companies.len());
14284        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
14285    }
14286
14287    #[test]
14288    fn test_config_json_roundtrip() {
14289        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
14290        let mut config = demo_preset();
14291        // Replace infinity with a large but finite value for JSON compatibility
14292        config.master_data.employees.approval_limits.executive = 1e12;
14293
14294        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
14295        let deserialized: GeneratorConfig =
14296            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
14297
14298        assert_eq!(
14299            config.global.period_months,
14300            deserialized.global.period_months
14301        );
14302        assert_eq!(config.global.industry, deserialized.global.industry);
14303        assert_eq!(config.companies.len(), deserialized.companies.len());
14304    }
14305
14306    #[test]
14307    fn test_transaction_volume_serialization() {
14308        // Test various transaction volumes serialize correctly
14309        let volumes = vec![
14310            (TransactionVolume::TenK, "ten_k"),
14311            (TransactionVolume::HundredK, "hundred_k"),
14312            (TransactionVolume::OneM, "one_m"),
14313            (TransactionVolume::TenM, "ten_m"),
14314            (TransactionVolume::HundredM, "hundred_m"),
14315        ];
14316
14317        for (volume, expected_key) in volumes {
14318            let json = serde_json::to_string(&volume).expect("Failed to serialize");
14319            assert!(
14320                json.contains(expected_key),
14321                "Expected {} in JSON: {}",
14322                expected_key,
14323                json
14324            );
14325        }
14326    }
14327
14328    #[test]
14329    fn test_transaction_volume_custom_serialization() {
14330        let volume = TransactionVolume::Custom(12345);
14331        let json = serde_json::to_string(&volume).expect("Failed to serialize");
14332        let deserialized: TransactionVolume =
14333            serde_json::from_str(&json).expect("Failed to deserialize");
14334        assert_eq!(deserialized.count(), 12345);
14335    }
14336
14337    #[test]
14338    fn test_output_mode_serialization() {
14339        let modes = vec![
14340            OutputMode::Streaming,
14341            OutputMode::FlatFile,
14342            OutputMode::Both,
14343        ];
14344
14345        for mode in modes {
14346            let json = serde_json::to_string(&mode).expect("Failed to serialize");
14347            let deserialized: OutputMode =
14348                serde_json::from_str(&json).expect("Failed to deserialize");
14349            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
14350        }
14351    }
14352
14353    #[test]
14354    fn test_file_format_serialization() {
14355        let formats = vec![
14356            FileFormat::Csv,
14357            FileFormat::Parquet,
14358            FileFormat::Json,
14359            FileFormat::JsonLines,
14360        ];
14361
14362        for format in formats {
14363            let json = serde_json::to_string(&format).expect("Failed to serialize");
14364            let deserialized: FileFormat =
14365                serde_json::from_str(&json).expect("Failed to deserialize");
14366            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
14367        }
14368    }
14369
14370    #[test]
14371    fn test_compression_algorithm_serialization() {
14372        let algos = vec![
14373            CompressionAlgorithm::Gzip,
14374            CompressionAlgorithm::Zstd,
14375            CompressionAlgorithm::Lz4,
14376            CompressionAlgorithm::Snappy,
14377        ];
14378
14379        for algo in algos {
14380            let json = serde_json::to_string(&algo).expect("Failed to serialize");
14381            let deserialized: CompressionAlgorithm =
14382                serde_json::from_str(&json).expect("Failed to deserialize");
14383            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
14384        }
14385    }
14386
14387    #[test]
14388    fn test_transfer_pricing_method_serialization() {
14389        let methods = vec![
14390            TransferPricingMethod::CostPlus,
14391            TransferPricingMethod::ComparableUncontrolled,
14392            TransferPricingMethod::ResalePrice,
14393            TransferPricingMethod::TransactionalNetMargin,
14394            TransferPricingMethod::ProfitSplit,
14395        ];
14396
14397        for method in methods {
14398            let json = serde_json::to_string(&method).expect("Failed to serialize");
14399            let deserialized: TransferPricingMethod =
14400                serde_json::from_str(&json).expect("Failed to deserialize");
14401            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
14402        }
14403    }
14404
14405    #[test]
14406    fn test_benford_exemption_serialization() {
14407        let exemptions = vec![
14408            BenfordExemption::Recurring,
14409            BenfordExemption::Payroll,
14410            BenfordExemption::FixedFees,
14411            BenfordExemption::RoundAmounts,
14412        ];
14413
14414        for exemption in exemptions {
14415            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
14416            let deserialized: BenfordExemption =
14417                serde_json::from_str(&json).expect("Failed to deserialize");
14418            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
14419        }
14420    }
14421
14422    // ==========================================================================
14423    // Default Value Tests
14424    // ==========================================================================
14425
14426    #[test]
14427    fn test_global_config_defaults() {
14428        let yaml = r#"
14429            industry: manufacturing
14430            start_date: "2024-01-01"
14431            period_months: 6
14432        "#;
14433        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14434        assert_eq!(config.group_currency, "USD");
14435        assert!(config.parallel);
14436        assert_eq!(config.worker_threads, 0);
14437        assert_eq!(config.memory_limit_mb, 0);
14438    }
14439
14440    #[test]
14441    fn test_fraud_config_defaults() {
14442        let config = FraudConfig::default();
14443        assert!(!config.enabled);
14444        assert_eq!(config.fraud_rate, 0.005);
14445        assert!(!config.clustering_enabled);
14446    }
14447
14448    #[test]
14449    fn test_internal_controls_config_defaults() {
14450        let config = InternalControlsConfig::default();
14451        assert!(!config.enabled);
14452        assert_eq!(config.exception_rate, 0.02);
14453        assert_eq!(config.sod_violation_rate, 0.01);
14454        assert!(config.export_control_master_data);
14455        assert_eq!(config.sox_materiality_threshold, 10000.0);
14456        // COSO fields
14457        assert!(config.coso_enabled);
14458        assert!(!config.include_entity_level_controls);
14459        assert_eq!(config.target_maturity_level, "mixed");
14460    }
14461
14462    #[test]
14463    fn test_output_config_defaults() {
14464        let config = OutputConfig::default();
14465        assert!(matches!(config.mode, OutputMode::FlatFile));
14466        assert_eq!(config.formats, vec![FileFormat::Parquet]);
14467        assert!(config.compression.enabled);
14468        assert!(matches!(
14469            config.compression.algorithm,
14470            CompressionAlgorithm::Zstd
14471        ));
14472        assert!(config.include_acdoca);
14473        assert!(!config.include_bseg);
14474        assert!(config.partition_by_period);
14475        assert!(!config.partition_by_company);
14476    }
14477
14478    #[test]
14479    fn test_approval_config_defaults() {
14480        let config = ApprovalConfig::default();
14481        assert!(!config.enabled);
14482        assert_eq!(config.auto_approve_threshold, 1000.0);
14483        assert_eq!(config.rejection_rate, 0.02);
14484        assert_eq!(config.revision_rate, 0.05);
14485        assert_eq!(config.average_approval_delay_hours, 4.0);
14486        assert_eq!(config.thresholds.len(), 4);
14487    }
14488
14489    #[test]
14490    fn test_p2p_flow_config_defaults() {
14491        let config = P2PFlowConfig::default();
14492        assert!(config.enabled);
14493        assert_eq!(config.three_way_match_rate, 0.95);
14494        assert_eq!(config.partial_delivery_rate, 0.15);
14495        assert_eq!(config.average_po_to_gr_days, 14);
14496    }
14497
14498    #[test]
14499    fn test_o2c_flow_config_defaults() {
14500        let config = O2CFlowConfig::default();
14501        assert!(config.enabled);
14502        assert_eq!(config.credit_check_failure_rate, 0.02);
14503        assert_eq!(config.return_rate, 0.03);
14504        assert_eq!(config.bad_debt_rate, 0.01);
14505    }
14506
14507    #[test]
14508    fn test_balance_config_defaults() {
14509        let config = BalanceConfig::default();
14510        assert!(!config.generate_opening_balances);
14511        assert!(config.generate_trial_balances);
14512        assert_eq!(config.target_gross_margin, 0.35);
14513        assert!(config.validate_balance_equation);
14514        assert!(config.reconcile_subledgers);
14515    }
14516
14517    // ==========================================================================
14518    // Partial Config Deserialization Tests
14519    // ==========================================================================
14520
14521    #[test]
14522    fn test_partial_config_with_defaults() {
14523        // Minimal config that should use all defaults
14524        let yaml = r#"
14525            global:
14526              industry: manufacturing
14527              start_date: "2024-01-01"
14528              period_months: 3
14529            companies:
14530              - code: "TEST"
14531                name: "Test Company"
14532                currency: "USD"
14533                country: "US"
14534                annual_transaction_volume: ten_k
14535            chart_of_accounts:
14536              complexity: small
14537            output:
14538              output_directory: "./output"
14539        "#;
14540
14541        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14542        assert_eq!(config.global.period_months, 3);
14543        assert_eq!(config.companies.len(), 1);
14544        assert!(!config.fraud.enabled); // Default
14545        assert!(!config.internal_controls.enabled); // Default
14546    }
14547
14548    #[test]
14549    fn test_config_with_fraud_enabled() {
14550        let yaml = r#"
14551            global:
14552              industry: retail
14553              start_date: "2024-01-01"
14554              period_months: 12
14555            companies:
14556              - code: "RETAIL"
14557                name: "Retail Co"
14558                currency: "USD"
14559                country: "US"
14560                annual_transaction_volume: hundred_k
14561            chart_of_accounts:
14562              complexity: medium
14563            output:
14564              output_directory: "./output"
14565            fraud:
14566              enabled: true
14567              fraud_rate: 0.05
14568              clustering_enabled: true
14569        "#;
14570
14571        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14572        assert!(config.fraud.enabled);
14573        assert_eq!(config.fraud.fraud_rate, 0.05);
14574        assert!(config.fraud.clustering_enabled);
14575    }
14576
14577    #[test]
14578    fn test_config_with_multiple_companies() {
14579        let yaml = r#"
14580            global:
14581              industry: manufacturing
14582              start_date: "2024-01-01"
14583              period_months: 6
14584            companies:
14585              - code: "HQ"
14586                name: "Headquarters"
14587                currency: "USD"
14588                country: "US"
14589                annual_transaction_volume: hundred_k
14590                volume_weight: 1.0
14591              - code: "EU"
14592                name: "European Subsidiary"
14593                currency: "EUR"
14594                country: "DE"
14595                annual_transaction_volume: hundred_k
14596                volume_weight: 0.5
14597              - code: "APAC"
14598                name: "Asia Pacific"
14599                currency: "JPY"
14600                country: "JP"
14601                annual_transaction_volume: ten_k
14602                volume_weight: 0.3
14603            chart_of_accounts:
14604              complexity: large
14605            output:
14606              output_directory: "./output"
14607        "#;
14608
14609        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14610        assert_eq!(config.companies.len(), 3);
14611        assert_eq!(config.companies[0].code, "HQ");
14612        assert_eq!(config.companies[1].currency, "EUR");
14613        assert_eq!(config.companies[2].volume_weight, 0.3);
14614    }
14615
14616    #[test]
14617    fn test_intercompany_config() {
14618        let yaml = r#"
14619            enabled: true
14620            ic_transaction_rate: 0.20
14621            transfer_pricing_method: cost_plus
14622            markup_percent: 0.08
14623            generate_matched_pairs: true
14624            generate_eliminations: true
14625        "#;
14626
14627        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14628        assert!(config.enabled);
14629        assert_eq!(config.ic_transaction_rate, 0.20);
14630        assert!(matches!(
14631            config.transfer_pricing_method,
14632            TransferPricingMethod::CostPlus
14633        ));
14634        assert_eq!(config.markup_percent, 0.08);
14635        assert!(config.generate_eliminations);
14636    }
14637
14638    // ==========================================================================
14639    // Company Config Tests
14640    // ==========================================================================
14641
14642    #[test]
14643    fn test_company_config_defaults() {
14644        let yaml = r#"
14645            code: "TEST"
14646            name: "Test Company"
14647            currency: "USD"
14648            country: "US"
14649            annual_transaction_volume: ten_k
14650        "#;
14651
14652        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14653        assert_eq!(config.fiscal_year_variant, "K4"); // Default
14654        assert_eq!(config.volume_weight, 1.0); // Default
14655    }
14656
14657    // ==========================================================================
14658    // Chart of Accounts Config Tests
14659    // ==========================================================================
14660
14661    #[test]
14662    fn test_coa_config_defaults() {
14663        let yaml = r#"
14664            complexity: medium
14665        "#;
14666
14667        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14668        assert!(config.industry_specific); // Default true
14669        assert!(config.custom_accounts.is_none());
14670        assert_eq!(config.min_hierarchy_depth, 2); // Default
14671        assert_eq!(config.max_hierarchy_depth, 5); // Default
14672    }
14673
14674    // ==========================================================================
14675    // Accounting Standards Config Tests
14676    // ==========================================================================
14677
14678    #[test]
14679    fn test_accounting_standards_config_defaults() {
14680        let config = AccountingStandardsConfig::default();
14681        assert!(!config.enabled);
14682        assert!(config.framework.is_none());
14683        assert!(!config.revenue_recognition.enabled);
14684        assert!(!config.leases.enabled);
14685        assert!(!config.fair_value.enabled);
14686        assert!(!config.impairment.enabled);
14687        assert!(!config.generate_differences);
14688    }
14689
14690    #[test]
14691    fn test_accounting_standards_config_yaml() {
14692        let yaml = r#"
14693            enabled: true
14694            framework: ifrs
14695            revenue_recognition:
14696              enabled: true
14697              generate_contracts: true
14698              avg_obligations_per_contract: 2.5
14699              variable_consideration_rate: 0.20
14700              over_time_recognition_rate: 0.35
14701              contract_count: 150
14702            leases:
14703              enabled: true
14704              lease_count: 75
14705              finance_lease_percent: 0.25
14706              avg_lease_term_months: 48
14707            generate_differences: true
14708        "#;
14709
14710        let config: AccountingStandardsConfig =
14711            serde_yaml::from_str(yaml).expect("Failed to parse");
14712        assert!(config.enabled);
14713        assert!(matches!(
14714            config.framework,
14715            Some(AccountingFrameworkConfig::Ifrs)
14716        ));
14717        assert!(config.revenue_recognition.enabled);
14718        assert_eq!(config.revenue_recognition.contract_count, 150);
14719        assert_eq!(config.revenue_recognition.avg_obligations_per_contract, 2.5);
14720        assert!(config.leases.enabled);
14721        assert_eq!(config.leases.lease_count, 75);
14722        assert_eq!(config.leases.finance_lease_percent, 0.25);
14723        assert!(config.generate_differences);
14724    }
14725
14726    #[test]
14727    fn test_accounting_framework_serialization() {
14728        let frameworks = [
14729            AccountingFrameworkConfig::UsGaap,
14730            AccountingFrameworkConfig::Ifrs,
14731            AccountingFrameworkConfig::DualReporting,
14732            AccountingFrameworkConfig::FrenchGaap,
14733            AccountingFrameworkConfig::GermanGaap,
14734        ];
14735
14736        for framework in frameworks {
14737            let json = serde_json::to_string(&framework).expect("Failed to serialize");
14738            let deserialized: AccountingFrameworkConfig =
14739                serde_json::from_str(&json).expect("Failed to deserialize");
14740            assert!(format!("{:?}", framework) == format!("{:?}", deserialized));
14741        }
14742    }
14743
14744    #[test]
14745    fn test_revenue_recognition_config_defaults() {
14746        let config = RevenueRecognitionConfig::default();
14747        assert!(!config.enabled);
14748        assert!(config.generate_contracts);
14749        assert_eq!(config.avg_obligations_per_contract, 2.0);
14750        assert_eq!(config.variable_consideration_rate, 0.15);
14751        assert_eq!(config.over_time_recognition_rate, 0.30);
14752        assert_eq!(config.contract_count, 100);
14753    }
14754
14755    #[test]
14756    fn test_lease_accounting_config_defaults() {
14757        let config = LeaseAccountingConfig::default();
14758        assert!(!config.enabled);
14759        assert_eq!(config.lease_count, 50);
14760        assert_eq!(config.finance_lease_percent, 0.30);
14761        assert_eq!(config.avg_lease_term_months, 60);
14762        assert!(config.generate_amortization);
14763        assert_eq!(config.real_estate_percent, 0.40);
14764    }
14765
14766    #[test]
14767    fn test_fair_value_config_defaults() {
14768        let config = FairValueConfig::default();
14769        assert!(!config.enabled);
14770        assert_eq!(config.measurement_count, 25);
14771        assert_eq!(config.level1_percent, 0.40);
14772        assert_eq!(config.level2_percent, 0.35);
14773        assert_eq!(config.level3_percent, 0.25);
14774        assert!(!config.include_sensitivity_analysis);
14775    }
14776
14777    #[test]
14778    fn test_impairment_config_defaults() {
14779        let config = ImpairmentConfig::default();
14780        assert!(!config.enabled);
14781        assert_eq!(config.test_count, 15);
14782        assert_eq!(config.impairment_rate, 0.10);
14783        assert!(config.generate_projections);
14784        assert!(!config.include_goodwill);
14785    }
14786
14787    // ==========================================================================
14788    // Audit Standards Config Tests
14789    // ==========================================================================
14790
14791    #[test]
14792    fn test_audit_standards_config_defaults() {
14793        let config = AuditStandardsConfig::default();
14794        assert!(!config.enabled);
14795        assert!(!config.isa_compliance.enabled);
14796        assert!(!config.analytical_procedures.enabled);
14797        assert!(!config.confirmations.enabled);
14798        assert!(!config.opinion.enabled);
14799        assert!(!config.generate_audit_trail);
14800        assert!(!config.sox.enabled);
14801        assert!(!config.pcaob.enabled);
14802    }
14803
14804    #[test]
14805    fn test_audit_standards_config_yaml() {
14806        let yaml = r#"
14807            enabled: true
14808            isa_compliance:
14809              enabled: true
14810              compliance_level: comprehensive
14811              generate_isa_mappings: true
14812              include_pcaob: true
14813              framework: dual
14814            analytical_procedures:
14815              enabled: true
14816              procedures_per_account: 5
14817              variance_probability: 0.25
14818            confirmations:
14819              enabled: true
14820              confirmation_count: 75
14821              positive_response_rate: 0.90
14822              exception_rate: 0.08
14823            opinion:
14824              enabled: true
14825              generate_kam: true
14826              average_kam_count: 4
14827            sox:
14828              enabled: true
14829              generate_302_certifications: true
14830              generate_404_assessments: true
14831              material_weakness_rate: 0.03
14832            pcaob:
14833              enabled: true
14834              is_pcaob_audit: true
14835              include_icfr_opinion: true
14836            generate_audit_trail: true
14837        "#;
14838
14839        let config: AuditStandardsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14840        assert!(config.enabled);
14841        assert!(config.isa_compliance.enabled);
14842        assert_eq!(config.isa_compliance.compliance_level, "comprehensive");
14843        assert!(config.isa_compliance.include_pcaob);
14844        assert_eq!(config.isa_compliance.framework, "dual");
14845        assert!(config.analytical_procedures.enabled);
14846        assert_eq!(config.analytical_procedures.procedures_per_account, 5);
14847        assert!(config.confirmations.enabled);
14848        assert_eq!(config.confirmations.confirmation_count, 75);
14849        assert!(config.opinion.enabled);
14850        assert_eq!(config.opinion.average_kam_count, 4);
14851        assert!(config.sox.enabled);
14852        assert!(config.sox.generate_302_certifications);
14853        assert_eq!(config.sox.material_weakness_rate, 0.03);
14854        assert!(config.pcaob.enabled);
14855        assert!(config.pcaob.is_pcaob_audit);
14856        assert!(config.pcaob.include_icfr_opinion);
14857        assert!(config.generate_audit_trail);
14858    }
14859
14860    #[test]
14861    fn test_isa_compliance_config_defaults() {
14862        let config = IsaComplianceConfig::default();
14863        assert!(!config.enabled);
14864        assert_eq!(config.compliance_level, "standard");
14865        assert!(config.generate_isa_mappings);
14866        assert!(config.generate_coverage_summary);
14867        assert!(!config.include_pcaob);
14868        assert_eq!(config.framework, "isa");
14869    }
14870
14871    #[test]
14872    fn test_sox_compliance_config_defaults() {
14873        let config = SoxComplianceConfig::default();
14874        assert!(!config.enabled);
14875        assert!(config.generate_302_certifications);
14876        assert!(config.generate_404_assessments);
14877        assert_eq!(config.materiality_threshold, 10000.0);
14878        assert_eq!(config.material_weakness_rate, 0.02);
14879        assert_eq!(config.significant_deficiency_rate, 0.08);
14880    }
14881
14882    #[test]
14883    fn test_pcaob_config_defaults() {
14884        let config = PcaobConfig::default();
14885        assert!(!config.enabled);
14886        assert!(!config.is_pcaob_audit);
14887        assert!(config.generate_cam);
14888        assert!(!config.include_icfr_opinion);
14889        assert!(!config.generate_standard_mappings);
14890    }
14891
14892    #[test]
14893    fn test_config_with_standards_enabled() {
14894        let yaml = r#"
14895            global:
14896              industry: financial_services
14897              start_date: "2024-01-01"
14898              period_months: 12
14899            companies:
14900              - code: "BANK"
14901                name: "Test Bank"
14902                currency: "USD"
14903                country: "US"
14904                annual_transaction_volume: hundred_k
14905            chart_of_accounts:
14906              complexity: large
14907            output:
14908              output_directory: "./output"
14909            accounting_standards:
14910              enabled: true
14911              framework: us_gaap
14912              revenue_recognition:
14913                enabled: true
14914              leases:
14915                enabled: true
14916            audit_standards:
14917              enabled: true
14918              isa_compliance:
14919                enabled: true
14920              sox:
14921                enabled: true
14922        "#;
14923
14924        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14925        assert!(config.accounting_standards.enabled);
14926        assert!(matches!(
14927            config.accounting_standards.framework,
14928            Some(AccountingFrameworkConfig::UsGaap)
14929        ));
14930        assert!(config.accounting_standards.revenue_recognition.enabled);
14931        assert!(config.accounting_standards.leases.enabled);
14932        assert!(config.audit_standards.enabled);
14933        assert!(config.audit_standards.isa_compliance.enabled);
14934        assert!(config.audit_standards.sox.enabled);
14935    }
14936
14937    // ==========================================================================
14938    // Industry-Specific Config Tests
14939    // ==========================================================================
14940
14941    #[test]
14942    fn test_industry_specific_config_defaults() {
14943        let config = IndustrySpecificConfig::default();
14944        assert!(!config.enabled);
14945        assert!(!config.manufacturing.enabled);
14946        assert!(!config.retail.enabled);
14947        assert!(!config.healthcare.enabled);
14948        assert!(!config.technology.enabled);
14949        assert!(!config.financial_services.enabled);
14950        assert!(!config.professional_services.enabled);
14951    }
14952
14953    #[test]
14954    fn test_manufacturing_config_defaults() {
14955        let config = ManufacturingConfig::default();
14956        assert!(!config.enabled);
14957        assert_eq!(config.bom_depth, 4);
14958        assert!(!config.just_in_time);
14959        assert_eq!(config.supplier_tiers, 2);
14960        assert_eq!(config.target_yield_rate, 0.97);
14961        assert_eq!(config.scrap_alert_threshold, 0.03);
14962    }
14963
14964    #[test]
14965    fn test_retail_config_defaults() {
14966        let config = RetailConfig::default();
14967        assert!(!config.enabled);
14968        assert_eq!(config.avg_daily_transactions, 500);
14969        assert!(config.loss_prevention);
14970        assert_eq!(config.shrinkage_rate, 0.015);
14971    }
14972
14973    #[test]
14974    fn test_healthcare_config_defaults() {
14975        let config = HealthcareConfig::default();
14976        assert!(!config.enabled);
14977        assert_eq!(config.facility_type, "hospital");
14978        assert_eq!(config.avg_daily_encounters, 150);
14979        assert!(config.compliance.hipaa);
14980        assert!(config.compliance.stark_law);
14981        assert!(config.coding_systems.icd10);
14982        assert!(config.coding_systems.cpt);
14983    }
14984
14985    #[test]
14986    fn test_technology_config_defaults() {
14987        let config = TechnologyConfig::default();
14988        assert!(!config.enabled);
14989        assert_eq!(config.revenue_model, "saas");
14990        assert_eq!(config.subscription_revenue_pct, 0.60);
14991        assert!(config.rd_capitalization.enabled);
14992    }
14993
14994    #[test]
14995    fn test_config_with_industry_specific() {
14996        let yaml = r#"
14997            global:
14998              industry: healthcare
14999              start_date: "2024-01-01"
15000              period_months: 12
15001            companies:
15002              - code: "HOSP"
15003                name: "Test Hospital"
15004                currency: "USD"
15005                country: "US"
15006                annual_transaction_volume: hundred_k
15007            chart_of_accounts:
15008              complexity: medium
15009            output:
15010              output_directory: "./output"
15011            industry_specific:
15012              enabled: true
15013              healthcare:
15014                enabled: true
15015                facility_type: hospital
15016                payer_mix:
15017                  medicare: 0.45
15018                  medicaid: 0.15
15019                  commercial: 0.35
15020                  self_pay: 0.05
15021                coding_systems:
15022                  icd10: true
15023                  cpt: true
15024                  drg: true
15025                compliance:
15026                  hipaa: true
15027                  stark_law: true
15028                anomaly_rates:
15029                  upcoding: 0.03
15030                  unbundling: 0.02
15031        "#;
15032
15033        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15034        assert!(config.industry_specific.enabled);
15035        assert!(config.industry_specific.healthcare.enabled);
15036        assert_eq!(
15037            config.industry_specific.healthcare.facility_type,
15038            "hospital"
15039        );
15040        assert_eq!(config.industry_specific.healthcare.payer_mix.medicare, 0.45);
15041        assert_eq!(config.industry_specific.healthcare.payer_mix.self_pay, 0.05);
15042        assert!(config.industry_specific.healthcare.coding_systems.icd10);
15043        assert!(config.industry_specific.healthcare.compliance.hipaa);
15044        assert_eq!(
15045            config.industry_specific.healthcare.anomaly_rates.upcoding,
15046            0.03
15047        );
15048    }
15049
15050    #[test]
15051    fn test_config_with_manufacturing_specific() {
15052        let yaml = r#"
15053            global:
15054              industry: manufacturing
15055              start_date: "2024-01-01"
15056              period_months: 12
15057            companies:
15058              - code: "MFG"
15059                name: "Test Manufacturing"
15060                currency: "USD"
15061                country: "US"
15062                annual_transaction_volume: hundred_k
15063            chart_of_accounts:
15064              complexity: medium
15065            output:
15066              output_directory: "./output"
15067            industry_specific:
15068              enabled: true
15069              manufacturing:
15070                enabled: true
15071                bom_depth: 5
15072                just_in_time: true
15073                supplier_tiers: 3
15074                target_yield_rate: 0.98
15075                anomaly_rates:
15076                  yield_manipulation: 0.02
15077                  phantom_production: 0.01
15078        "#;
15079
15080        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15081        assert!(config.industry_specific.enabled);
15082        assert!(config.industry_specific.manufacturing.enabled);
15083        assert_eq!(config.industry_specific.manufacturing.bom_depth, 5);
15084        assert!(config.industry_specific.manufacturing.just_in_time);
15085        assert_eq!(config.industry_specific.manufacturing.supplier_tiers, 3);
15086        assert_eq!(
15087            config.industry_specific.manufacturing.target_yield_rate,
15088            0.98
15089        );
15090        assert_eq!(
15091            config
15092                .industry_specific
15093                .manufacturing
15094                .anomaly_rates
15095                .yield_manipulation,
15096            0.02
15097        );
15098    }
15099
15100    // ==========================================================================
15101    // Tax Configuration Tests
15102    // ==========================================================================
15103
15104    #[test]
15105    fn test_tax_config_defaults() {
15106        let tax = TaxConfig::default();
15107        assert!(!tax.enabled);
15108        assert!(tax.jurisdictions.countries.is_empty());
15109        assert!(!tax.jurisdictions.include_subnational);
15110        assert!(!tax.vat_gst.enabled);
15111        assert!(tax.vat_gst.standard_rates.is_empty());
15112        assert!(tax.vat_gst.reduced_rates.is_empty());
15113        assert!(tax.vat_gst.exempt_categories.is_empty());
15114        assert!(tax.vat_gst.reverse_charge);
15115        assert!(!tax.sales_tax.enabled);
15116        assert!(tax.sales_tax.nexus_states.is_empty());
15117        assert!(!tax.withholding.enabled);
15118        assert!(tax.withholding.treaty_network);
15119        assert_eq!(tax.withholding.default_rate, 0.30);
15120        assert_eq!(tax.withholding.treaty_reduced_rate, 0.15);
15121        assert!(tax.provisions.enabled);
15122        assert_eq!(tax.provisions.statutory_rate, 0.21);
15123        assert!(tax.provisions.uncertain_positions);
15124        assert!(!tax.payroll_tax.enabled);
15125        assert_eq!(tax.anomaly_rate, 0.03);
15126    }
15127
15128    #[test]
15129    fn test_tax_config_from_yaml() {
15130        let yaml = r#"
15131            global:
15132              seed: 42
15133              start_date: "2024-01-01"
15134              period_months: 12
15135              industry: retail
15136            companies:
15137              - code: C001
15138                name: Test Corp
15139                currency: USD
15140                country: US
15141                annual_transaction_volume: ten_k
15142            chart_of_accounts:
15143              complexity: small
15144            output:
15145              output_directory: ./output
15146            tax:
15147              enabled: true
15148              anomaly_rate: 0.05
15149              jurisdictions:
15150                countries: ["US", "DE", "GB"]
15151                include_subnational: true
15152              vat_gst:
15153                enabled: true
15154                standard_rates:
15155                  DE: 0.19
15156                  GB: 0.20
15157                reduced_rates:
15158                  DE: 0.07
15159                  GB: 0.05
15160                exempt_categories:
15161                  - financial_services
15162                  - healthcare
15163                reverse_charge: false
15164              sales_tax:
15165                enabled: true
15166                nexus_states: ["CA", "NY", "TX"]
15167              withholding:
15168                enabled: true
15169                treaty_network: false
15170                default_rate: 0.25
15171                treaty_reduced_rate: 0.10
15172              provisions:
15173                enabled: false
15174                statutory_rate: 0.28
15175                uncertain_positions: false
15176              payroll_tax:
15177                enabled: true
15178        "#;
15179
15180        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15181        assert!(config.tax.enabled);
15182        assert_eq!(config.tax.anomaly_rate, 0.05);
15183
15184        // Jurisdictions
15185        assert_eq!(config.tax.jurisdictions.countries.len(), 3);
15186        assert!(config
15187            .tax
15188            .jurisdictions
15189            .countries
15190            .contains(&"DE".to_string()));
15191        assert!(config.tax.jurisdictions.include_subnational);
15192
15193        // VAT/GST
15194        assert!(config.tax.vat_gst.enabled);
15195        assert_eq!(config.tax.vat_gst.standard_rates.get("DE"), Some(&0.19));
15196        assert_eq!(config.tax.vat_gst.standard_rates.get("GB"), Some(&0.20));
15197        assert_eq!(config.tax.vat_gst.reduced_rates.get("DE"), Some(&0.07));
15198        assert_eq!(config.tax.vat_gst.exempt_categories.len(), 2);
15199        assert!(!config.tax.vat_gst.reverse_charge);
15200
15201        // Sales tax
15202        assert!(config.tax.sales_tax.enabled);
15203        assert_eq!(config.tax.sales_tax.nexus_states.len(), 3);
15204        assert!(config
15205            .tax
15206            .sales_tax
15207            .nexus_states
15208            .contains(&"CA".to_string()));
15209
15210        // Withholding
15211        assert!(config.tax.withholding.enabled);
15212        assert!(!config.tax.withholding.treaty_network);
15213        assert_eq!(config.tax.withholding.default_rate, 0.25);
15214        assert_eq!(config.tax.withholding.treaty_reduced_rate, 0.10);
15215
15216        // Provisions
15217        assert!(!config.tax.provisions.enabled);
15218        assert_eq!(config.tax.provisions.statutory_rate, 0.28);
15219        assert!(!config.tax.provisions.uncertain_positions);
15220
15221        // Payroll tax
15222        assert!(config.tax.payroll_tax.enabled);
15223    }
15224
15225    #[test]
15226    fn test_generator_config_with_tax_default() {
15227        let yaml = r#"
15228            global:
15229              seed: 42
15230              start_date: "2024-01-01"
15231              period_months: 12
15232              industry: retail
15233            companies:
15234              - code: C001
15235                name: Test Corp
15236                currency: USD
15237                country: US
15238                annual_transaction_volume: ten_k
15239            chart_of_accounts:
15240              complexity: small
15241            output:
15242              output_directory: ./output
15243        "#;
15244
15245        let config: GeneratorConfig =
15246            serde_yaml::from_str(yaml).expect("Failed to parse config without tax section");
15247        // Tax should be present with defaults when not specified in YAML
15248        assert!(!config.tax.enabled);
15249        assert!(config.tax.jurisdictions.countries.is_empty());
15250        assert_eq!(config.tax.anomaly_rate, 0.03);
15251        assert!(config.tax.provisions.enabled); // provisions default to enabled=true
15252        assert_eq!(config.tax.provisions.statutory_rate, 0.21);
15253    }
15254
15255    // ==========================================================================
15256    // SessionSchemaConfig Tests
15257    // ==========================================================================
15258
15259    #[test]
15260    fn test_session_config_default_disabled() {
15261        let yaml = "{}";
15262        let config: SessionSchemaConfig =
15263            serde_yaml::from_str(yaml).expect("Failed to parse empty session config");
15264        assert!(!config.enabled);
15265        assert!(config.checkpoint_path.is_none());
15266        assert!(config.per_period_output);
15267        assert!(config.consolidated_output);
15268    }
15269
15270    #[test]
15271    fn test_config_backward_compatible_without_session() {
15272        let yaml = r#"
15273            global:
15274              seed: 42
15275              start_date: "2024-01-01"
15276              period_months: 12
15277              industry: retail
15278            companies:
15279              - code: C001
15280                name: Test Corp
15281                currency: USD
15282                country: US
15283                annual_transaction_volume: ten_k
15284            chart_of_accounts:
15285              complexity: small
15286            output:
15287              output_directory: ./output
15288        "#;
15289
15290        let config: GeneratorConfig =
15291            serde_yaml::from_str(yaml).expect("Failed to parse config without session");
15292        // Session should default to disabled
15293        assert!(!config.session.enabled);
15294        assert!(config.session.per_period_output);
15295        assert!(config.session.consolidated_output);
15296        // fiscal_year_months should be None
15297        assert!(config.global.fiscal_year_months.is_none());
15298    }
15299
15300    #[test]
15301    fn test_fiscal_year_months_parsed() {
15302        let yaml = r#"
15303            global:
15304              seed: 42
15305              start_date: "2024-01-01"
15306              period_months: 24
15307              industry: retail
15308              fiscal_year_months: 12
15309            companies:
15310              - code: C001
15311                name: Test Corp
15312                currency: USD
15313                country: US
15314                annual_transaction_volume: ten_k
15315            chart_of_accounts:
15316              complexity: small
15317            output:
15318              output_directory: ./output
15319            session:
15320              enabled: true
15321              checkpoint_path: /tmp/checkpoints
15322              per_period_output: true
15323              consolidated_output: false
15324        "#;
15325
15326        let config: GeneratorConfig =
15327            serde_yaml::from_str(yaml).expect("Failed to parse config with fiscal_year_months");
15328        assert_eq!(config.global.fiscal_year_months, Some(12));
15329        assert!(config.session.enabled);
15330        assert_eq!(
15331            config.session.checkpoint_path,
15332            Some("/tmp/checkpoints".to_string())
15333        );
15334        assert!(config.session.per_period_output);
15335        assert!(!config.session.consolidated_output);
15336    }
15337}