Skip to main content

datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
/// Root configuration for the synthetic data generator.
///
/// # Required vs. optional sections
///
/// `global`, `companies`, `chart_of_accounts` and `output` carry no
/// `#[serde(default)]`, so a config that omits any of them fails to
/// deserialize. Every other section is marked `#[serde(default)]` and
/// falls back to its type's `Default` value when the key is absent.
///
/// # camelCase alias policy
///
/// Every multi-word field carries `#[serde(alias = "camelCaseName")]`
/// so SDK clients that follow JSON conventions can submit configs
/// without round-tripping through a snake_case transformer. The alias
/// only widens what is accepted on input; serialization still emits the
/// snake_case field name.
///
/// Before v4.4.1 several fields — `documentFlows`, `accountingStandards`,
/// `complianceRegulations`, `analyticsMetadata` — had no alias, so SDK
/// submissions silently fell through to defaults. The symptom was
/// "enabling the 6 feature subsections together collapses the archive
/// from 99 files to 19". Root cause: those four fields never parsed;
/// the orchestrator produced far less data than requested, and
/// `output.exportFormat` similarly fell through so journal_entries
/// landed as the default Parquet/CSV rather than JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratorConfig {
    /// Global settings (required).
    pub global: GlobalConfig,
    /// Company configuration (required).
    pub companies: Vec<CompanyConfig>,
    /// Chart of Accounts configuration (required).
    #[serde(alias = "chartOfAccounts")]
    pub chart_of_accounts: ChartOfAccountsConfig,
    /// Transaction generation settings
    #[serde(default)]
    pub transactions: TransactionConfig,
    /// Output configuration (required).
    pub output: OutputConfig,
    /// Fraud simulation settings
    #[serde(default)]
    pub fraud: FraudConfig,
    /// Data quality variation settings
    #[serde(default, alias = "dataQuality")]
    pub data_quality: DataQualitySchemaConfig,
    /// Internal Controls System settings
    #[serde(default, alias = "internalControls")]
    pub internal_controls: InternalControlsConfig,
    /// Business process mix
    #[serde(default, alias = "businessProcesses")]
    pub business_processes: BusinessProcessConfig,
    /// User persona distribution
    #[serde(default, alias = "userPersonas")]
    pub user_personas: UserPersonaConfig,
    /// Template configuration for realistic data
    #[serde(default)]
    pub templates: TemplateConfig,
    /// Approval workflow configuration
    #[serde(default)]
    pub approval: ApprovalConfig,
    /// Department structure configuration
    #[serde(default)]
    pub departments: DepartmentConfig,
    /// Master data generation settings
    #[serde(default, alias = "masterData")]
    pub master_data: MasterDataConfig,
    /// Document flow generation settings
    #[serde(default, alias = "documentFlows")]
    pub document_flows: DocumentFlowConfig,
    /// Intercompany transaction settings
    #[serde(default)]
    pub intercompany: IntercompanyConfig,
    /// Balance and trial balance settings
    #[serde(default)]
    pub balance: BalanceConfig,
    /// OCPM (Object-Centric Process Mining) settings
    #[serde(default)]
    pub ocpm: OcpmConfig,
    /// Audit engagement and workpaper generation settings
    #[serde(default)]
    pub audit: AuditGenerationConfig,
    /// Banking KYC/AML transaction generation settings
    #[serde(default)]
    pub banking: datasynth_banking::BankingConfig,
    /// Scenario configuration for metadata and tagging (Phase 1.3)
    #[serde(default)]
    pub scenario: ScenarioConfig,
    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
    #[serde(default)]
    pub temporal: TemporalDriftConfig,
    /// Graph export configuration for accounting network export
    #[serde(default, alias = "graphExport")]
    pub graph_export: GraphExportConfig,
    /// Streaming output API configuration
    #[serde(default)]
    pub streaming: StreamingSchemaConfig,
    /// Rate limiting configuration
    #[serde(default, alias = "rateLimit")]
    pub rate_limit: RateLimitSchemaConfig,
    /// Temporal attribute generation configuration
    #[serde(default, alias = "temporalAttributes")]
    pub temporal_attributes: TemporalAttributeSchemaConfig,
    /// Relationship generation configuration
    #[serde(default)]
    pub relationships: RelationshipSchemaConfig,
    /// Accounting standards framework configuration (IFRS, US GAAP)
    #[serde(default, alias = "accountingStandards")]
    pub accounting_standards: AccountingStandardsConfig,
    /// Audit standards framework configuration (ISA, PCAOB)
    #[serde(default, alias = "auditStandards")]
    pub audit_standards: AuditStandardsConfig,
    /// Advanced distribution configuration (mixture models, correlations, regime changes)
    #[serde(default)]
    pub distributions: AdvancedDistributionConfig,
    /// Temporal patterns configuration (business days, period-end dynamics, processing lags)
    #[serde(default, alias = "temporalPatterns")]
    pub temporal_patterns: TemporalPatternsConfig,
    /// Vendor network configuration (multi-tier supply chain modeling)
    #[serde(default, alias = "vendorNetwork")]
    pub vendor_network: VendorNetworkSchemaConfig,
    /// Customer segmentation configuration (value segments, lifecycle stages)
    #[serde(default, alias = "customerSegmentation")]
    pub customer_segmentation: CustomerSegmentationSchemaConfig,
    /// Relationship strength calculation configuration
    #[serde(default, alias = "relationshipStrength")]
    pub relationship_strength: RelationshipStrengthSchemaConfig,
    /// Cross-process link configuration (P2P ↔ O2C via inventory)
    #[serde(default, alias = "crossProcessLinks")]
    pub cross_process_links: CrossProcessLinksSchemaConfig,
    /// Organizational events configuration (acquisitions, divestitures, etc.)
    #[serde(default, alias = "organizationalEvents")]
    pub organizational_events: OrganizationalEventsSchemaConfig,
    /// Behavioral drift configuration (vendor, customer, employee behavior)
    #[serde(default, alias = "behavioralDrift")]
    pub behavioral_drift: BehavioralDriftSchemaConfig,
    /// Market drift configuration (economic cycles, commodities, price shocks)
    #[serde(default, alias = "marketDrift")]
    pub market_drift: MarketDriftSchemaConfig,
    /// Drift labeling configuration for ground truth generation
    #[serde(default, alias = "driftLabeling")]
    pub drift_labeling: DriftLabelingSchemaConfig,
    /// Enhanced anomaly injection configuration (multi-stage schemes, correlated injection, near-miss)
    #[serde(default, alias = "anomalyInjection")]
    pub anomaly_injection: EnhancedAnomalyConfig,
    /// Industry-specific transaction and anomaly generation configuration
    #[serde(default, alias = "industrySpecific")]
    pub industry_specific: IndustrySpecificConfig,
    /// Fingerprint privacy configuration for extraction/synthesis
    #[serde(default, alias = "fingerprintPrivacy")]
    pub fingerprint_privacy: FingerprintPrivacyConfig,
    /// Quality gate configuration for pass/fail thresholds
    #[serde(default, alias = "qualityGates")]
    pub quality_gates: QualityGatesSchemaConfig,
    /// Compliance configuration (EU AI Act, content marking)
    #[serde(default)]
    pub compliance: ComplianceSchemaConfig,
    /// Webhook notification configuration
    #[serde(default)]
    pub webhooks: WebhookSchemaConfig,
    /// LLM enrichment configuration (AI-augmented vendor names, descriptions, explanations)
    #[serde(default)]
    pub llm: LlmSchemaConfig,
    /// Diffusion model configuration (statistical diffusion-based data enhancement)
    #[serde(default)]
    pub diffusion: DiffusionSchemaConfig,
    /// Causal generation configuration (structural causal models, interventions)
    #[serde(default)]
    pub causal: CausalSchemaConfig,

    // ===== Enterprise Process Chain Extensions =====
    /// Source-to-Pay (S2C/S2P) configuration (sourcing, contracts, catalogs, scorecards)
    #[serde(default, alias = "sourceToPay")]
    pub source_to_pay: SourceToPayConfig,
    /// Financial reporting configuration (financial statements, KPIs, budgets)
    #[serde(default, alias = "financialReporting")]
    pub financial_reporting: FinancialReportingConfig,
    /// HR process configuration (payroll, time & attendance, expenses)
    #[serde(default)]
    pub hr: HrConfig,
    /// Manufacturing configuration (production orders, WIP, routing)
    #[serde(default)]
    pub manufacturing: ManufacturingProcessConfig,
    /// Sales quote configuration (quote-to-order pipeline)
    #[serde(default, alias = "salesQuotes")]
    pub sales_quotes: SalesQuoteConfig,
    /// Tax accounting configuration (VAT/GST, sales tax, withholding, provisions, payroll tax)
    #[serde(default)]
    pub tax: TaxConfig,
    /// Treasury and cash management configuration
    #[serde(default)]
    pub treasury: TreasuryConfig,
    /// Project accounting configuration
    #[serde(default, alias = "projectAccounting")]
    pub project_accounting: ProjectAccountingConfig,
    /// ESG / Sustainability reporting configuration
    #[serde(default)]
    pub esg: EsgConfig,
    /// Country pack configuration (external packs directory, per-country overrides).
    /// `None` when the key is absent.
    #[serde(default, alias = "countryPacks")]
    pub country_packs: Option<CountryPacksSchemaConfig>,
    /// Counterfactual simulation scenario configuration
    #[serde(default)]
    pub scenarios: ScenariosConfig,
    /// Generation session configuration (period-by-period generation with balance carry-forward)
    #[serde(default)]
    pub session: SessionSchemaConfig,
    /// Compliance regulations framework configuration (standards registry, jurisdictions, temporal versioning, audit templates, graph integration)
    #[serde(default, alias = "complianceRegulations")]
    pub compliance_regulations: ComplianceRegulationsConfig,
    /// v3.3.0: analytics metadata phase — prior-year comparatives,
    /// industry benchmarks, management reports, drift events. Off by
    /// default so v3.2.1 archives are byte-identical.
    #[serde(default, alias = "analyticsMetadata")]
    pub analytics_metadata: AnalyticsMetadataConfig,
    /// Phase 1 of the central concentration abstraction (#143). Post-generation
    /// passes over the JE batch that reshape distributional structure toward a
    /// corpus-derived target. Off by default — see
    /// `docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md`.
    #[serde(default)]
    pub concentration: ConcentrationConfig,
}
223
224/// v3.3.0: analytics-metadata phase configuration.
225///
226/// Gates the `phase_analytics_metadata` pass that runs AFTER all
227/// JE-adding phases (including the fraud-bias sweep at Phase 20b).
228/// When enabled, the orchestrator calls `PriorYearGenerator`,
229/// `IndustryBenchmarkGenerator`, `ManagementReportGenerator`, and
230/// `DriftEventGenerator` in sequence; each sub-flag below controls
231/// whether that specific generator fires.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub struct AnalyticsMetadataConfig {
234    /// Master switch for the whole analytics phase.
235    #[serde(default)]
236    pub enabled: bool,
237    /// Emit `PriorYearComparative` records derived from current
238    /// period's account balances.
239    #[serde(default = "default_true")]
240    pub prior_year: bool,
241    /// Emit `IndustryBenchmark` records for the configured industry.
242    #[serde(default = "default_true")]
243    pub industry_benchmark: bool,
244    /// Emit management-report artefacts.
245    #[serde(default = "default_true")]
246    pub management_reports: bool,
247    /// Emit `LabeledDriftEvent` records — post-generation sweep over
248    /// journal entries to label detected drift patterns.
249    #[serde(default = "default_true")]
250    pub drift_events: bool,
251}
252
253impl Default for AnalyticsMetadataConfig {
254    fn default() -> Self {
255        Self {
256            enabled: false,
257            prior_year: true,
258            industry_benchmark: true,
259            management_reports: true,
260            drift_events: true,
261        }
262    }
263}
264
265/// LLM enrichment configuration.
266///
267/// Controls AI-augmented metadata enrichment using LLM providers.
268/// When enabled, vendor names, transaction descriptions, and anomaly explanations
269/// are enriched using the configured provider (mock by default).
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct LlmSchemaConfig {
272    /// Whether LLM enrichment is enabled.
273    #[serde(default)]
274    pub enabled: bool,
275    /// Provider type: "mock", "openai", "anthropic", "custom".
276    #[serde(default = "default_llm_provider")]
277    pub provider: String,
278    /// Model name/ID for the provider.
279    #[serde(default = "default_llm_model_name")]
280    pub model: String,
281    /// Maximum number of vendor names to enrich per run.
282    #[serde(default = "default_llm_batch_size")]
283    pub max_vendor_enrichments: usize,
284
285    /// v4.1.1+: also enrich customer names at generate time.
286    /// Default `false` preserves v4.1.0 behaviour.
287    #[serde(default)]
288    pub enrich_customers: bool,
289
290    /// v4.1.1+: also enrich material descriptions at generate time.
291    /// Default `false`.
292    #[serde(default)]
293    pub enrich_materials: bool,
294
295    /// v4.1.1+: also enrich audit finding titles at generate time
296    /// (the finding narratives remain on their existing template path
297    /// because they're richer and locale-specific). Default `false`.
298    #[serde(default)]
299    pub enrich_findings: bool,
300
301    /// v4.1.1+: upper bound on customer enrichments per run. Matches
302    /// `max_vendor_enrichments` semantics.
303    #[serde(default = "default_llm_batch_size")]
304    pub max_customer_enrichments: usize,
305
306    /// v4.1.1+: upper bound on material enrichments per run.
307    #[serde(default = "default_llm_batch_size")]
308    pub max_material_enrichments: usize,
309
310    /// v4.1.1+: upper bound on finding enrichments per run.
311    #[serde(default = "default_llm_batch_size")]
312    pub max_finding_enrichments: usize,
313}
314
/// Serde default for `LlmSchemaConfig::provider`: the built-in mock provider.
fn default_llm_provider() -> String {
    String::from("mock")
}

/// Serde default for `LlmSchemaConfig::model`.
fn default_llm_model_name() -> String {
    String::from("gpt-4o-mini")
}

/// Serde default shared by the `max_*_enrichments` caps of `LlmSchemaConfig`.
fn default_llm_batch_size() -> usize {
    const PER_RUN_CAP: usize = 50;
    PER_RUN_CAP
}
326
327impl Default for LlmSchemaConfig {
328    fn default() -> Self {
329        Self {
330            enabled: false,
331            provider: default_llm_provider(),
332            model: default_llm_model_name(),
333            max_vendor_enrichments: default_llm_batch_size(),
334            enrich_customers: false,
335            enrich_materials: false,
336            enrich_findings: false,
337            max_customer_enrichments: default_llm_batch_size(),
338            max_material_enrichments: default_llm_batch_size(),
339            max_finding_enrichments: default_llm_batch_size(),
340        }
341    }
342}
343
344/// Diffusion model configuration.
345///
346/// Controls statistical diffusion-based data enhancement that generates samples
347/// matching target distribution properties (means, standard deviations, correlations).
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct DiffusionSchemaConfig {
350    /// Whether diffusion enhancement is enabled.
351    #[serde(default)]
352    pub enabled: bool,
353    /// Number of diffusion steps (higher = better quality, slower).
354    #[serde(default = "default_diffusion_steps")]
355    pub n_steps: usize,
356    /// Noise schedule type: "linear", "cosine", "sigmoid".
357    #[serde(default = "default_diffusion_schedule")]
358    pub schedule: String,
359    /// Number of sample rows to generate for demonstration.
360    #[serde(default = "default_diffusion_sample_size")]
361    pub sample_size: usize,
362    /// Backend type: "statistical" (default), "neural", "hybrid".
363    #[serde(default = "default_diffusion_backend")]
364    pub backend: String,
365    /// Neural diffusion backend configuration (used when backend is "neural" or "hybrid").
366    #[serde(default)]
367    pub neural: NeuralDiffusionSchemaConfig,
368}
369
/// Serde default for `DiffusionSchemaConfig::n_steps`.
fn default_diffusion_steps() -> usize {
    const STEPS: usize = 100;
    STEPS
}

/// Serde default for `DiffusionSchemaConfig::schedule`.
fn default_diffusion_schedule() -> String {
    String::from("linear")
}

/// Serde default for `DiffusionSchemaConfig::sample_size`.
fn default_diffusion_sample_size() -> usize {
    const ROWS: usize = 100;
    ROWS
}

/// Serde default for `DiffusionSchemaConfig::backend`.
fn default_diffusion_backend() -> String {
    String::from("statistical")
}
385
386impl Default for DiffusionSchemaConfig {
387    fn default() -> Self {
388        Self {
389            enabled: false,
390            n_steps: default_diffusion_steps(),
391            schedule: default_diffusion_schedule(),
392            sample_size: default_diffusion_sample_size(),
393            backend: default_diffusion_backend(),
394            neural: NeuralDiffusionSchemaConfig::default(),
395        }
396    }
397}
398
399/// Neural diffusion backend configuration.
400///
401/// Controls the `candle`-based neural score network that learns joint distributions
402/// from training data for the neural and hybrid diffusion backends.
403#[derive(Debug, Clone, Serialize, Deserialize)]
404pub struct NeuralDiffusionSchemaConfig {
405    /// Hidden layer dimensions for the score network MLP.
406    #[serde(default = "default_neural_hidden_dims")]
407    pub hidden_dims: Vec<usize>,
408    /// Dimensionality of the timestep embedding.
409    #[serde(default = "default_neural_timestep_embed_dim")]
410    pub timestep_embed_dim: usize,
411    /// Learning rate for training.
412    #[serde(default = "default_neural_learning_rate")]
413    pub learning_rate: f64,
414    /// Number of training epochs.
415    #[serde(default = "default_neural_training_epochs")]
416    pub training_epochs: usize,
417    /// Training batch size.
418    #[serde(default = "default_neural_batch_size")]
419    pub batch_size: usize,
420    /// Blend weight for hybrid mode (0.0 = all statistical, 1.0 = all neural).
421    #[serde(default = "default_neural_hybrid_weight")]
422    pub hybrid_weight: f64,
423    /// Hybrid blending strategy: "weighted_average", "column_select", "threshold".
424    #[serde(default = "default_neural_hybrid_strategy")]
425    pub hybrid_strategy: String,
426    /// Columns to apply neural generation to (empty = all numeric columns).
427    #[serde(default)]
428    pub neural_columns: Vec<String>,
429    /// v4.4.0+ Optional path to a pre-trained score-network checkpoint
430    /// (`.safetensors`). When set, the orchestrator loads the
431    /// checkpoint instead of training from the first batch — useful
432    /// for long-running production deployments where training cost
433    /// dominates per-run cost. When empty, the orchestrator trains
434    /// on the first generated JE amounts.
435    #[serde(default, skip_serializing_if = "Option::is_none")]
436    pub checkpoint_path: Option<String>,
437}
438
/// Serde default for `NeuralDiffusionSchemaConfig::hidden_dims`.
fn default_neural_hidden_dims() -> Vec<usize> {
    Vec::from([256, 256, 128])
}

/// Serde default for `NeuralDiffusionSchemaConfig::timestep_embed_dim`.
fn default_neural_timestep_embed_dim() -> usize {
    const EMBED_DIM: usize = 64;
    EMBED_DIM
}

/// Serde default for `NeuralDiffusionSchemaConfig::learning_rate`.
fn default_neural_learning_rate() -> f64 {
    1e-3
}

/// Serde default for `NeuralDiffusionSchemaConfig::training_epochs`.
fn default_neural_training_epochs() -> usize {
    const EPOCHS: usize = 100;
    EPOCHS
}

/// Serde default for `NeuralDiffusionSchemaConfig::batch_size`.
fn default_neural_batch_size() -> usize {
    const BATCH: usize = 64;
    BATCH
}

/// Serde default for `NeuralDiffusionSchemaConfig::hybrid_weight`.
fn default_neural_hybrid_weight() -> f64 {
    const EVEN_BLEND: f64 = 0.5;
    EVEN_BLEND
}

/// Serde default for `NeuralDiffusionSchemaConfig::hybrid_strategy`.
fn default_neural_hybrid_strategy() -> String {
    String::from("weighted_average")
}
466
467impl Default for NeuralDiffusionSchemaConfig {
468    fn default() -> Self {
469        Self {
470            hidden_dims: default_neural_hidden_dims(),
471            timestep_embed_dim: default_neural_timestep_embed_dim(),
472            learning_rate: default_neural_learning_rate(),
473            training_epochs: default_neural_training_epochs(),
474            batch_size: default_neural_batch_size(),
475            hybrid_weight: default_neural_hybrid_weight(),
476            hybrid_strategy: default_neural_hybrid_strategy(),
477            neural_columns: Vec::new(),
478            checkpoint_path: None,
479        }
480    }
481}
482
483/// Causal generation configuration.
484///
485/// Controls structural causal model (SCM) based data generation that respects
486/// causal relationships between variables, supports do-calculus interventions,
487/// and enables counterfactual scenarios.
488#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct CausalSchemaConfig {
490    /// Whether causal generation is enabled.
491    #[serde(default)]
492    pub enabled: bool,
493    /// Built-in template to use: "fraud_detection", "revenue_cycle", or "custom".
494    #[serde(default = "default_causal_template")]
495    pub template: String,
496    /// Number of causal samples to generate.
497    #[serde(default = "default_causal_sample_size")]
498    pub sample_size: usize,
499    /// Whether to run causal validation on the output.
500    #[serde(default = "default_true")]
501    pub validate: bool,
502}
503
/// Serde default for `CausalSchemaConfig::template`.
fn default_causal_template() -> String {
    String::from("fraud_detection")
}

/// Serde default for `CausalSchemaConfig::sample_size`.
fn default_causal_sample_size() -> usize {
    const SAMPLES: usize = 500;
    SAMPLES
}
511
512impl Default for CausalSchemaConfig {
513    fn default() -> Self {
514        Self {
515            enabled: false,
516            template: default_causal_template(),
517            sample_size: default_causal_sample_size(),
518            validate: true,
519        }
520    }
521}
522
523/// Graph export configuration for accounting network and ML training exports.
524///
525/// This section enables exporting generated data as graphs for:
526/// - Network reconstruction algorithms
527/// - Graph neural network training
528/// - Neo4j graph database import
529#[derive(Debug, Clone, Serialize, Deserialize)]
530pub struct GraphExportConfig {
531    /// Enable graph export.
532    #[serde(default)]
533    pub enabled: bool,
534
535    /// Graph types to generate.
536    #[serde(default = "default_graph_types")]
537    pub graph_types: Vec<GraphTypeConfig>,
538
539    /// Export formats to generate.
540    #[serde(default = "default_graph_formats")]
541    pub formats: Vec<GraphExportFormat>,
542
543    /// Train split ratio for ML datasets.
544    #[serde(default = "default_train_ratio")]
545    pub train_ratio: f64,
546
547    /// Validation split ratio for ML datasets.
548    #[serde(default = "default_val_ratio")]
549    pub validation_ratio: f64,
550
551    /// Random seed for train/val/test splits.
552    #[serde(default)]
553    pub split_seed: Option<u64>,
554
555    /// Output subdirectory for graph exports (relative to output directory).
556    #[serde(default = "default_graph_subdir")]
557    pub output_subdirectory: String,
558
559    /// Multi-layer hypergraph export settings for RustGraph integration.
560    #[serde(default)]
561    pub hypergraph: HypergraphExportSettings,
562
563    /// DGL-specific export settings.
564    #[serde(default)]
565    pub dgl: DglExportConfig,
566
567    /// `graphs/je_network.csv` flat edge-list export settings (v5.8.0+).
568    #[serde(default)]
569    pub je_network: JeNetworkConfig,
570}
571
/// Method used to construct edges from journal entries when writing
/// `graphs/je_network.csv` (v5.8.0+).
///
/// Variants are (de)serialized in snake_case, i.e. `cartesian` and `a`.
///
/// Reference: Ivertowski (2024), *Hardware Accelerated Method for
/// Accounting Network Generation*, Methods A through E.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JeNetworkMethod {
    /// Method B (full Cartesian product) for every JE — bijective on
    /// 2-line entries (Method A) and `n × m` Cartesian for multi-line
    /// entries with proportional amount allocation.  Produces
    /// O(n × m) edges per JE — a 50-debit / 50-credit period-close
    /// consolidation alone yields 2 500 edges, and a typical
    /// HF-scale 1 M-line config can blow up to 200 M+ edges (and tens
    /// of GB of memory). Use explicitly when downstream consumers
    /// already depend on the Cartesian shape.
    ///
    /// Serialized as `cartesian`.
    Cartesian,
    /// Method A only — emit a single edge per 2-line journal entry
    /// (1 debit + 1 credit) and skip multi-line entries entirely.
    /// Edge count = number of 2-line JEs (≈ 60 % of entries per the
    /// 2024 paper); per-edge confidence is exactly `1.0`.
    ///
    /// Serialized as `a`.
    ///
    /// **Default since v5.27** (previously `Cartesian`). The Cartesian
    /// default OOM'd small-complexity CLI smoke tests on 14-16 GB CI
    /// runners — a 50 × 50 period-close JE alone wanted 20 GB of edge
    /// memory. Method A is the bounded, exactness-preserving fallback
    /// recommended for published reference datasets where size and
    /// exactness matter more than recall on multi-line consolidations.
    /// Set `je_network.method: cartesian` explicitly to restore the
    /// pre-v5.27 behaviour.
    #[default]
    A,
}
605
/// Configuration for the `graphs/je_network.csv` flat edge-list
/// export (v5.8.0+).
///
/// Unlike the other sections in this file, this struct is marked
/// `deny_unknown_fields`: any key other than `method` is a
/// deserialization error rather than being silently ignored.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct JeNetworkConfig {
    /// Edge-construction method (see [`JeNetworkMethod`]). When omitted,
    /// the enum's `#[default]` variant is used.
    #[serde(default)]
    pub method: JeNetworkMethod,
}
615
616fn default_graph_types() -> Vec<GraphTypeConfig> {
617    vec![GraphTypeConfig::default()]
618}
619
620fn default_graph_formats() -> Vec<GraphExportFormat> {
621    vec![GraphExportFormat::PytorchGeometric]
622}
623
/// Serde default for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const TRAIN_SHARE: f64 = 0.7;
    TRAIN_SHARE
}

/// Serde default for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const VALIDATION_SHARE: f64 = 0.15;
    VALIDATION_SHARE
}

/// Serde default for `GraphExportConfig::output_subdirectory`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
635
636impl Default for GraphExportConfig {
637    fn default() -> Self {
638        Self {
639            enabled: false,
640            graph_types: default_graph_types(),
641            formats: default_graph_formats(),
642            train_ratio: 0.7,
643            validation_ratio: 0.15,
644            split_seed: None,
645            output_subdirectory: "graphs".to_string(),
646            hypergraph: HypergraphExportSettings::default(),
647            dgl: DglExportConfig::default(),
648            je_network: JeNetworkConfig::default(),
649        }
650    }
651}
652
/// DGL-specific export settings.
///
/// `Default` is derived, so `heterogeneous` starts as `false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DglExportConfig {
    /// Export as a heterogeneous graph (distinct node/edge types).
    ///
    /// When `true` the DGL exporter produces a `HeteroData` object with typed
    /// node and edge stores rather than a single homogeneous graph.
    /// Enable it in YAML with `graph_export.dgl.heterogeneous: true`.
    #[serde(default)]
    pub heterogeneous: bool,
}
664
665// Default derived: heterogeneous = false (bool default)
666
667/// Settings for the multi-layer hypergraph export (RustGraph integration).
668///
669/// Produces a 3-layer hypergraph:
670/// - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
671/// - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
672/// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
673#[derive(Debug, Clone, Serialize, Deserialize)]
674pub struct HypergraphExportSettings {
675    /// Enable hypergraph export.
676    #[serde(default)]
677    pub enabled: bool,
678
679    /// Maximum total nodes across all layers (default 50000).
680    #[serde(default = "default_hypergraph_max_nodes")]
681    pub max_nodes: usize,
682
683    /// Aggregation strategy when node budget is exceeded.
684    #[serde(default = "default_aggregation_strategy")]
685    pub aggregation_strategy: String,
686
687    /// Layer 1 (Governance & Controls) settings.
688    #[serde(default)]
689    pub governance_layer: GovernanceLayerSettings,
690
691    /// Layer 2 (Process Events) settings.
692    #[serde(default)]
693    pub process_layer: ProcessLayerSettings,
694
695    /// Layer 3 (Accounting Network) settings.
696    #[serde(default)]
697    pub accounting_layer: AccountingLayerSettings,
698
699    /// Cross-layer edge generation settings.
700    #[serde(default)]
701    pub cross_layer: CrossLayerSettings,
702
703    /// Output subdirectory for hypergraph files (relative to graph output directory).
704    #[serde(default = "default_hypergraph_subdir")]
705    pub output_subdirectory: String,
706
707    /// Output format: "native" (default) for internal field names, "unified" for RustGraph format.
708    #[serde(default = "default_hypergraph_format")]
709    pub output_format: String,
710
711    /// Optional URL for streaming unified JSONL to a RustGraph ingest endpoint.
712    #[serde(default)]
713    pub stream_target: Option<String>,
714
715    /// Batch size for streaming (number of JSONL lines per HTTP POST). Default: 1000.
716    #[serde(default = "default_stream_batch_size")]
717    pub stream_batch_size: usize,
718}
719
/// Serde default for `HypergraphExportSettings::max_nodes`.
fn default_hypergraph_max_nodes() -> usize {
    const NODE_BUDGET: usize = 50_000;
    NODE_BUDGET
}

/// Serde default for `HypergraphExportSettings::aggregation_strategy`.
fn default_aggregation_strategy() -> String {
    String::from("pool_by_counterparty")
}

/// Serde default for `HypergraphExportSettings::output_subdirectory`.
fn default_hypergraph_subdir() -> String {
    String::from("hypergraph")
}

/// Serde default for `HypergraphExportSettings::output_format`.
fn default_hypergraph_format() -> String {
    String::from("native")
}

/// Serde default for `HypergraphExportSettings::stream_batch_size`.
fn default_stream_batch_size() -> usize {
    const LINES_PER_POST: usize = 1_000;
    LINES_PER_POST
}
739
740impl Default for HypergraphExportSettings {
741    fn default() -> Self {
742        Self {
743            enabled: false,
744            max_nodes: 50_000,
745            aggregation_strategy: "pool_by_counterparty".to_string(),
746            governance_layer: GovernanceLayerSettings::default(),
747            process_layer: ProcessLayerSettings::default(),
748            accounting_layer: AccountingLayerSettings::default(),
749            cross_layer: CrossLayerSettings::default(),
750            output_subdirectory: "hypergraph".to_string(),
751            output_format: "native".to_string(),
752            stream_target: None,
753            stream_batch_size: 1000,
754        }
755    }
756}
757
758/// Layer 1: Governance & Controls layer settings.
759#[derive(Debug, Clone, Serialize, Deserialize)]
760pub struct GovernanceLayerSettings {
761    /// Include COSO framework nodes (5 components + 17 principles).
762    #[serde(default = "default_true")]
763    pub include_coso: bool,
764    /// Include internal control nodes.
765    #[serde(default = "default_true")]
766    pub include_controls: bool,
767    /// Include SOX assertion nodes.
768    #[serde(default = "default_true")]
769    pub include_sox: bool,
770    /// Include vendor master data nodes.
771    #[serde(default = "default_true")]
772    pub include_vendors: bool,
773    /// Include customer master data nodes.
774    #[serde(default = "default_true")]
775    pub include_customers: bool,
776    /// Include employee/organizational nodes.
777    #[serde(default = "default_true")]
778    pub include_employees: bool,
779}
780
781impl Default for GovernanceLayerSettings {
782    fn default() -> Self {
783        Self {
784            include_coso: true,
785            include_controls: true,
786            include_sox: true,
787            include_vendors: true,
788            include_customers: true,
789            include_employees: true,
790        }
791    }
792}
793
/// Layer 2: Process Events layer settings.
///
/// The `include_*` flags select which document-flow families contribute
/// nodes. Every flag defaults to `true` when omitted, and the pooling
/// threshold defaults to 20.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessLayerSettings {
    /// Include P2P (Procure-to-Pay) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_p2p: bool,
    /// Include O2C (Order-to-Cash) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_o2c: bool,
    /// Include S2C (Source-to-Contract) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_s2c: bool,
    /// Include H2R (Hire-to-Retire) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_h2r: bool,
    /// Include MFG (Manufacturing) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_mfg: bool,
    /// Include BANK (Banking) document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_bank: bool,
    /// Include AUDIT document flow nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_audit: bool,
    /// Include R2R (Record-to-Report) document flow nodes (bank recon + period close).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_r2r: bool,
    /// Export OCPM events as hyperedges. Defaults to `true`.
    #[serde(default = "default_true")]
    pub events_as_hyperedges: bool,
    /// Threshold: if a counterparty has more documents than this, aggregate into pool nodes.
    /// Defaults to 20.
    #[serde(default = "default_docs_per_counterparty_threshold")]
    pub docs_per_counterparty_threshold: usize,
}
828
/// Serde fallback for `docs_per_counterparty_threshold`: 20 documents.
fn default_docs_per_counterparty_threshold() -> usize {
    const POOLING_THRESHOLD: usize = 20;
    POOLING_THRESHOLD
}
832
833impl Default for ProcessLayerSettings {
834    fn default() -> Self {
835        Self {
836            include_p2p: true,
837            include_o2c: true,
838            include_s2c: true,
839            include_h2r: true,
840            include_mfg: true,
841            include_bank: true,
842            include_audit: true,
843            include_r2r: true,
844            events_as_hyperedges: true,
845            docs_per_counterparty_threshold: 20,
846        }
847    }
848}
849
/// Layer 3: Accounting Network layer settings.
///
/// Both flags default to `true` when omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccountingLayerSettings {
    /// Include GL account nodes. Defaults to `true`.
    #[serde(default = "default_true")]
    pub include_accounts: bool,
    /// Export journal entries as hyperedges (debit+credit accounts as participants).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub je_as_hyperedges: bool,
}
860
861impl Default for AccountingLayerSettings {
862    fn default() -> Self {
863        Self {
864            include_accounts: true,
865            je_as_hyperedges: true,
866        }
867    }
868}
869
/// Cross-layer edge generation settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossLayerSettings {
    /// Generate cross-layer edges (Control→Account, Vendor→PO, etc.).
    /// Defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
}
877
878impl Default for CrossLayerSettings {
879    fn default() -> Self {
880        Self { enabled: true }
881    }
882}
883
/// Configuration for a specific graph type to export.
///
/// All fields are optional in the input: `name` falls back to
/// `"accounting_network"`, the flags to `false`, and the weight to `0.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphTypeConfig {
    /// Name identifier for this graph configuration.
    /// Defaults to `"accounting_network"`.
    #[serde(default = "default_graph_name")]
    pub name: String,

    /// Whether to aggregate parallel edges between the same nodes.
    /// Defaults to `false`.
    #[serde(default)]
    pub aggregate_edges: bool,

    /// Minimum edge weight to include (filters out small transactions).
    /// Defaults to `0.0`.
    #[serde(default)]
    pub min_edge_weight: f64,

    /// Whether to include document nodes (creates hub-and-spoke structure).
    /// Defaults to `false`.
    #[serde(default)]
    pub include_document_nodes: bool,
}
903
/// Serde fallback for a graph configuration's `name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
907
908impl Default for GraphTypeConfig {
909    fn default() -> Self {
910        Self {
911            name: "accounting_network".to_string(),
912            aggregate_edges: false,
913            min_edge_weight: 0.0,
914            include_document_nodes: false,
915        }
916    }
917}
918
/// Export format for graph data.
///
/// Variants are (de)serialized under their snake_case names because of
/// `rename_all = "snake_case"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphExportFormat {
    /// PyTorch Geometric format (.npy files + metadata.json).
    /// Serialized as `pytorch_geometric`.
    PytorchGeometric,
    /// Neo4j format (CSV files + Cypher import scripts).
    /// Serialized as `neo4j`.
    Neo4j,
    /// Deep Graph Library format. Serialized as `dgl`.
    Dgl,
    /// RustGraph/RustAssureTwin JSON format. Serialized as `rust_graph`.
    RustGraph,
    /// RustGraph multi-layer hypergraph format (nodes.jsonl + edges.jsonl + hyperedges.jsonl).
    /// Serialized as `rust_graph_hypergraph`.
    RustGraphHypergraph,
}
934
/// Scenario configuration for metadata, tagging, and ML training setup.
///
/// This section enables tracking the purpose and characteristics of a generation run.
/// Every field is optional in the input; a missing field takes its type's
/// default value (empty collection, `None`, or `false`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScenarioConfig {
    /// Tags for categorizing and filtering datasets.
    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
    #[serde(default)]
    pub tags: Vec<String>,

    /// Data quality profile preset.
    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
    ///
    /// Stored as a free-form string; this type does not validate the name.
    #[serde(default)]
    pub profile: Option<String>,

    /// Human-readable description of the scenario purpose.
    #[serde(default)]
    pub description: Option<String>,

    /// Whether this run is for ML training (enables balanced labeling).
    /// Defaults to `false`.
    #[serde(default)]
    pub ml_training: bool,

    /// Target anomaly class balance for ML training.
    /// If set, anomalies will be injected to achieve this ratio.
    #[serde(default)]
    pub target_anomaly_ratio: Option<f64>,

    /// Custom metadata key-value pairs. Defaults to an empty map.
    #[serde(default)]
    pub metadata: std::collections::HashMap<String, String>,
}
969
/// Temporal drift configuration for simulating distribution changes over time.
///
/// This enables generation of data that shows realistic temporal evolution,
/// useful for training drift detection models and testing temporal robustness.
///
/// All fields are optional in the input. Use `to_core_config` to obtain the
/// `datasynth_core` representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalDriftConfig {
    /// Enable temporal drift simulation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
    /// Simulates gradual inflation or business growth. Defaults to `0.02`.
    #[serde(default = "default_amount_drift")]
    pub amount_mean_drift: f64,

    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
    /// Simulates increasing volatility over time. Defaults to `0.0`.
    #[serde(default)]
    pub amount_variance_drift: f64,

    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
    /// Simulates increasing fraud attempts or degrading controls. Defaults to `0.0`.
    #[serde(default)]
    pub anomaly_rate_drift: f64,

    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
    /// Higher values cause more rapid distribution shifts. Defaults to `0.01`.
    #[serde(default = "default_concept_drift")]
    pub concept_drift_rate: f64,

    /// Sudden drift events - probability of a sudden distribution shift in any period.
    /// Defaults to `0.0`.
    #[serde(default)]
    pub sudden_drift_probability: f64,

    /// Magnitude of sudden drift events when they occur (multiplier).
    /// Defaults to `2.0`.
    #[serde(default = "default_sudden_drift_magnitude")]
    pub sudden_drift_magnitude: f64,

    /// Seasonal drift - enable cyclic patterns that repeat annually.
    /// Defaults to `false`.
    #[serde(default)]
    pub seasonal_drift: bool,

    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
    /// Defaults to `0`.
    #[serde(default)]
    pub drift_start_period: u32,

    /// Drift type: "gradual", "sudden", "recurring", "mixed".
    /// Defaults to `gradual`.
    #[serde(default = "default_drift_type")]
    pub drift_type: DriftType,
}
1020
/// Serde fallback for `amount_mean_drift`.
fn default_amount_drift() -> f64 {
    const AMOUNT_MEAN_DRIFT: f64 = 0.02;
    AMOUNT_MEAN_DRIFT
}

/// Serde fallback for `concept_drift_rate`.
fn default_concept_drift() -> f64 {
    const CONCEPT_DRIFT_RATE: f64 = 0.01;
    CONCEPT_DRIFT_RATE
}

/// Serde fallback for `sudden_drift_magnitude`.
fn default_sudden_drift_magnitude() -> f64 {
    const SUDDEN_DRIFT_MULTIPLIER: f64 = 2.0;
    SUDDEN_DRIFT_MULTIPLIER
}
1032
1033fn default_drift_type() -> DriftType {
1034    DriftType::Gradual
1035}
1036
1037impl Default for TemporalDriftConfig {
1038    fn default() -> Self {
1039        Self {
1040            enabled: false,
1041            amount_mean_drift: 0.02,
1042            amount_variance_drift: 0.0,
1043            anomaly_rate_drift: 0.0,
1044            concept_drift_rate: 0.01,
1045            sudden_drift_probability: 0.0,
1046            sudden_drift_magnitude: 2.0,
1047            seasonal_drift: false,
1048            drift_start_period: 0,
1049            drift_type: DriftType::Gradual,
1050        }
1051    }
1052}
1053
1054impl TemporalDriftConfig {
1055    /// Convert to core DriftConfig for use in generators.
1056    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
1057        datasynth_core::distributions::DriftConfig {
1058            enabled: self.enabled,
1059            amount_mean_drift: self.amount_mean_drift,
1060            amount_variance_drift: self.amount_variance_drift,
1061            anomaly_rate_drift: self.anomaly_rate_drift,
1062            concept_drift_rate: self.concept_drift_rate,
1063            sudden_drift_probability: self.sudden_drift_probability,
1064            sudden_drift_magnitude: self.sudden_drift_magnitude,
1065            seasonal_drift: self.seasonal_drift,
1066            drift_start_period: self.drift_start_period,
1067            drift_type: match self.drift_type {
1068                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
1069                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
1070                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
1071                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
1072            },
1073            regime_changes: Vec::new(),
1074            economic_cycle: Default::default(),
1075            parameter_drifts: Vec::new(),
1076        }
1077    }
1078}
1079
/// Types of temporal drift patterns.
///
/// Serialized under snake_case names (`gradual`, `sudden`, `recurring`,
/// `mixed`); `Gradual` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DriftType {
    /// Gradual, continuous drift over time (like inflation).
    #[default]
    Gradual,
    /// Sudden, point-in-time shifts (like policy changes).
    Sudden,
    /// Recurring patterns that cycle (like seasonal variations).
    Recurring,
    /// Combination of gradual background drift with occasional sudden shifts.
    Mixed,
}
1094
1095// ============================================================================
1096// Streaming Output API Configuration (Phase 2)
1097// ============================================================================
1098
/// Configuration for streaming output API.
///
/// All fields are optional in the input and fall back to the defaults noted
/// on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamingSchemaConfig {
    /// Enable streaming output. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Target events per second (0 = unlimited, default 0).
    #[serde(default)]
    pub events_per_second: f64,
    /// Token bucket burst size (default 100).
    #[serde(default = "default_burst_size")]
    pub burst_size: u32,
    /// Buffer size for streaming (number of items). Defaults to 1000.
    #[serde(default = "default_buffer_size")]
    pub buffer_size: usize,
    /// Enable progress reporting. Defaults to `true`.
    #[serde(default = "default_true")]
    pub enable_progress: bool,
    /// Progress reporting interval (number of items). Defaults to 100.
    #[serde(default = "default_progress_interval")]
    pub progress_interval: u64,
    /// Backpressure strategy. Defaults to `block`.
    #[serde(default)]
    pub backpressure: BackpressureSchemaStrategy,
}
1124
/// Serde fallback for the streaming `buffer_size`: 1,000 items.
fn default_buffer_size() -> usize {
    1_000
}

/// Serde fallback for the streaming `progress_interval`: every 100 items.
fn default_progress_interval() -> u64 {
    const ITEMS_BETWEEN_REPORTS: u64 = 100;
    ITEMS_BETWEEN_REPORTS
}
1132
1133impl Default for StreamingSchemaConfig {
1134    fn default() -> Self {
1135        Self {
1136            enabled: false,
1137            events_per_second: 0.0,
1138            burst_size: 100,
1139            buffer_size: 1000,
1140            enable_progress: true,
1141            progress_interval: 100,
1142            backpressure: BackpressureSchemaStrategy::Block,
1143        }
1144    }
1145}
1146
/// Backpressure strategy for streaming output.
///
/// Serialized under snake_case names (`block`, `drop_oldest`, `drop_newest`,
/// `buffer`); `Block` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum BackpressureSchemaStrategy {
    /// Block until space is available in the buffer.
    #[default]
    Block,
    /// Drop oldest items when buffer is full.
    DropOldest,
    /// Drop newest items when buffer is full.
    DropNewest,
    /// Buffer overflow items up to a limit, then block.
    Buffer,
}
1161
1162// ============================================================================
1163// Rate Limiting Configuration (Phase 5)
1164// ============================================================================
1165
/// Configuration for rate limiting.
///
/// All fields are optional in the input and fall back to the defaults noted
/// on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimitSchemaConfig {
    /// Enable rate limiting. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Entities per second limit. Defaults to `1000.0`.
    #[serde(default = "default_entities_per_second")]
    pub entities_per_second: f64,
    /// Burst size (number of tokens in bucket). Defaults to 100.
    #[serde(default = "default_burst_size")]
    pub burst_size: u32,
    /// Backpressure strategy for rate limiting. Defaults to `block`.
    #[serde(default)]
    pub backpressure: RateLimitBackpressureSchema,
}
1182
/// Serde fallback for `entities_per_second`.
fn default_entities_per_second() -> f64 {
    1_000.0
}

/// Serde fallback for `burst_size`, used by both the streaming and the
/// rate-limit sections.
fn default_burst_size() -> u32 {
    const BURST_TOKENS: u32 = 100;
    BURST_TOKENS
}
1190
1191impl Default for RateLimitSchemaConfig {
1192    fn default() -> Self {
1193        Self {
1194            enabled: false,
1195            entities_per_second: 1000.0,
1196            burst_size: 100,
1197            backpressure: RateLimitBackpressureSchema::Block,
1198        }
1199    }
1200}
1201
/// Backpressure strategy for rate limiting.
///
/// Serialized under snake_case names (`block`, `drop`, `buffer`); `Block` is
/// the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RateLimitBackpressureSchema {
    /// Block until rate allows.
    #[default]
    Block,
    /// Drop items that exceed rate.
    Drop,
    /// Buffer items and process when rate allows.
    Buffer,
}
1214
1215// ============================================================================
1216// Temporal Attribute Generation Configuration (Phase 3)
1217// ============================================================================
1218
/// Configuration for temporal attribute generation.
///
/// All fields are optional in the input; nested sections fall back to their
/// own `Default` values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAttributeSchemaConfig {
    /// Enable temporal attribute generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Valid time configuration.
    #[serde(default)]
    pub valid_time: ValidTimeSchemaConfig,
    /// Transaction time configuration.
    #[serde(default)]
    pub transaction_time: TransactionTimeSchemaConfig,
    /// Generate version chains for entities. Defaults to `false`.
    #[serde(default)]
    pub generate_version_chains: bool,
    /// Average number of versions per entity. Defaults to `1.5`.
    #[serde(default = "default_avg_versions")]
    pub avg_versions_per_entity: f64,
}
1238
/// Serde fallback for `avg_versions_per_entity`.
fn default_avg_versions() -> f64 {
    const AVG_VERSIONS_PER_ENTITY: f64 = 1.5;
    AVG_VERSIONS_PER_ENTITY
}
1242
1243impl Default for TemporalAttributeSchemaConfig {
1244    fn default() -> Self {
1245        Self {
1246            enabled: false,
1247            valid_time: ValidTimeSchemaConfig::default(),
1248            transaction_time: TransactionTimeSchemaConfig::default(),
1249            generate_version_chains: false,
1250            avg_versions_per_entity: 1.5,
1251        }
1252    }
1253}
1254
/// Configuration for valid time (business time) generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidTimeSchemaConfig {
    /// Probability that valid_to is set (entity has ended validity).
    /// Defaults to `0.1`.
    #[serde(default = "default_closed_probability")]
    pub closed_probability: f64,
    /// Average validity duration in days. Defaults to 365.
    #[serde(default = "default_avg_validity_days")]
    pub avg_validity_days: u32,
    /// Standard deviation of validity duration in days. Defaults to 90.
    #[serde(default = "default_validity_stddev")]
    pub validity_stddev_days: u32,
}
1268
/// Serde fallback for `closed_probability`.
fn default_closed_probability() -> f64 {
    const CLOSED_PROBABILITY: f64 = 0.1;
    CLOSED_PROBABILITY
}

/// Serde fallback for `avg_validity_days`.
fn default_avg_validity_days() -> u32 {
    const AVG_VALIDITY_DAYS: u32 = 365;
    AVG_VALIDITY_DAYS
}

/// Serde fallback for `validity_stddev_days`.
fn default_validity_stddev() -> u32 {
    const VALIDITY_STDDEV_DAYS: u32 = 90;
    VALIDITY_STDDEV_DAYS
}
1280
1281impl Default for ValidTimeSchemaConfig {
1282    fn default() -> Self {
1283        Self {
1284            closed_probability: 0.1,
1285            avg_validity_days: 365,
1286            validity_stddev_days: 90,
1287        }
1288    }
1289}
1290
/// Configuration for transaction time (system time) generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransactionTimeSchemaConfig {
    /// Average recording delay in seconds (0 = immediate). Defaults to 0.
    #[serde(default)]
    pub avg_recording_delay_seconds: u32,
    /// Allow backdating (recording time before valid time). Defaults to `false`.
    #[serde(default)]
    pub allow_backdating: bool,
    /// Probability of backdating if allowed. Defaults to `0.01`.
    #[serde(default = "default_backdating_probability")]
    pub backdating_probability: f64,
    /// Maximum backdate days. Defaults to 30.
    #[serde(default = "default_max_backdate_days")]
    pub max_backdate_days: u32,
}
1307
/// Serde fallback for `backdating_probability`.
fn default_backdating_probability() -> f64 {
    const BACKDATING_PROBABILITY: f64 = 0.01;
    BACKDATING_PROBABILITY
}

/// Serde fallback for `max_backdate_days`.
fn default_max_backdate_days() -> u32 {
    const MAX_BACKDATE_DAYS: u32 = 30;
    MAX_BACKDATE_DAYS
}
1315
1316impl Default for TransactionTimeSchemaConfig {
1317    fn default() -> Self {
1318        Self {
1319            avg_recording_delay_seconds: 0,
1320            allow_backdating: false,
1321            backdating_probability: 0.01,
1322            max_backdate_days: 30,
1323        }
1324    }
1325}
1326
1327// ============================================================================
1328// Relationship Generation Configuration (Phase 4)
1329// ============================================================================
1330
/// Configuration for relationship generation.
///
/// All fields are optional in the input and fall back to the defaults noted
/// on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipSchemaConfig {
    /// Relationship type definitions. Defaults to an empty list.
    #[serde(default)]
    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
    /// Allow orphan entities (entities with no relationships). Defaults to `true`.
    #[serde(default = "default_true")]
    pub allow_orphans: bool,
    /// Probability of creating an orphan entity. Defaults to `0.01`.
    #[serde(default = "default_orphan_probability")]
    pub orphan_probability: f64,
    /// Allow circular relationships. Defaults to `false`.
    #[serde(default)]
    pub allow_circular: bool,
    /// Maximum depth for circular relationship detection. Defaults to 3.
    #[serde(default = "default_max_circular_depth")]
    pub max_circular_depth: u32,
}
1350
/// Serde fallback for `orphan_probability`.
fn default_orphan_probability() -> f64 {
    const ORPHAN_PROBABILITY: f64 = 0.01;
    ORPHAN_PROBABILITY
}

/// Serde fallback for `max_circular_depth`.
fn default_max_circular_depth() -> u32 {
    const MAX_CIRCULAR_DEPTH: u32 = 3;
    MAX_CIRCULAR_DEPTH
}
1358
1359impl Default for RelationshipSchemaConfig {
1360    fn default() -> Self {
1361        Self {
1362            relationship_types: Vec::new(),
1363            allow_orphans: true,
1364            orphan_probability: 0.01,
1365            allow_circular: false,
1366            max_circular_depth: 3,
1367        }
1368    }
1369}
1370
/// Configuration for a specific relationship type.
///
/// `name`, `source_type`, and `target_type` carry no serde default and must
/// be present in the input; the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipTypeSchemaConfig {
    /// Name of the relationship type (e.g., "debits", "credits", "created").
    pub name: String,
    /// Source entity type (e.g., "journal_entry").
    pub source_type: String,
    /// Target entity type (e.g., "account").
    pub target_type: String,
    /// Cardinality rule for this relationship.
    /// Defaults to `CardinalitySchemaRule::default()`.
    #[serde(default)]
    pub cardinality: CardinalitySchemaRule,
    /// Weight for this relationship in random selection. Defaults to `1.0`.
    #[serde(default = "default_relationship_weight")]
    pub weight: f64,
    /// Whether this relationship is required. Defaults to `false`.
    #[serde(default)]
    pub required: bool,
    /// Whether this relationship is directed. Defaults to `true`.
    #[serde(default = "default_true")]
    pub directed: bool,
}
1393
/// Serde fallback for a relationship type's `weight`.
fn default_relationship_weight() -> f64 {
    const UNIT_WEIGHT: f64 = 1.0;
    UNIT_WEIGHT
}
1397
1398impl Default for RelationshipTypeSchemaConfig {
1399    fn default() -> Self {
1400        Self {
1401            name: String::new(),
1402            source_type: String::new(),
1403            target_type: String::new(),
1404            cardinality: CardinalitySchemaRule::default(),
1405            weight: 1.0,
1406            required: false,
1407            directed: true,
1408        }
1409    }
1410}
1411
/// Cardinality rule for relationships in schema config.
///
/// Variant names are (de)serialized in snake_case (`one_to_one`,
/// `one_to_many`, `many_to_one`, `many_to_many`). The `Default` impl yields
/// `OneToMany { min: 1, max: 5 }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CardinalitySchemaRule {
    /// One source to one target.
    OneToOne,
    /// One source to many targets.
    OneToMany {
        /// Minimum number of targets.
        min: u32,
        /// Maximum number of targets.
        max: u32,
    },
    /// Many sources to one target.
    ManyToOne {
        /// Minimum number of sources.
        min: u32,
        /// Maximum number of sources.
        max: u32,
    },
    /// Many sources to many targets.
    ManyToMany {
        /// Minimum targets per source.
        min_per_source: u32,
        /// Maximum targets per source.
        max_per_source: u32,
    },
}
1440
1441impl Default for CardinalitySchemaRule {
1442    fn default() -> Self {
1443        Self::OneToMany { min: 1, max: 5 }
1444    }
1445}
1446
/// Global configuration settings.
///
/// Multi-word fields also accept their camelCase spelling through
/// `#[serde(alias = ...)]`. `industry`, `start_date`, and `period_months`
/// carry no serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalConfig {
    /// Random seed for reproducibility
    pub seed: Option<u64>,
    /// Industry sector
    pub industry: IndustrySector,
    /// Simulation start date (YYYY-MM-DD)
    ///
    /// Stored as a plain string; this type does not parse or validate it.
    #[serde(alias = "startDate")]
    pub start_date: String,
    /// Simulation period in months
    #[serde(alias = "periodMonths")]
    pub period_months: u32,
    /// Base currency for group reporting. Defaults to `"USD"`.
    #[serde(default = "default_currency", alias = "groupCurrency")]
    pub group_currency: String,
    /// Presentation currency for consolidated financial statements (ISO 4217).
    /// If not set, defaults to `group_currency`.
    #[serde(default, alias = "presentationCurrency")]
    pub presentation_currency: Option<String>,
    /// Enable parallel generation. Defaults to `true`.
    #[serde(default = "default_true")]
    pub parallel: bool,
    /// Number of worker threads (0 = auto-detect). Defaults to 0.
    #[serde(default, alias = "workerThreads")]
    pub worker_threads: usize,
    /// Memory limit in MB (0 = unlimited). Defaults to 0.
    #[serde(default, alias = "memoryLimitMb")]
    pub memory_limit_mb: usize,
    /// Fiscal year length in months (defaults to 12 if not set).
    /// Used by session-based generation to split the total period into fiscal years.
    #[serde(default, alias = "fiscalYearMonths")]
    pub fiscal_year_months: Option<u32>,
}
1481
/// Serde fallback for `group_currency`.
fn default_currency() -> String {
    String::from("USD")
}

/// Shared serde fallback for boolean options that are on unless the config
/// turns them off.
fn default_true() -> bool {
    true
}
1488
/// Configuration for generation session behavior.
///
/// When enabled, the generation pipeline splits the total period into fiscal years
/// and generates data period-by-period, carrying forward balance state.
///
/// All fields are optional in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionSchemaConfig {
    /// Whether session-based (period-by-period) generation is enabled.
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Optional path for saving/loading session checkpoint files.
    /// Defaults to `None`.
    #[serde(default)]
    pub checkpoint_path: Option<String>,
    /// Whether to write output files per fiscal period (e.g., `period_01/`).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub per_period_output: bool,
    /// Whether to also produce a single consolidated output across all periods.
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub consolidated_output: bool,
}
1508
1509impl Default for SessionSchemaConfig {
1510    fn default() -> Self {
1511        Self {
1512            enabled: false,
1513            checkpoint_path: None,
1514            per_period_output: true,
1515            consolidated_output: true,
1516        }
1517    }
1518}
1519
/// Company code configuration.
///
/// Multi-word fields also accept their camelCase spelling through
/// `#[serde(alias = ...)]`. `code`, `name`, `currency`, `country`, and
/// `annual_transaction_volume` carry no serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyConfig {
    /// Company code identifier
    pub code: String,
    /// Company name
    pub name: String,
    /// Local currency (ISO 4217)
    pub currency: String,
    /// Functional currency for IAS 21 translation (ISO 4217).
    /// If not set, defaults to the `currency` field (i.e. local == functional).
    #[serde(default, alias = "functionalCurrency")]
    pub functional_currency: Option<String>,
    /// Country code (ISO 3166-1 alpha-2)
    pub country: String,
    /// Fiscal year variant. Defaults to `"K4"`.
    #[serde(default = "default_fiscal_variant", alias = "fiscalYearVariant")]
    pub fiscal_year_variant: String,
    /// Transaction volume per year
    #[serde(alias = "annualTransactionVolume")]
    pub annual_transaction_volume: TransactionVolume,
    /// Company-specific transaction weight. Defaults to `1.0`.
    #[serde(default = "default_weight", alias = "volumeWeight")]
    pub volume_weight: f64,
}
1545
/// Serde fallback for `fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}

/// Serde fallback for `volume_weight`.
fn default_weight() -> f64 {
    const UNIT_WEIGHT: f64 = 1.0;
    UNIT_WEIGHT
}
1552
/// Transaction volume presets.
///
/// Variant names are (de)serialized in snake_case (`ten_k`, `fifty_k`,
/// `hundred_k`, `one_m`, `ten_m`, `hundred_m`, `custom`). Use `count` to get
/// the numeric volume.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TransactionVolume {
    /// 10,000 transactions per year
    TenK,
    /// 50,000 transactions per year
    FiftyK,
    /// 100,000 transactions per year
    HundredK,
    /// 1,000,000 transactions per year
    OneM,
    /// 10,000,000 transactions per year
    TenM,
    /// 100,000,000 transactions per year
    HundredM,
    /// Custom count; the payload is the exact number returned by `count`.
    Custom(u64),
}
1572
1573impl TransactionVolume {
1574    /// Get the transaction count.
1575    pub fn count(&self) -> u64 {
1576        match self {
1577            Self::TenK => 10_000,
1578            Self::FiftyK => 50_000,
1579            Self::HundredK => 100_000,
1580            Self::OneM => 1_000_000,
1581            Self::TenM => 10_000_000,
1582            Self::HundredM => 100_000_000,
1583            Self::Custom(n) => *n,
1584        }
1585    }
1586}
1587
1588/// Chart of Accounts configuration.
1589#[derive(Debug, Clone, Serialize, Deserialize)]
1590pub struct ChartOfAccountsConfig {
1591    /// CoA complexity level
1592    pub complexity: CoAComplexity,
1593    /// Use industry-specific accounts
1594    #[serde(default = "default_true")]
1595    pub industry_specific: bool,
1596    /// Custom account definitions file
1597    pub custom_accounts: Option<PathBuf>,
1598    /// Minimum hierarchy depth
1599    #[serde(default = "default_min_depth")]
1600    pub min_hierarchy_depth: u8,
1601    /// Maximum hierarchy depth
1602    #[serde(default = "default_max_depth")]
1603    pub max_hierarchy_depth: u8,
1604    /// **v5.7.0** — expand canonical accounts into industry-specific
1605    /// 6-digit sub-accounts using the embedded
1606    /// [`datasynth_core::industry_packs`] (manufacturing, retail,
1607    /// financial_services, healthcare, technology). When `true`:
1608    ///
1609    /// - Each canonical 4-digit account that has an expansion in the
1610    ///   pack becomes a non-postable control account (`is_postable =
1611    ///   false`).
1612    /// - 2–6 6-digit sub-accounts are added per parent, with
1613    ///   suffix-driven names (`"Product Revenue — Steel Products"`),
1614    ///   industry-realistic gaps, and inherited ISO 21378 codes.
1615    /// - Generators that currently target canonical accounts via
1616    ///   constants will pick a sub-account deterministically per
1617    ///   `document_id` (preserving seed-based reproducibility).
1618    ///
1619    /// Default: `false` (preserves v5.6.0 behaviour exactly — same
1620    /// account count, same numbering, same goldens).
1621    #[serde(default, alias = "expandIndustrySubaccounts")]
1622    pub expand_industry_subaccounts: bool,
1623}
1624
/// Serde fallback for `min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    const MIN_HIERARCHY_DEPTH: u8 = 2;
    MIN_HIERARCHY_DEPTH
}

/// Serde fallback for `max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    const MAX_HIERARCHY_DEPTH: u8 = 5;
    MAX_HIERARCHY_DEPTH
}
1631
1632impl Default for ChartOfAccountsConfig {
1633    fn default() -> Self {
1634        Self {
1635            complexity: CoAComplexity::Small,
1636            industry_specific: true,
1637            custom_accounts: None,
1638            min_hierarchy_depth: default_min_depth(),
1639            max_hierarchy_depth: default_max_depth(),
1640            expand_industry_subaccounts: false,
1641        }
1642    }
1643}
1644
1645/// Transaction generation configuration.
1646#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1647pub struct TransactionConfig {
1648    /// Line item distribution
1649    #[serde(default)]
1650    pub line_item_distribution: LineItemDistributionConfig,
1651    /// Debit/credit balance distribution
1652    #[serde(default)]
1653    pub debit_credit_distribution: DebitCreditDistributionConfig,
1654    /// Even/odd line count distribution
1655    #[serde(default)]
1656    pub even_odd_distribution: EvenOddDistributionConfig,
1657    /// Transaction source distribution
1658    #[serde(default)]
1659    pub source_distribution: SourceDistribution,
1660    /// **T2-D** Source-mix breadth. When unset or `true` (the default), the
1661    /// emitted `source` column is drawn from a generic SAP document-type mix
1662    /// (~25 codes, entropy ~2.7) instead of the coarse `TransactionSource`
1663    /// enum (~4 values, entropy ~0.75), closing the source-mix gap measured
1664    /// in experiments/ml/FINDINGS.md §6. Industry priors, when loaded, take
1665    /// precedence. Set `false` to restore the legacy enum labels. `Option`
1666    /// (not bare `bool`) so the default is genuinely on under both serde and
1667    /// `Default::default()`.
1668    #[serde(default)]
1669    pub synthetic_source_codes: Option<bool>,
1670    /// **SOTA-1** Recurring / standard-journal templating. When unset or `true`
1671    /// (the default), the no-priors generation path reuses a small per-(company,
1672    /// process) library of standard JE account-archetypes with high probability,
1673    /// so standard postings recur (and a hot subset of accounts dominates)
1674    /// instead of every JE drawing fresh uniform accounts. Matches the corpus's
1675    /// heavy templating (FINDINGS.md sec.8: 97% recurring, top-50 cover 65%; vs
1676    /// the engine's 758/1k unique). Reuse overrides only account *choice* (the
1677    /// main RNG + amounts/dates/counts are unchanged). Set `false` for the
1678    /// legacy uniform-per-line account selection.
1679    #[serde(default)]
1680    pub recurring_templates: Option<bool>,
1681    /// **SOTA-5** Fraction of journal entries that are reversals/corrections of
1682    /// a recent JE (swap dr/cr, reference the original) — a process auditors
1683    /// specifically look for, and largely absent from the engine (FINDINGS.md
1684    /// sec.8: corpus reversal-proxy ~10% vs synthetic ~0.2%). Unset → a default
1685    /// of ~0.10 (matching the corpus proxy); `0.0` disables it. Reversals are
1686    /// interspersed without perturbing the normal JEs (separate RNG + derived id).
1687    #[serde(default)]
1688    pub reversal_rate: Option<f64>,
1689    /// **SOTA-2** Concentrate posting activity onto a hot subset of accounts via
1690    /// a Zipf (power-law) override of the per-line account pick, so a few
1691    /// accounts carry most lines like a real GL (FINDINGS.md sec.8: corpus
1692    /// top-10% of accounts ≈ 95% of lines vs the engine's near-uniform ~0.21).
1693    /// The uniform draw is still consumed (amounts/dates/counts unchanged) — only
1694    /// the chosen account moves toward the hot set. Set `false` for the legacy
1695    /// uniform-over-pool selection. Default-on when unset.
1696    #[serde(default)]
1697    pub account_concentration: Option<bool>,
1698    /// **SOTA-6** Fraction of journal entries that are allocation/assessment
1699    /// batches — large 1-to-many postings (one cost pool spread across many
1700    /// cost centers) that drive the corpus lines-per-JE tail (FINDINGS.md
1701    /// sec.8: AB docs ~52 lines vs the engine's ~4.6 mean with no large-batch
1702    /// process). Each batch carries ~30-80 cost-center-spread sub-lines and
1703    /// stays balanced. Unset → a small default (~0.008, ≈8% of lines); `0.0`
1704    /// disables. Interspersed without perturbing the normal JEs (separate RNG +
1705    /// derived id, reusing a recent JE's header).
1706    #[serde(default)]
1707    pub allocation_batch_rate: Option<f64>,
1708    /// **SOTA-3** Populate a line-level `business_unit` dimension — an
1709    /// organisational segment that rolls up the cost center, or the profit
1710    /// center as fallback (the same dimension value always maps to the same BU).
1711    /// The corpus carries a BU dimension (~11 codes) the engine lacked entirely;
1712    /// this fills it wherever a cost or profit center is present (~corpus fill),
1713    /// so BU-level analytics are coherent. Default-on when unset; `false`
1714    /// leaves `business_unit` empty (legacy).
1715    #[serde(default)]
1716    pub business_unit_dimension: Option<bool>,
1717    /// **SOTA-4** Fraction of journal entries that post in a foreign
1718    /// (document) currency — SAP-style: `debit_amount`/`credit_amount`/
1719    /// `local_amount` stay the company-ledger amount (DMBTR; the trial balance
1720    /// is unaffected), and the line's `transaction_amount` (WRBTR) plus
1721    /// `header.currency` (WAERS) / `header.exchange_rate` carry the foreign
1722    /// value. The corpus shows ~3.5% functional≠reporting (FINDINGS §8).
1723    /// Unset/`0.0` → all company-currency (default). Additive — ledger
1724    /// coherence is preserved; enable for corpus-matching / FX realism.
1725    #[serde(default)]
1726    pub foreign_currency_rate: Option<f64>,
1727    /// Seasonality configuration
1728    #[serde(default)]
1729    pub seasonality: SeasonalityConfig,
1730    /// Amount distribution
1731    #[serde(default)]
1732    pub amounts: AmountDistributionConfig,
1733    /// Benford's Law compliance configuration
1734    #[serde(default)]
1735    pub benford: BenfordConfig,
1736    /// SOTA-10 (FINDINGS §14): optional hard cap on total lines per JE. Corpus has
1737    /// p99.9 ~99 lines / max ~924; the synthetic engine occasionally produces
1738    /// 2000+-line monster JEs that degrade the audit packet's signal-to-noise.
1739    /// `None` = no cap (legacy); ~100 is a realism-matching default. Applies after
1740    /// copula adjustment; preserves balance by scaling debit/credit proportionally.
1741    #[serde(default)]
1742    pub lines_per_je_cap: Option<usize>,
1743    /// SOTA-9 (FINDINGS §14): archetype reuse probability for the recurring-templates
1744    /// process (overrides the historical 0.90 default). Corpus recurring share ~0.97;
1745    /// raising this concentrates `edges/je` toward the corpus value (currently 8.75×
1746    /// too diffuse). Range [0.0, 1.0]. None = use legacy 0.90.
1747    #[serde(default)]
1748    pub archetype_reuse_probability: Option<f64>,
1749    /// SOTA-8 (FINDINGS §14): source-conditional Dirichlet account-pair sampler.
1750    /// Models the corpus finding that per-source account usage is *concentrated*
1751    /// (entropy ~0.68 vs synth 0.97) over a *larger* pool (~23 vs 5 accts/source).
1752    /// Default off — opt-in so existing synthetic streams stay byte-identical;
1753    /// enable for audit-realism + tighter inverse-audit normal manifold.
1754    #[serde(default)]
1755    pub source_conditional_account_pair: SourceConditionalAccountPairConfig,
1756}
1757
1758/// SOTA-8 — per-source Dirichlet over account pairs. Concentration α controls
1759/// per-source structure tightness (low α = razor-tight prior, high α = diffuse);
1760/// `accts_per_source_target` controls the per-source account-pool size.
1761#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1762pub struct SourceConditionalAccountPairConfig {
1763    /// Enable the source-conditional account-pair sampler (default off).
1764    #[serde(default)]
1765    pub enabled: bool,
1766    /// Symmetric Dirichlet α — lower = more concentrated PMF per source.
1767    /// α=0.5 + N_s=25 ⇒ expected normalised entropy ≈ 0.65 (corpus median 0.68).
1768    #[serde(default = "default_source_cond_concentration")]
1769    pub concentration: f64,
1770    /// Expected distinct accounts per source (jittered by LogNormal(0, 0.3)).
1771    /// Corpus median 23.5; synth pre-SOTA-8 is ~5.
1772    #[serde(default = "default_accts_per_source_target")]
1773    pub accts_per_source_target: usize,
1774}
1775
/// Default symmetric Dirichlet α for the source-conditional account-pair sampler.
fn default_source_cond_concentration() -> f64 {
    const ALPHA: f64 = 0.5;
    ALPHA
}
1779
/// Default for `SourceConditionalAccountPairConfig::accts_per_source_target`.
fn default_accts_per_source_target() -> usize {
    const ACCOUNTS_PER_SOURCE: usize = 25;
    ACCOUNTS_PER_SOURCE
}
1783
1784impl Default for SourceConditionalAccountPairConfig {
1785    fn default() -> Self {
1786        Self {
1787            enabled: false,
1788            concentration: default_source_cond_concentration(),
1789            accts_per_source_target: default_accts_per_source_target(),
1790        }
1791    }
1792}
1793
1794/// Benford's Law compliance configuration.
1795#[derive(Debug, Clone, Serialize, Deserialize)]
1796pub struct BenfordConfig {
1797    /// Enable Benford's Law compliance for amount generation
1798    #[serde(default = "default_true")]
1799    pub enabled: bool,
1800    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
1801    #[serde(default = "default_benford_tolerance")]
1802    pub tolerance: f64,
1803    /// Transaction sources exempt from Benford's Law (fixed amounts)
1804    #[serde(default)]
1805    pub exempt_sources: Vec<BenfordExemption>,
1806}
1807
1808fn default_benford_tolerance() -> f64 {
1809    0.05
1810}
1811
1812impl Default for BenfordConfig {
1813    fn default() -> Self {
1814        Self {
1815            enabled: true,
1816            tolerance: default_benford_tolerance(),
1817            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
1818        }
1819    }
1820}
1821
1822/// Types of transactions exempt from Benford's Law.
1823#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1824#[serde(rename_all = "snake_case")]
1825pub enum BenfordExemption {
1826    /// Recurring fixed amounts (rent, subscriptions)
1827    Recurring,
1828    /// Payroll (standardized salaries)
1829    Payroll,
1830    /// Fixed fees and charges
1831    FixedFees,
1832    /// Round number purchases (often legitimate)
1833    RoundAmounts,
1834}
1835
1836/// Distribution of transaction sources.
1837#[derive(Debug, Clone, Serialize, Deserialize)]
1838pub struct SourceDistribution {
1839    /// Manual entries percentage
1840    pub manual: f64,
1841    /// Automated system entries
1842    pub automated: f64,
1843    /// Recurring entries
1844    pub recurring: f64,
1845    /// Adjustment entries
1846    pub adjustment: f64,
1847}
1848
1849impl Default for SourceDistribution {
1850    fn default() -> Self {
1851        Self {
1852            manual: 0.20,
1853            automated: 0.70,
1854            recurring: 0.07,
1855            adjustment: 0.03,
1856        }
1857    }
1858}
1859
1860/// Output configuration.
1861#[derive(Debug, Clone, Serialize, Deserialize)]
1862pub struct OutputConfig {
1863    /// Output mode
1864    #[serde(default)]
1865    pub mode: OutputMode,
1866    /// Output directory
1867    #[serde(alias = "outputDirectory")]
1868    pub output_directory: PathBuf,
1869    /// File formats to generate. Accepts both `formats: [json, csv]`
1870    /// (canonical YAML) and `exportFormat: "json"` / `exportFormats:
1871    /// ["json", "csv"]` (SDK-style camelCase). The single-string
1872    /// `exportFormat` form is deserialised via `one_or_many_formats`
1873    /// so SDK clients submitting `exportFormat: "json"` hit the right
1874    /// code path instead of silently falling through to the Parquet
1875    /// default — the bug the SDK team flagged in v4.4.0.
1876    #[serde(
1877        default = "default_formats",
1878        alias = "exportFormats",
1879        alias = "exportFormat",
1880        deserialize_with = "one_or_many_formats"
1881    )]
1882    pub formats: Vec<FileFormat>,
1883    /// Compression settings
1884    #[serde(default)]
1885    pub compression: CompressionConfig,
1886    /// Batch size for writes
1887    #[serde(default = "default_batch_size", alias = "batchSize")]
1888    pub batch_size: usize,
1889    /// Include ACDOCA format
1890    #[serde(default = "default_true", alias = "includeAcdoca")]
1891    pub include_acdoca: bool,
1892    /// Include BSEG format
1893    #[serde(default, alias = "includeBseg")]
1894    pub include_bseg: bool,
1895    /// Partition by fiscal period
1896    #[serde(default = "default_true", alias = "partitionByPeriod")]
1897    pub partition_by_period: bool,
1898    /// Partition by company code
1899    #[serde(default, alias = "partitionByCompany")]
1900    pub partition_by_company: bool,
1901    /// Numeric serialization mode for JSON output.
1902    /// "string" (default): decimals as `"1729237.30"` — lossless precision.
1903    /// "native": decimals as `1729237.30` — friendlier for pandas/analytics.
1904    #[serde(default, alias = "numericMode")]
1905    pub numeric_mode: NumericMode,
1906    /// JSON export layout for journal entries and document flows.
1907    /// "nested" (default): `{"header": {...}, "lines": [...]}` — natural ERP structure.
1908    /// "flat": header fields repeated on every line — friendlier for analytics/ML.
1909    ///
1910    /// Accepts both `export_layout` (canonical / YAML) and `exportLayout`
1911    /// (camelCase / SDK JSON) so SDKs that follow camelCase conventions
1912    /// hit the flat path rather than silently getting the Nested default.
1913    /// Before v3.1.1 the missing camelCase alias meant SDK requests with
1914    /// `exportLayout: "flat"` were silently ignored, which SDK operators
1915    /// reported as "flat hangs generation" (the job completed with Nested
1916    /// layout, but manifests didn't match the expected flat shape).
1917    #[serde(default, alias = "exportLayout")]
1918    pub export_layout: ExportLayout,
1919    /// SAP / HANA export settings (only read when the CLI
1920    /// `--export-format sap` flag is passed). Empty by default so
1921    /// existing configs don't change behaviour; dialect defaults to
1922    /// `classic` for backward compatibility.
1923    #[serde(default, alias = "sapExport")]
1924    pub sap: SapExportSettings,
1925    /// SAF-T (Standard Audit File for Tax) export settings. Read when
1926    /// the CLI `--export-format saft` flag is passed. Defaults to
1927    /// Portugal (`pt`) because the PT variant is the most mature and
1928    /// cross-jurisdiction compatible. Override with
1929    /// `jurisdiction: pl|ro|no|lu` for the other supported countries.
1930    #[serde(default, alias = "saftExport")]
1931    pub saft: SaftExportSettings,
1932}
1933
1934/// Configuration for the SAP export writers (BKPF / BSEG / ACDOCA and
1935/// master-data tables).
1936///
1937/// Mirror of `datasynth_output::SapExportConfig` in YAML form — the CLI
1938/// translates this into the runtime struct before invoking the exporter,
1939/// replacing the v3.x hardcoded `SapExportConfig::default()`.
1940#[derive(Debug, Clone, Serialize, Deserialize)]
1941pub struct SapExportSettings {
1942    /// SAP client / MANDT column value on every table.
1943    #[serde(default = "default_sap_client")]
1944    pub client: String,
1945    /// Leading ledger for ACDOCA rows (0L for S/4HANA default).
1946    #[serde(default = "default_sap_ledger")]
1947    pub ledger: String,
1948    /// Source system identifier — written to ACDOCA.AWSYS so downstream
1949    /// consumers can distinguish synthetic rows from production ones.
1950    #[serde(default = "default_sap_source_system")]
1951    pub source_system: String,
1952    /// Local currency (WAERS / RWCUR).
1953    #[serde(default = "default_sap_currency")]
1954    pub local_currency: String,
1955    /// Optional group / consolidation currency (triggers the HSL / RHCUR columns).
1956    #[serde(default, skip_serializing_if = "Option::is_none")]
1957    pub group_currency: Option<String>,
1958    /// Which SAP tables to export. Empty = default set (bkpf, bseg, acdoca).
1959    #[serde(default)]
1960    pub tables: Vec<String>,
1961    /// Include ZSIM_* extension columns on ACDOCA rows.
1962    #[serde(default = "default_true")]
1963    pub include_extension_fields: bool,
1964    /// Export dialect — `classic` (R/3 / BODS) or `hana` (S/4HANA CDS).
1965    #[serde(default)]
1966    pub dialect: SapDialectSetting,
1967    /// Legacy flag, retained for backward compatibility. Has no effect
1968    /// when `dialect = hana`.
1969    #[serde(default = "default_true")]
1970    pub use_sap_date_format: bool,
1971}
1972
1973impl Default for SapExportSettings {
1974    fn default() -> Self {
1975        Self {
1976            client: default_sap_client(),
1977            ledger: default_sap_ledger(),
1978            source_system: default_sap_source_system(),
1979            local_currency: default_sap_currency(),
1980            group_currency: None,
1981            tables: Vec::new(),
1982            include_extension_fields: true,
1983            dialect: SapDialectSetting::default(),
1984            use_sap_date_format: true,
1985        }
1986    }
1987}
1988
/// Default SAP client (MANDT) value.
fn default_sap_client() -> String {
    String::from("100")
}
/// Default leading ledger for ACDOCA rows.
fn default_sap_ledger() -> String {
    String::from("0L")
}
/// Default source-system identifier for SAP exports.
fn default_sap_source_system() -> String {
    String::from("SYNTH")
}
/// Default local currency for SAP exports.
fn default_sap_currency() -> String {
    String::from("USD")
}
2001
2002/// SAP export dialect (wire form — `datasynth_output::SapDialect` is the
2003/// runtime form).
2004#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
2005#[serde(rename_all = "snake_case")]
2006pub enum SapDialectSetting {
2007    /// Legacy R/3 / BODS-compatible CSV (default).
2008    #[default]
2009    Classic,
2010    /// S/4HANA CDS dialect (semicolon + UTF-8 BOM + decimal comma + ISO dates).
2011    Hana,
2012}
2013
2014/// SAF-T export settings (v4.3.1).
2015#[derive(Debug, Clone, Serialize, Deserialize)]
2016pub struct SaftExportSettings {
2017    /// ISO-ish two-letter code: `pt` / `pl` / `ro` / `no` / `lu`.
2018    /// Defaults to `pt` (Portugal, most mature variant).
2019    #[serde(default = "default_saft_jurisdiction")]
2020    pub jurisdiction: String,
2021    /// Company tax registration number / VAT ID / TIN used in the
2022    /// `Header.TaxRegistrationNumber` element. Falls back to
2023    /// `"Desconhecido"` (Portuguese for "unknown") when empty.
2024    #[serde(default)]
2025    pub company_tax_id: String,
2026    /// Optional override for the company name used in the Header.
2027    /// When empty, the first configured company's `name` is used.
2028    #[serde(default)]
2029    pub company_name: String,
2030}
2031
2032impl Default for SaftExportSettings {
2033    fn default() -> Self {
2034        Self {
2035            jurisdiction: default_saft_jurisdiction(),
2036            company_tax_id: String::new(),
2037            company_name: String::new(),
2038        }
2039    }
2040}
2041
/// Default SAF-T jurisdiction code.
fn default_saft_jurisdiction() -> String {
    String::from("pt")
}
2045
2046fn default_formats() -> Vec<FileFormat> {
2047    vec![FileFormat::Parquet]
2048}
/// Default for `OutputConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
2052
2053/// Custom deserializer for `formats` that accepts either a single
2054/// `FileFormat` (e.g. `"json"` for SDK `exportFormat: "json"`) or a
2055/// vector (e.g. `["json", "csv"]`). Without this shim an SDK config
2056/// with `exportFormat: "json"` would fail to parse (serde expects a
2057/// sequence for a `Vec` field) and silently fall through to defaults.
2058fn one_or_many_formats<'de, D>(deserializer: D) -> Result<Vec<FileFormat>, D::Error>
2059where
2060    D: serde::Deserializer<'de>,
2061{
2062    #[derive(Deserialize)]
2063    #[serde(untagged)]
2064    enum OneOrMany {
2065        One(FileFormat),
2066        Many(Vec<FileFormat>),
2067    }
2068    match OneOrMany::deserialize(deserializer)? {
2069        OneOrMany::One(f) => Ok(vec![f]),
2070        OneOrMany::Many(v) => Ok(v),
2071    }
2072}
2073
2074impl Default for OutputConfig {
2075    fn default() -> Self {
2076        Self {
2077            mode: OutputMode::FlatFile,
2078            output_directory: PathBuf::from("./output"),
2079            formats: default_formats(),
2080            compression: CompressionConfig::default(),
2081            batch_size: default_batch_size(),
2082            include_acdoca: true,
2083            include_bseg: false,
2084            partition_by_period: true,
2085            partition_by_company: false,
2086            numeric_mode: NumericMode::default(),
2087            export_layout: ExportLayout::default(),
2088            sap: SapExportSettings::default(),
2089            saft: SaftExportSettings::default(),
2090        }
2091    }
2092}
2093
2094/// Numeric serialization mode for JSON decimal fields.
2095#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2096#[serde(rename_all = "snake_case")]
2097pub enum NumericMode {
2098    /// Decimals as JSON strings (e.g. `"1729237.30"`). Preserves full precision.
2099    #[default]
2100    String,
2101    /// Decimals as JSON numbers (e.g. `1729237.30`). Friendlier for pandas/analytics.
2102    Native,
2103}
2104
2105/// JSON export layout for nested structures (journal entries, document flows).
2106#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2107#[serde(rename_all = "snake_case")]
2108pub enum ExportLayout {
2109    /// Nested structure: `{"header": {...}, "lines": [...]}`. Natural ERP format.
2110    #[default]
2111    Nested,
2112    /// Flat structure: header fields repeated on every line. Analytics-friendly.
2113    Flat,
2114}
2115
2116/// Output mode.
2117#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2118#[serde(rename_all = "snake_case")]
2119pub enum OutputMode {
2120    /// Stream records as generated
2121    Streaming,
2122    /// Write to flat files
2123    #[default]
2124    FlatFile,
2125    /// Both streaming and flat file
2126    Both,
2127}
2128
2129/// Supported file formats.
2130#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
2131#[serde(rename_all = "snake_case")]
2132pub enum FileFormat {
2133    Csv,
2134    Parquet,
2135    Json,
2136    JsonLines,
2137}
2138
2139/// Compression configuration.
2140#[derive(Debug, Clone, Serialize, Deserialize)]
2141pub struct CompressionConfig {
2142    /// Enable compression
2143    #[serde(default = "default_true")]
2144    pub enabled: bool,
2145    /// Compression algorithm
2146    #[serde(default)]
2147    pub algorithm: CompressionAlgorithm,
2148    /// Compression level (1-9)
2149    #[serde(default = "default_compression_level")]
2150    pub level: u8,
2151}
2152
/// Default for `CompressionConfig::level`.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
2156
2157impl Default for CompressionConfig {
2158    fn default() -> Self {
2159        Self {
2160            enabled: true,
2161            algorithm: CompressionAlgorithm::default(),
2162            level: default_compression_level(),
2163        }
2164    }
2165}
2166
2167/// Compression algorithms.
2168#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2169#[serde(rename_all = "snake_case")]
2170pub enum CompressionAlgorithm {
2171    Gzip,
2172    #[default]
2173    Zstd,
2174    Lz4,
2175    Snappy,
2176}
2177
2178/// Fraud simulation configuration.
2179///
2180/// ## Document-level vs. line-level fraud
2181///
2182/// `fraud_rate` applies to individual journal-entry lines (line-level).
2183/// `document_fraud_rate` (optional) applies to source documents
2184/// (purchase orders, vendor invoices, customer invoices, payments), and when
2185/// `propagate_to_lines` is true, every JE derived from a fraudulent document
2186/// also gets `is_fraud = true`. This lets users express either:
2187///
2188///  * pure line-level fraud (`document_fraud_rate = None`): legacy behaviour;
2189///  * pure document-level fraud (`fraud_rate ≈ 0` and `document_fraud_rate` set):
2190///    fraud rings expressed at document granularity — realistic for PO/invoice
2191///    fraud schemes where one fraudulent document spawns multiple derived JEs;
2192///  * hybrid (both set): document-level scheme fraud plus unrelated line-level
2193///    slip-ups.
2194///
2195/// `propagate_to_document` does the inverse: when a JE is tagged as fraud by
2196/// the anomaly injector, its source document is also marked fraudulent.
2197#[derive(Debug, Clone, Serialize, Deserialize)]
2198pub struct FraudConfig {
2199    /// Enable fraud scenario generation
2200    #[serde(default)]
2201    pub enabled: bool,
2202    /// Line-level fraud rate: fraction of individual JE lines flagged as fraud (0.0 to 1.0).
2203    ///
2204    /// # Effective line-level prevalence
2205    ///
2206    /// If `document_fraud_rate = Some(d)` and `propagate_to_lines = true`,
2207    /// the observed line-level fraud prevalence is roughly:
2208    ///
2209    /// > `P(line is_fraud) ≈ fraud_rate + d × avg_lines_per_fraud_doc / total_lines`
2210    ///
2211    /// For a typical retail job (avg 3 lines per document, ~30 % of lines
2212    /// come from doc-flow-derived JEs) the combined rate lands near:
2213    ///
2214    /// > `fraud_rate + 0.3 × d`
2215    ///
2216    /// so setting `fraud_rate=0.02, document_fraud_rate=0.05, propagate_to_lines=true`
2217    /// produces ~3.5 % line-level fraud, not 2 %. To target a specific
2218    /// line-level prevalence X, choose `fraud_rate = X - 0.3 × d`.
2219    #[serde(default = "default_fraud_rate", alias = "fraudRate")]
2220    pub fraud_rate: f64,
2221    /// Document-level fraud rate: fraction of source documents (PO, vendor
2222    /// invoice, customer invoice, payment) flagged as fraud. `None` disables
2223    /// document-level injection; `Some(r)` marks ~r × document-count as fraud
2224    /// independently of the line-level rate.
2225    ///
2226    /// v4.4.2+ default: `Some(0.01)` — the SDK team reported
2227    /// `is_fraud_propagated: 0/72` regressed from `12/33` in 3.1.1 because
2228    /// the default had silently become None. A 1% document-fraud default
2229    /// restores the propagation signal (~0.3% of JE headers carry
2230    /// `is_fraud_propagated = true`) without meaningfully changing the
2231    /// line-level fraud prevalence. Set to `Some(0.0)` or `null` in your
2232    /// YAML to explicitly disable document-level injection.
2233    #[serde(default = "default_document_fraud_rate", alias = "documentFraudRate")]
2234    pub document_fraud_rate: Option<f64>,
2235    /// When true, flagging a document as fraudulent cascades `is_fraud = true`
2236    /// and `fraud_type` to every journal entry derived from that document,
2237    /// and records `fraud_source_document_id` on the JE header.
2238    /// Default: `true`.
2239    #[serde(default = "default_true", alias = "propagateToLines")]
2240    pub propagate_to_lines: bool,
2241    /// When true, tagging a JE as fraud via line-level anomaly injection also
2242    /// marks the JE's source document as fraudulent (if it can be resolved).
2243    /// Default: `true`.
2244    #[serde(default = "default_true", alias = "propagateToDocument")]
2245    pub propagate_to_document: bool,
2246    /// Fraud type distribution
2247    #[serde(default)]
2248    pub fraud_type_distribution: FraudTypeDistribution,
2249    /// Enable fraud clustering
2250    #[serde(default)]
2251    pub clustering_enabled: bool,
2252    /// Clustering factor
2253    #[serde(default = "default_clustering_factor")]
2254    pub clustering_factor: f64,
2255    /// Approval thresholds for threshold-adjacent fraud pattern
2256    #[serde(default = "default_approval_thresholds")]
2257    pub approval_thresholds: Vec<f64>,
2258}
2259
/// Default approval thresholds for the threshold-adjacent fraud pattern,
/// in ascending order.
fn default_approval_thresholds() -> Vec<f64> {
    const THRESHOLDS: [f64; 6] = [
        1_000.0, 5_000.0, 10_000.0, 25_000.0, 50_000.0, 100_000.0,
    ];
    THRESHOLDS.to_vec()
}
2263
/// Default line-level fraud rate (`FraudConfig::fraud_rate`).
fn default_fraud_rate() -> f64 {
    const LINE_FRAUD_RATE: f64 = 0.005;
    LINE_FRAUD_RATE
}
/// Default document-level fraud rate (`FraudConfig::document_fraud_rate`).
fn default_document_fraud_rate() -> Option<f64> {
    // v5.0.1 raised this from 0.01 to 0.05 so scheme-level fraud propagation
    // is meaningful at typical line-level rates. The 1 % default (introduced
    // in v4.4.2 to restore `is_fraud_propagated > 0`) was too conservative:
    // at `fraud_rate = 0.08` it produced ~3.6 % observed propagation against
    // a 26.7 % target. With 5 % and the additive formula
    // `P(line is_fraud) ≈ fraud_rate + 0.3 × d`, the combined rate is ~9.5 %
    // at fraud_rate=0.08 (closer to the spec target).
    // Set explicitly to `Some(0.0)` or `null` in YAML to disable, or to a
    // higher value (e.g. 0.20) for scheme-heavy fraud workloads.
    const DOCUMENT_FRAUD_RATE: f64 = 0.05;
    Some(DOCUMENT_FRAUD_RATE)
}
/// Default for `FraudConfig::clustering_factor`.
fn default_clustering_factor() -> f64 {
    const CLUSTERING_FACTOR: f64 = 3.0;
    CLUSTERING_FACTOR
}
2282
2283impl Default for FraudConfig {
2284    fn default() -> Self {
2285        Self {
2286            enabled: false,
2287            fraud_rate: default_fraud_rate(),
2288            document_fraud_rate: default_document_fraud_rate(),
2289            propagate_to_lines: true,
2290            propagate_to_document: true,
2291            fraud_type_distribution: FraudTypeDistribution::default(),
2292            clustering_enabled: false,
2293            clustering_factor: default_clustering_factor(),
2294            approval_thresholds: default_approval_thresholds(),
2295        }
2296    }
2297}
2298
2299/// Distribution of fraud types.
2300///
2301/// All fields default to `0.0` if absent from the YAML, so partial
2302/// distributions are accepted; the validator (`validate_sum_to_one`)
2303/// then enforces that the populated weights sum to `1.0 ± 0.01`.
2304#[derive(Debug, Clone, Serialize, Deserialize)]
2305#[serde(deny_unknown_fields)]
2306pub struct FraudTypeDistribution {
2307    #[serde(default)]
2308    pub suspense_account_abuse: f64,
2309    #[serde(default)]
2310    pub fictitious_transaction: f64,
2311    #[serde(default)]
2312    pub revenue_manipulation: f64,
2313    #[serde(default)]
2314    pub expense_capitalization: f64,
2315    #[serde(default)]
2316    pub split_transaction: f64,
2317    #[serde(default)]
2318    pub timing_anomaly: f64,
2319    #[serde(default)]
2320    pub unauthorized_access: f64,
2321    #[serde(default)]
2322    pub duplicate_payment: f64,
2323    /// Vendor kickback scheme.
2324    #[serde(default)]
2325    pub kickback_scheme: f64,
2326    /// Round-tripping funds through multiple entities or accounts.
2327    #[serde(default)]
2328    pub round_tripping: f64,
2329    /// Unauthorized customer/vendor discounts (sweethearting, side deals).
2330    #[serde(default)]
2331    pub unauthorized_discount: f64,
2332}
2333
2334impl Default for FraudTypeDistribution {
2335    fn default() -> Self {
2336        // Preserves the pre-extension default sum=1.0 over the original
2337        // eight fields.  The three additional fields (kickback_scheme,
2338        // round_tripping, unauthorized_discount) default to 0.0 so that
2339        // existing fraud packs / templates that explicitly enumerate the
2340        // original eight fields continue to merge to a 1.0 sum without
2341        // modification.  Users who want those fraud types must set them
2342        // explicitly (and rebalance the others).
2343        Self {
2344            suspense_account_abuse: 0.25,
2345            fictitious_transaction: 0.15,
2346            revenue_manipulation: 0.10,
2347            expense_capitalization: 0.10,
2348            split_transaction: 0.15,
2349            timing_anomaly: 0.10,
2350            unauthorized_access: 0.10,
2351            duplicate_payment: 0.05,
2352            kickback_scheme: 0.0,
2353            round_tripping: 0.0,
2354            unauthorized_discount: 0.0,
2355        }
2356    }
2357}
2358
2359/// Internal Controls System (ICS) configuration.
2360#[derive(Debug, Clone, Serialize, Deserialize)]
2361pub struct InternalControlsConfig {
2362    /// Enable internal controls system
2363    #[serde(default)]
2364    pub enabled: bool,
2365    /// Rate at which controls result in exceptions (0.0 - 1.0)
2366    #[serde(default = "default_exception_rate")]
2367    pub exception_rate: f64,
2368    /// Rate at which SoD violations occur (0.0 - 1.0)
2369    #[serde(default = "default_sod_violation_rate")]
2370    pub sod_violation_rate: f64,
2371    /// Export control master data to separate files
2372    #[serde(default = "default_true")]
2373    pub export_control_master_data: bool,
2374    /// SOX materiality threshold for marking transactions as SOX-relevant
2375    #[serde(default = "default_sox_materiality_threshold")]
2376    pub sox_materiality_threshold: f64,
2377    /// Enable COSO 2013 framework integration
2378    #[serde(default = "default_true")]
2379    pub coso_enabled: bool,
2380    /// Include entity-level controls in generation
2381    #[serde(default)]
2382    pub include_entity_level_controls: bool,
2383    /// Target maturity level for controls
2384    /// Valid values: "ad_hoc", "repeatable", "defined", "managed", "optimized", "mixed"
2385    #[serde(default = "default_target_maturity_level")]
2386    pub target_maturity_level: String,
2387}
2388
/// Default for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    const EXCEPTION_RATE: f64 = 0.02;
    EXCEPTION_RATE
}
2392
/// Default for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    const SOD_VIOLATION_RATE: f64 = 0.01;
    SOD_VIOLATION_RATE
}
2396
/// Default for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    const SOX_THRESHOLD: f64 = 10_000.0;
    SOX_THRESHOLD
}
2400
/// Default for `InternalControlsConfig::target_maturity_level`.
fn default_target_maturity_level() -> String {
    String::from("mixed")
}
2404
2405impl Default for InternalControlsConfig {
2406    fn default() -> Self {
2407        Self {
2408            enabled: false,
2409            exception_rate: default_exception_rate(),
2410            sod_violation_rate: default_sod_violation_rate(),
2411            export_control_master_data: true,
2412            sox_materiality_threshold: default_sox_materiality_threshold(),
2413            coso_enabled: true,
2414            include_entity_level_controls: false,
2415            target_maturity_level: default_target_maturity_level(),
2416        }
2417    }
2418}
2419
/// Business process configuration.
///
/// Relative weights of the five business processes. Each weight has its own
/// serde fallback, so any subset may be supplied; the built-in fallbacks
/// (0.35 / 0.30 / 0.20 / 0.10 / 0.05) sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusinessProcessConfig {
    /// Order-to-Cash weight (fallback 0.35)
    #[serde(default = "default_o2c")]
    pub o2c_weight: f64,
    /// Procure-to-Pay weight (fallback 0.30)
    #[serde(default = "default_p2p")]
    pub p2p_weight: f64,
    /// Record-to-Report weight (fallback 0.20)
    #[serde(default = "default_r2r")]
    pub r2r_weight: f64,
    /// Hire-to-Retire weight (fallback 0.10)
    #[serde(default = "default_h2r")]
    pub h2r_weight: f64,
    /// Acquire-to-Retire weight (fallback 0.05)
    #[serde(default = "default_a2r")]
    pub a2r_weight: f64,
}
2439
/// Serde fallback for `BusinessProcessConfig::o2c_weight` (Order-to-Cash).
fn default_o2c() -> f64 {
    const O2C_WEIGHT: f64 = 0.35;
    O2C_WEIGHT
}
/// Serde fallback for `BusinessProcessConfig::p2p_weight` (Procure-to-Pay).
fn default_p2p() -> f64 {
    const P2P_WEIGHT: f64 = 0.30;
    P2P_WEIGHT
}
/// Serde fallback for `BusinessProcessConfig::r2r_weight` (Record-to-Report).
fn default_r2r() -> f64 {
    const R2R_WEIGHT: f64 = 0.20;
    R2R_WEIGHT
}
/// Serde fallback for `BusinessProcessConfig::h2r_weight` (Hire-to-Retire).
fn default_h2r() -> f64 {
    const H2R_WEIGHT: f64 = 0.10;
    H2R_WEIGHT
}
/// Serde fallback for `BusinessProcessConfig::a2r_weight` (Acquire-to-Retire).
fn default_a2r() -> f64 {
    const A2R_WEIGHT: f64 = 0.05;
    A2R_WEIGHT
}
2455
2456impl Default for BusinessProcessConfig {
2457    fn default() -> Self {
2458        Self {
2459            o2c_weight: default_o2c(),
2460            p2p_weight: default_p2p(),
2461            r2r_weight: default_r2r(),
2462            h2r_weight: default_h2r(),
2463            a2r_weight: default_a2r(),
2464        }
2465    }
2466}
2467
/// User persona configuration.
///
/// Both sub-sections are optional; an omitted section falls back to that
/// type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct UserPersonaConfig {
    /// Distribution of user personas
    /// (falls back to `PersonaDistribution::default()` when omitted)
    #[serde(default)]
    pub persona_distribution: PersonaDistribution,
    /// Users per persona type
    /// (falls back to `UsersPerPersona::default()` when omitted)
    #[serde(default)]
    pub users_per_persona: UsersPerPersona,
}
2478
/// Distribution of user personas for transaction generation.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all five keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonaDistribution {
    /// Weight of the junior accountant persona
    pub junior_accountant: f64,
    /// Weight of the senior accountant persona
    pub senior_accountant: f64,
    /// Weight of the controller persona
    pub controller: f64,
    /// Weight of the manager persona
    pub manager: f64,
    /// Weight of the automated system persona
    pub automated_system: f64,
}
2488
2489impl Default for PersonaDistribution {
2490    fn default() -> Self {
2491        Self {
2492            junior_accountant: 0.15,
2493            senior_accountant: 0.15,
2494            controller: 0.05,
2495            manager: 0.05,
2496            automated_system: 0.60,
2497        }
2498    }
2499}
2500
/// Number of users per persona type.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all five keys must be present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsersPerPersona {
    /// Number of junior accountant users
    pub junior_accountant: usize,
    /// Number of senior accountant users
    pub senior_accountant: usize,
    /// Number of controller users
    pub controller: usize,
    /// Number of manager users
    pub manager: usize,
    /// Number of automated system users
    pub automated_system: usize,
}
2510
2511impl Default for UsersPerPersona {
2512    fn default() -> Self {
2513        Self {
2514            junior_accountant: 10,
2515            senior_accountant: 5,
2516            controller: 2,
2517            manager: 3,
2518            automated_system: 20,
2519        }
2520    }
2521}
2522
/// Template configuration for realistic data generation.
///
/// # User-supplied template packs (v3.2.0+)
///
/// Set `path` to a directory (or single YAML/JSON file) to override or
/// extend the embedded default pools for vendor names, customer names,
/// material/asset descriptions, audit findings, bank names, and
/// department names. When `path` is `None` (the default), generators
/// use the compiled-in pools and output is byte-identical to v3.1.2.
///
/// See `crates/datasynth-core/src/templates/loader.rs::TemplateData`
/// for the full YAML schema. Use `datasynth-data templates export` to
/// dump the defaults as a starter pack.
///
/// Every field is optional at the serde level; omitted fields take the
/// field type's `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TemplateConfig {
    /// Name generation settings
    #[serde(default)]
    pub names: NameTemplateConfig,
    /// Description generation settings
    #[serde(default)]
    pub descriptions: DescriptionTemplateConfig,
    /// Reference number settings
    #[serde(default)]
    pub references: ReferenceTemplateConfig,
    /// Optional path to a user-supplied template file or directory.
    /// When set, entries from the file(s) augment or replace the
    /// embedded defaults according to `merge_strategy`.
    ///
    /// `None` (default) = use embedded pools only (byte-identical to v3.1.2).
    ///
    /// Also accepted under the key `templatesPath`.
    #[serde(default, alias = "templatesPath")]
    pub path: Option<std::path::PathBuf>,
    /// How file-based entries combine with embedded defaults.
    ///
    /// - `extend` (default): append file entries to embedded pools,
    ///   de-duplicating. Safe for incremental overlays.
    /// - `replace`: discard embedded pools entirely and use only file
    ///   entries. Requires a fully-populated template file.
    /// - `merge_prefer_file`: replace individual categories when present
    ///   in the file; keep embedded for absent categories.
    ///
    /// Also accepted under the key `mergeStrategy`.
    #[serde(default, alias = "mergeStrategy")]
    pub merge_strategy: TemplateMergeStrategy,
}
2565
/// Strategy for combining user-supplied template files with embedded defaults.
///
/// Variants are (de)serialized in snake_case: `extend`, `replace`,
/// `merge_prefer_file`. `Extend` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TemplateMergeStrategy {
    /// Append file entries to embedded pools (default).
    #[default]
    Extend,
    /// Replace embedded pools entirely with file entries.
    Replace,
    /// Replace individual categories when present in file; keep embedded for absent ones.
    MergePreferFile,
}
2578
/// Name template configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NameTemplateConfig {
    /// Distribution of name cultures
    /// (falls back to `CultureDistribution::default()` when omitted)
    #[serde(default)]
    pub culture_distribution: CultureDistribution,
    /// Email domain for generated users
    /// (falls back to `default_email_domain()`, "company.com")
    #[serde(default = "default_email_domain")]
    pub email_domain: String,
    /// Generate realistic display names (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_realistic_names: bool,
}
2592
/// Serde fallback for `NameTemplateConfig::email_domain`: `"company.com"`.
fn default_email_domain() -> String {
    String::from("company.com")
}
2596
2597impl Default for NameTemplateConfig {
2598    fn default() -> Self {
2599        Self {
2600            culture_distribution: CultureDistribution::default(),
2601            email_domain: default_email_domain(),
2602            generate_realistic_names: true,
2603        }
2604    }
2605}
2606
/// Distribution of name cultures for generation.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all seven keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CultureDistribution {
    /// Weight of Western/US names
    pub western_us: f64,
    /// Weight of Hispanic names
    pub hispanic: f64,
    /// Weight of German names
    pub german: f64,
    /// Weight of French names
    pub french: f64,
    /// Weight of Chinese names
    pub chinese: f64,
    /// Weight of Japanese names
    pub japanese: f64,
    /// Weight of Indian names
    pub indian: f64,
}
2618
2619impl Default for CultureDistribution {
2620    fn default() -> Self {
2621        Self {
2622            western_us: 0.40,
2623            hispanic: 0.20,
2624            german: 0.10,
2625            french: 0.05,
2626            chinese: 0.10,
2627            japanese: 0.05,
2628            indian: 0.10,
2629        }
2630    }
2631}
2632
/// Description template configuration.
///
/// Both switches are optional at the serde level; `Default` turns both on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DescriptionTemplateConfig {
    /// Generate header text for journal entries
    /// (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_header_text: bool,
    /// Generate line text for journal entry lines
    /// (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_line_text: bool,
}
2643
2644impl Default for DescriptionTemplateConfig {
2645    fn default() -> Self {
2646        Self {
2647            generate_header_text: true,
2648            generate_line_text: true,
2649        }
2650    }
2651}
2652
/// Reference number template configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceTemplateConfig {
    /// Generate reference numbers (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_references: bool,
    /// Invoice prefix (falls back to "INV")
    #[serde(default = "default_invoice_prefix")]
    pub invoice_prefix: String,
    /// Purchase order prefix (falls back to "PO")
    #[serde(default = "default_po_prefix")]
    pub po_prefix: String,
    /// Sales order prefix (falls back to "SO")
    #[serde(default = "default_so_prefix")]
    pub so_prefix: String,
}
2669
/// Serde fallback for `ReferenceTemplateConfig::invoice_prefix`: `"INV"`.
fn default_invoice_prefix() -> String {
    String::from("INV")
}
/// Serde fallback for `ReferenceTemplateConfig::po_prefix`: `"PO"`.
fn default_po_prefix() -> String {
    String::from("PO")
}
/// Serde fallback for `ReferenceTemplateConfig::so_prefix`: `"SO"`.
fn default_so_prefix() -> String {
    String::from("SO")
}
2679
2680impl Default for ReferenceTemplateConfig {
2681    fn default() -> Self {
2682        Self {
2683            generate_references: true,
2684            invoice_prefix: default_invoice_prefix(),
2685            po_prefix: default_po_prefix(),
2686            so_prefix: default_so_prefix(),
2687        }
2688    }
2689}
2690
/// Approval workflow configuration.
///
/// All fields are optional at the serde level. Note that an omitted
/// `thresholds` key deserializes to an empty list, whereas
/// `ApprovalConfig::default()` supplies a four-level chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalConfig {
    /// Enable approval workflow generation (`false` when omitted)
    #[serde(default)]
    pub enabled: bool,
    /// Threshold below which transactions are auto-approved
    /// (falls back to 1000.0)
    #[serde(default = "default_auto_approve_threshold")]
    pub auto_approve_threshold: f64,
    /// Rate at which approvals are rejected (0.0 to 1.0); falls back to 0.02
    #[serde(default = "default_rejection_rate")]
    pub rejection_rate: f64,
    /// Rate at which approvals require revision (0.0 to 1.0); falls back to 0.05
    #[serde(default = "default_revision_rate")]
    pub revision_rate: f64,
    /// Average delay in hours for approval processing (falls back to 4.0)
    #[serde(default = "default_approval_delay_hours")]
    pub average_approval_delay_hours: f64,
    /// Approval chain thresholds (empty when the key is omitted)
    #[serde(default)]
    pub thresholds: Vec<ApprovalThresholdConfig>,
}
2713
/// Serde fallback for `ApprovalConfig::auto_approve_threshold` (1000.0).
fn default_auto_approve_threshold() -> f64 {
    const AUTO_APPROVE_THRESHOLD: f64 = 1000.0;
    AUTO_APPROVE_THRESHOLD
}
/// Serde fallback for `ApprovalConfig::rejection_rate` (0.02, i.e. 2%).
fn default_rejection_rate() -> f64 {
    const REJECTION_RATE: f64 = 0.02;
    REJECTION_RATE
}
/// Serde fallback for `ApprovalConfig::revision_rate` (0.05, i.e. 5%).
fn default_revision_rate() -> f64 {
    const REVISION_RATE: f64 = 0.05;
    REVISION_RATE
}
/// Serde fallback for `ApprovalConfig::average_approval_delay_hours` (4.0 hours).
fn default_approval_delay_hours() -> f64 {
    const APPROVAL_DELAY_HOURS: f64 = 4.0;
    APPROVAL_DELAY_HOURS
}
2726
2727impl Default for ApprovalConfig {
2728    fn default() -> Self {
2729        Self {
2730            enabled: false,
2731            auto_approve_threshold: default_auto_approve_threshold(),
2732            rejection_rate: default_rejection_rate(),
2733            revision_rate: default_revision_rate(),
2734            average_approval_delay_hours: default_approval_delay_hours(),
2735            thresholds: vec![
2736                ApprovalThresholdConfig {
2737                    amount: 1000.0,
2738                    level: 1,
2739                    roles: vec!["senior_accountant".to_string()],
2740                },
2741                ApprovalThresholdConfig {
2742                    amount: 10000.0,
2743                    level: 2,
2744                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
2745                },
2746                ApprovalThresholdConfig {
2747                    amount: 100000.0,
2748                    level: 3,
2749                    roles: vec![
2750                        "senior_accountant".to_string(),
2751                        "controller".to_string(),
2752                        "manager".to_string(),
2753                    ],
2754                },
2755                ApprovalThresholdConfig {
2756                    amount: 500000.0,
2757                    level: 4,
2758                    roles: vec![
2759                        "senior_accountant".to_string(),
2760                        "controller".to_string(),
2761                        "manager".to_string(),
2762                        "executive".to_string(),
2763                    ],
2764                },
2765            ],
2766        }
2767    }
2768}
2769
/// Configuration for a single approval threshold.
///
/// No field has a serde default, so every entry must specify `amount`,
/// `level` and `roles`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalThresholdConfig {
    /// Amount threshold
    pub amount: f64,
    /// Approval level required
    pub level: u8,
    /// Roles that can approve at this level
    pub roles: Vec<String>,
}
2780
/// Department configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepartmentConfig {
    /// Enable department assignment (`false` when omitted)
    #[serde(default)]
    pub enabled: bool,
    /// Multiplier for department headcounts (falls back to 1.0)
    #[serde(default = "default_headcount_multiplier")]
    pub headcount_multiplier: f64,
    /// Custom department definitions (optional; empty when omitted)
    #[serde(default)]
    pub custom_departments: Vec<CustomDepartmentConfig>,
}
2794
/// Serde fallback for `DepartmentConfig::headcount_multiplier` (1.0).
fn default_headcount_multiplier() -> f64 {
    const HEADCOUNT_MULTIPLIER: f64 = 1.0;
    HEADCOUNT_MULTIPLIER
}
2798
2799impl Default for DepartmentConfig {
2800    fn default() -> Self {
2801        Self {
2802            enabled: false,
2803            headcount_multiplier: default_headcount_multiplier(),
2804            custom_departments: Vec::new(),
2805        }
2806    }
2807}
2808
/// Custom department definition.
///
/// `code` and `name` are required; the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomDepartmentConfig {
    /// Department code
    pub code: String,
    /// Department name
    pub name: String,
    /// Associated cost center (`None` when omitted)
    #[serde(default)]
    pub cost_center: Option<String>,
    /// Primary business processes (empty when omitted)
    #[serde(default)]
    pub primary_processes: Vec<String>,
    /// Parent department code (`None` when omitted)
    #[serde(default)]
    pub parent_code: Option<String>,
}
2826
2827// ============================================================================
2828// Master Data Configuration
2829// ============================================================================
2830
/// Master data generation configuration.
///
/// Groups the per-entity master data sections. Each section is optional and
/// falls back to its own type's `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MasterDataConfig {
    /// Vendor master data settings
    #[serde(default)]
    pub vendors: VendorMasterConfig,
    /// Customer master data settings
    #[serde(default)]
    pub customers: CustomerMasterConfig,
    /// Material master data settings
    #[serde(default)]
    pub materials: MaterialMasterConfig,
    /// Fixed asset master data settings
    #[serde(default)]
    pub fixed_assets: FixedAssetMasterConfig,
    /// Employee master data settings
    #[serde(default)]
    pub employees: EmployeeMasterConfig,
    /// Cost center master data settings
    #[serde(default)]
    pub cost_centers: CostCenterMasterConfig,
}
2853
/// Vendor master data configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VendorMasterConfig {
    /// Number of vendors to generate (falls back to 500)
    #[serde(default = "default_vendor_count")]
    pub count: usize,
    /// Percentage of vendors that are intercompany (0.0 to 1.0);
    /// falls back to 0.05
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Payment terms distribution
    #[serde(default)]
    pub payment_terms_distribution: PaymentTermsDistribution,
    /// Vendor behavior distribution
    #[serde(default)]
    pub behavior_distribution: VendorBehaviorDistribution,
    /// Generate bank account details (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_bank_accounts: bool,
    /// Generate tax IDs (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_tax_ids: bool,
}
2876
/// Serde fallback for `VendorMasterConfig::count` (500 vendors).
fn default_vendor_count() -> usize {
    const VENDOR_COUNT: usize = 500;
    VENDOR_COUNT
}
2880
/// Serde fallback for the `intercompany_percent` field of both
/// `VendorMasterConfig` and `CustomerMasterConfig` (0.05, i.e. 5%).
fn default_intercompany_percent() -> f64 {
    const INTERCOMPANY_PERCENT: f64 = 0.05;
    INTERCOMPANY_PERCENT
}
2884
2885impl Default for VendorMasterConfig {
2886    fn default() -> Self {
2887        Self {
2888            count: default_vendor_count(),
2889            intercompany_percent: default_intercompany_percent(),
2890            payment_terms_distribution: PaymentTermsDistribution::default(),
2891            behavior_distribution: VendorBehaviorDistribution::default(),
2892            generate_bank_accounts: true,
2893            generate_tax_ids: true,
2894        }
2895    }
2896}
2897
/// Payment terms distribution for vendors.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all six keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDistribution {
    /// Net 30 days
    pub net_30: f64,
    /// Net 60 days
    pub net_60: f64,
    /// Net 90 days
    pub net_90: f64,
    /// 2% 10 Net 30 (early payment discount)
    pub two_ten_net_30: f64,
    /// Due on receipt
    pub due_on_receipt: f64,
    /// End of month
    pub end_of_month: f64,
}
2914
2915impl Default for PaymentTermsDistribution {
2916    fn default() -> Self {
2917        Self {
2918            net_30: 0.40,
2919            net_60: 0.20,
2920            net_90: 0.10,
2921            two_ten_net_30: 0.15,
2922            due_on_receipt: 0.05,
2923            end_of_month: 0.10,
2924        }
2925    }
2926}
2927
/// Vendor behavior distribution.
///
/// All fields default to `0.0` if absent from the YAML, so partial
/// distributions are accepted; the validator (`validate_sum_to_one`)
/// then enforces that the populated weights sum to `1.0 ± 0.01`.
///
/// Unknown keys are rejected (`deny_unknown_fields`), so a misspelled
/// behavior name is a deserialization error rather than a silently
/// ignored entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct VendorBehaviorDistribution {
    /// Reliable vendors (consistent delivery, quality)
    #[serde(default)]
    pub reliable: f64,
    /// Sometimes late vendors
    #[serde(default)]
    pub sometimes_late: f64,
    /// Inconsistent quality vendors
    #[serde(default)]
    pub inconsistent_quality: f64,
    /// Premium vendors (high quality, premium pricing)
    #[serde(default)]
    pub premium: f64,
    /// Budget vendors (lower quality, lower pricing)
    #[serde(default)]
    pub budget: f64,
    /// Erratic vendors (variable behavior, unpredictable performance)
    #[serde(default)]
    pub erratic: f64,
    /// Problematic vendors (frequent issues, high risk for fraud scenarios)
    #[serde(default)]
    pub problematic: f64,
}
2958
2959impl Default for VendorBehaviorDistribution {
2960    fn default() -> Self {
2961        // Preserves the pre-extension default sum=1.0 over the original
2962        // five fields.  `erratic` and `problematic` default to 0.0 so
2963        // that existing configs/packs continue to merge to a 1.0 sum
2964        // without modification.
2965        Self {
2966            reliable: 0.50,
2967            sometimes_late: 0.20,
2968            inconsistent_quality: 0.10,
2969            premium: 0.10,
2970            budget: 0.10,
2971            erratic: 0.0,
2972            problematic: 0.0,
2973        }
2974    }
2975}
2976
/// Customer master data configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerMasterConfig {
    /// Number of customers to generate (falls back to 2000)
    #[serde(default = "default_customer_count")]
    pub count: usize,
    /// Percentage of customers that are intercompany (0.0 to 1.0);
    /// falls back to 0.05 (helper shared with `VendorMasterConfig`)
    #[serde(default = "default_intercompany_percent")]
    pub intercompany_percent: f64,
    /// Credit rating distribution
    #[serde(default)]
    pub credit_rating_distribution: CreditRatingDistribution,
    /// Payment behavior distribution
    #[serde(default)]
    pub payment_behavior_distribution: PaymentBehaviorDistribution,
    /// Generate credit limits based on rating (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_credit_limits: bool,
}
2996
/// Serde fallback for `CustomerMasterConfig::count` (2000 customers).
fn default_customer_count() -> usize {
    const CUSTOMER_COUNT: usize = 2000;
    CUSTOMER_COUNT
}
3000
3001impl Default for CustomerMasterConfig {
3002    fn default() -> Self {
3003        Self {
3004            count: default_customer_count(),
3005            intercompany_percent: default_intercompany_percent(),
3006            credit_rating_distribution: CreditRatingDistribution::default(),
3007            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
3008            generate_credit_limits: true,
3009        }
3010    }
3011}
3012
/// Credit rating distribution for customers.
///
/// Two parallel vocabularies are accepted:
///   * Bond-grade tiers: `aaa`, `aa`, `a`, `bbb`, `bb`, `b`, `below_b`
///   * Plain-English tiers: `excellent`, `good`, `fair`, `poor`
///
/// All fields default to `0.0` if absent; mix and match as needed.
/// The validator enforces that the populated weights sum to `1.0`.
///
/// Unknown keys are rejected (`deny_unknown_fields`). The built-in
/// `Default` populates only the bond-grade tiers and leaves the
/// plain-English tiers at `0.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CreditRatingDistribution {
    /// AAA rating
    #[serde(default)]
    pub aaa: f64,
    /// AA rating
    #[serde(default)]
    pub aa: f64,
    /// A rating
    #[serde(default)]
    pub a: f64,
    /// BBB rating
    #[serde(default)]
    pub bbb: f64,
    /// BB rating
    #[serde(default)]
    pub bb: f64,
    /// B rating
    #[serde(default)]
    pub b: f64,
    /// Below B rating
    #[serde(default)]
    pub below_b: f64,
    /// Plain-English: excellent credit (≈ AAA/AA tier)
    #[serde(default)]
    pub excellent: f64,
    /// Plain-English: good credit (≈ A tier)
    #[serde(default)]
    pub good: f64,
    /// Plain-English: fair credit (≈ BBB/BB tier)
    #[serde(default)]
    pub fair: f64,
    /// Plain-English: poor credit (≈ B/below tier)
    #[serde(default)]
    pub poor: f64,
}
3058
3059impl Default for CreditRatingDistribution {
3060    fn default() -> Self {
3061        Self {
3062            aaa: 0.05,
3063            aa: 0.10,
3064            a: 0.20,
3065            bbb: 0.30,
3066            bb: 0.20,
3067            b: 0.10,
3068            below_b: 0.05,
3069            excellent: 0.0,
3070            good: 0.0,
3071            fair: 0.0,
3072            poor: 0.0,
3073        }
3074    }
3075}
3076
/// Payment behavior distribution for customers.
///
/// All fields default to `0.0` if absent from the YAML.  Validator
/// enforces that populated weights sum to `1.0 ± 0.01`.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PaymentBehaviorDistribution {
    /// Always pays early
    #[serde(default)]
    pub early_payer: f64,
    /// Pays on time
    #[serde(default)]
    pub on_time: f64,
    /// Occasionally late
    #[serde(default)]
    pub occasional_late: f64,
    /// Frequently late
    #[serde(default)]
    pub frequent_late: f64,
    /// Takes early payment discounts
    #[serde(default)]
    pub discount_taker: f64,
}
3100
3101impl Default for PaymentBehaviorDistribution {
3102    fn default() -> Self {
3103        Self {
3104            early_payer: 0.10,
3105            on_time: 0.50,
3106            occasional_late: 0.25,
3107            frequent_late: 0.10,
3108            discount_taker: 0.05,
3109        }
3110    }
3111}
3112
/// Material master data configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialMasterConfig {
    /// Number of materials to generate (falls back to 5000)
    #[serde(default = "default_material_count")]
    pub count: usize,
    /// Material type distribution
    #[serde(default)]
    pub type_distribution: MaterialTypeDistribution,
    /// Valuation method distribution
    #[serde(default)]
    pub valuation_distribution: ValuationMethodDistribution,
    /// Percentage of materials with BOM (bill of materials).
    /// Falls back to 0.20, i.e. expressed as a fraction.
    #[serde(default = "default_bom_percent")]
    pub bom_percent: f64,
    /// Maximum BOM depth (falls back to 3)
    #[serde(default = "default_max_bom_depth")]
    pub max_bom_depth: u8,
}
3132
/// Serde fallback for `MaterialMasterConfig::count` (5000 materials).
fn default_material_count() -> usize {
    const MATERIAL_COUNT: usize = 5000;
    MATERIAL_COUNT
}
3136
/// Serde fallback for `MaterialMasterConfig::bom_percent` (0.20, i.e. 20%).
fn default_bom_percent() -> f64 {
    const BOM_PERCENT: f64 = 0.20;
    BOM_PERCENT
}
3140
/// Serde fallback for `MaterialMasterConfig::max_bom_depth` (3 levels).
fn default_max_bom_depth() -> u8 {
    const MAX_BOM_DEPTH: u8 = 3;
    MAX_BOM_DEPTH
}
3144
3145impl Default for MaterialMasterConfig {
3146    fn default() -> Self {
3147        Self {
3148            count: default_material_count(),
3149            type_distribution: MaterialTypeDistribution::default(),
3150            valuation_distribution: ValuationMethodDistribution::default(),
3151            bom_percent: default_bom_percent(),
3152            max_bom_depth: default_max_bom_depth(),
3153        }
3154    }
3155}
3156
/// Material type distribution.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all six keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialTypeDistribution {
    /// Raw materials
    pub raw_material: f64,
    /// Semi-finished goods
    pub semi_finished: f64,
    /// Finished goods
    pub finished_good: f64,
    /// Trading goods (purchased for resale)
    pub trading_good: f64,
    /// Operating supplies
    pub operating_supply: f64,
    /// Services
    pub service: f64,
}
3173
3174impl Default for MaterialTypeDistribution {
3175    fn default() -> Self {
3176        Self {
3177            raw_material: 0.30,
3178            semi_finished: 0.15,
3179            finished_good: 0.25,
3180            trading_good: 0.15,
3181            operating_supply: 0.10,
3182            service: 0.05,
3183        }
3184    }
3185}
3186
/// Valuation method distribution for materials.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all four keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuationMethodDistribution {
    /// Standard cost
    pub standard_cost: f64,
    /// Moving average
    pub moving_average: f64,
    /// FIFO (First In, First Out)
    pub fifo: f64,
    /// LIFO (Last In, First Out)
    pub lifo: f64,
}
3199
3200impl Default for ValuationMethodDistribution {
3201    fn default() -> Self {
3202        Self {
3203            standard_cost: 0.50,
3204            moving_average: 0.30,
3205            fifo: 0.15,
3206            lifo: 0.05,
3207        }
3208    }
3209}
3210
/// Fixed asset master data configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixedAssetMasterConfig {
    /// Number of fixed assets to generate (falls back to 800)
    #[serde(default = "default_asset_count")]
    pub count: usize,
    /// Asset class distribution
    #[serde(default)]
    pub class_distribution: AssetClassDistribution,
    /// Depreciation method distribution
    #[serde(default)]
    pub depreciation_distribution: DepreciationMethodDistribution,
    /// Percentage of assets that are fully depreciated.
    /// Falls back to 0.15, i.e. expressed as a fraction.
    #[serde(default = "default_fully_depreciated_percent")]
    pub fully_depreciated_percent: f64,
    /// Generate acquisition history (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_acquisition_history: bool,
}
3230
/// Serde fallback for `FixedAssetMasterConfig::count` (800 assets).
fn default_asset_count() -> usize {
    const ASSET_COUNT: usize = 800;
    ASSET_COUNT
}
3234
/// Serde fallback for `FixedAssetMasterConfig::fully_depreciated_percent`
/// (0.15, i.e. 15%).
fn default_fully_depreciated_percent() -> f64 {
    const FULLY_DEPRECIATED_PERCENT: f64 = 0.15;
    FULLY_DEPRECIATED_PERCENT
}
3238
3239impl Default for FixedAssetMasterConfig {
3240    fn default() -> Self {
3241        Self {
3242            count: default_asset_count(),
3243            class_distribution: AssetClassDistribution::default(),
3244            depreciation_distribution: DepreciationMethodDistribution::default(),
3245            fully_depreciated_percent: default_fully_depreciated_percent(),
3246            generate_acquisition_history: true,
3247        }
3248    }
3249}
3250
/// Asset class distribution.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all seven keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetClassDistribution {
    /// Buildings and structures
    pub buildings: f64,
    /// Machinery and equipment
    pub machinery: f64,
    /// Vehicles
    pub vehicles: f64,
    /// IT equipment
    pub it_equipment: f64,
    /// Furniture and fixtures
    pub furniture: f64,
    /// Land (non-depreciable)
    pub land: f64,
    /// Leasehold improvements
    pub leasehold: f64,
}
3269
3270impl Default for AssetClassDistribution {
3271    fn default() -> Self {
3272        Self {
3273            buildings: 0.15,
3274            machinery: 0.30,
3275            vehicles: 0.15,
3276            it_equipment: 0.20,
3277            furniture: 0.10,
3278            land: 0.05,
3279            leasehold: 0.05,
3280        }
3281    }
3282}
3283
/// Depreciation method distribution.
///
/// The fields have no per-field serde default, so when this mapping is
/// supplied all five keys must be present. The built-in `Default` weights
/// sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DepreciationMethodDistribution {
    /// Straight line
    pub straight_line: f64,
    /// Declining balance
    pub declining_balance: f64,
    /// Double declining balance
    pub double_declining: f64,
    /// Sum of years' digits
    pub sum_of_years: f64,
    /// Units of production
    pub units_of_production: f64,
}
3298
3299impl Default for DepreciationMethodDistribution {
3300    fn default() -> Self {
3301        Self {
3302            straight_line: 0.60,
3303            declining_balance: 0.20,
3304            double_declining: 0.10,
3305            sum_of_years: 0.05,
3306            units_of_production: 0.05,
3307        }
3308    }
3309}
3310
/// Employee master data configuration.
///
/// All fields are optional at the serde level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmployeeMasterConfig {
    /// Number of employees to generate (falls back to 1500)
    #[serde(default = "default_employee_count")]
    pub count: usize,
    /// Generate organizational hierarchy (`Default` sets this to `true`)
    #[serde(default = "default_true")]
    pub generate_hierarchy: bool,
    /// Maximum hierarchy depth (falls back to 6)
    #[serde(default = "default_hierarchy_depth")]
    pub max_hierarchy_depth: u8,
    /// Average span of control (direct reports per manager); falls back to 5.0
    #[serde(default = "default_span_of_control")]
    pub average_span_of_control: f64,
    /// Approval limit distribution by job level
    #[serde(default)]
    pub approval_limits: ApprovalLimitDistribution,
    /// Department distribution
    #[serde(default)]
    pub department_distribution: EmployeeDepartmentDistribution,
}
3333
/// Serde fallback for `EmployeeMasterConfig::count` (1500 employees).
fn default_employee_count() -> usize {
    const EMPLOYEE_COUNT: usize = 1500;
    EMPLOYEE_COUNT
}
3337
/// Serde fallback for `EmployeeMasterConfig::max_hierarchy_depth` (6 levels).
fn default_hierarchy_depth() -> u8 {
    const HIERARCHY_DEPTH: u8 = 6;
    HIERARCHY_DEPTH
}
3341
/// Serde fallback for `EmployeeMasterConfig::average_span_of_control`
/// (5.0 direct reports per manager).
fn default_span_of_control() -> f64 {
    const SPAN_OF_CONTROL: f64 = 5.0;
    SPAN_OF_CONTROL
}
3345
3346impl Default for EmployeeMasterConfig {
3347    fn default() -> Self {
3348        Self {
3349            count: default_employee_count(),
3350            generate_hierarchy: true,
3351            max_hierarchy_depth: default_hierarchy_depth(),
3352            average_span_of_control: default_span_of_control(),
3353            approval_limits: ApprovalLimitDistribution::default(),
3354            department_distribution: EmployeeDepartmentDistribution::default(),
3355        }
3356    }
3357}
3358
/// Approval limit distribution by job level.
///
/// Despite the name, the fields are monetary limits rather than weights.
/// Every level has its own serde fallback, so any subset may be supplied.
///
/// NOTE(review): the `executive` fallback is `f64::INFINITY`. JSON has no
/// literal for infinity, so a default-valued config may not round-trip
/// through a JSON serializer — confirm against the formats this project
/// exports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalLimitDistribution {
    /// Staff level approval limit (falls back to 1000.0)
    #[serde(default = "default_staff_limit")]
    pub staff: f64,
    /// Senior staff approval limit (falls back to 5000.0)
    #[serde(default = "default_senior_limit")]
    pub senior: f64,
    /// Manager approval limit (falls back to 25000.0)
    #[serde(default = "default_manager_limit")]
    pub manager: f64,
    /// Director approval limit (falls back to 100000.0)
    #[serde(default = "default_director_limit")]
    pub director: f64,
    /// VP approval limit (falls back to 500000.0)
    #[serde(default = "default_vp_limit")]
    pub vp: f64,
    /// Executive approval limit (falls back to `f64::INFINITY`, i.e. unlimited)
    #[serde(default = "default_executive_limit")]
    pub executive: f64,
}
3381
/// Serde fallback for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    1_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    5_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    25_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    100_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    500_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::executive`: no upper bound.
///
/// NOTE(review): positive infinity has no JSON representation; `serde_json`
/// writes non-finite floats as `null`, which would not read back as an `f64`.
/// Confirm this default is never round-tripped through JSON.
fn default_executive_limit() -> f64 {
    f64::INFINITY
}
3400
3401impl Default for ApprovalLimitDistribution {
3402    fn default() -> Self {
3403        Self {
3404            staff: default_staff_limit(),
3405            senior: default_senior_limit(),
3406            manager: default_manager_limit(),
3407            director: default_director_limit(),
3408            vp: default_vp_limit(),
3409            executive: default_executive_limit(),
3410        }
3411    }
3412}
3413
3414/// Employee distribution across departments.
3415#[derive(Debug, Clone, Serialize, Deserialize)]
3416pub struct EmployeeDepartmentDistribution {
3417    /// Finance and Accounting
3418    pub finance: f64,
3419    /// Procurement
3420    pub procurement: f64,
3421    /// Sales
3422    pub sales: f64,
3423    /// Warehouse and Logistics
3424    pub warehouse: f64,
3425    /// IT
3426    pub it: f64,
3427    /// Human Resources
3428    pub hr: f64,
3429    /// Operations
3430    pub operations: f64,
3431    /// Executive
3432    pub executive: f64,
3433}
3434
3435impl Default for EmployeeDepartmentDistribution {
3436    fn default() -> Self {
3437        Self {
3438            finance: 0.12,
3439            procurement: 0.10,
3440            sales: 0.25,
3441            warehouse: 0.15,
3442            it: 0.10,
3443            hr: 0.05,
3444            operations: 0.20,
3445            executive: 0.03,
3446        }
3447    }
3448}
3449
3450/// Cost center master data configuration.
3451#[derive(Debug, Clone, Serialize, Deserialize)]
3452pub struct CostCenterMasterConfig {
3453    /// Number of cost centers to generate
3454    #[serde(default = "default_cost_center_count")]
3455    pub count: usize,
3456    /// Generate cost center hierarchy
3457    #[serde(default = "default_true")]
3458    pub generate_hierarchy: bool,
3459    /// Maximum hierarchy depth
3460    #[serde(default = "default_cc_hierarchy_depth")]
3461    pub max_hierarchy_depth: u8,
3462}
3463
/// Serde fallback for `CostCenterMasterConfig::count`.
fn default_cost_center_count() -> usize {
    50
}

/// Serde fallback for `CostCenterMasterConfig::max_hierarchy_depth`.
fn default_cc_hierarchy_depth() -> u8 {
    3
}
3471
3472impl Default for CostCenterMasterConfig {
3473    fn default() -> Self {
3474        Self {
3475            count: default_cost_center_count(),
3476            generate_hierarchy: true,
3477            max_hierarchy_depth: default_cc_hierarchy_depth(),
3478        }
3479    }
3480}
3481
3482// ============================================================================
3483// Document Flow Configuration
3484// ============================================================================
3485
3486/// Document flow generation configuration.
3487#[derive(Debug, Clone, Serialize, Deserialize)]
3488pub struct DocumentFlowConfig {
3489    /// P2P (Procure-to-Pay) flow configuration
3490    #[serde(default)]
3491    pub p2p: P2PFlowConfig,
3492    /// O2C (Order-to-Cash) flow configuration
3493    #[serde(default)]
3494    pub o2c: O2CFlowConfig,
3495    /// Generate document reference chains
3496    #[serde(default = "default_true")]
3497    pub generate_document_references: bool,
3498    /// Export document flow graph
3499    #[serde(default)]
3500    pub export_flow_graph: bool,
3501}
3502
3503impl Default for DocumentFlowConfig {
3504    fn default() -> Self {
3505        Self {
3506            p2p: P2PFlowConfig::default(),
3507            o2c: O2CFlowConfig::default(),
3508            generate_document_references: true,
3509            export_flow_graph: false,
3510        }
3511    }
3512}
3513
3514/// P2P (Procure-to-Pay) flow configuration.
3515#[derive(Debug, Clone, Serialize, Deserialize)]
3516pub struct P2PFlowConfig {
3517    /// Enable P2P document flow generation
3518    #[serde(default = "default_true")]
3519    pub enabled: bool,
3520    /// Three-way match success rate (PO-GR-Invoice)
3521    #[serde(default = "default_three_way_match_rate")]
3522    pub three_way_match_rate: f64,
3523    /// Rate of partial deliveries
3524    #[serde(default = "default_partial_delivery_rate")]
3525    pub partial_delivery_rate: f64,
3526    /// Rate of price variances between PO and Invoice
3527    #[serde(default = "default_price_variance_rate")]
3528    pub price_variance_rate: f64,
3529    /// Maximum price variance percentage
3530    #[serde(default = "default_max_price_variance")]
3531    pub max_price_variance_percent: f64,
3532    /// Rate of quantity variances between PO/GR and Invoice
3533    #[serde(default = "default_quantity_variance_rate")]
3534    pub quantity_variance_rate: f64,
3535    /// Average days from PO to goods receipt
3536    #[serde(default = "default_po_to_gr_days")]
3537    pub average_po_to_gr_days: u32,
3538    /// Average days from GR to invoice
3539    #[serde(default = "default_gr_to_invoice_days")]
3540    pub average_gr_to_invoice_days: u32,
3541    /// Average days from invoice to payment
3542    #[serde(default = "default_invoice_to_payment_days")]
3543    pub average_invoice_to_payment_days: u32,
3544    /// PO line count distribution
3545    #[serde(default)]
3546    pub line_count_distribution: DocumentLineCountDistribution,
3547    /// Payment behavior configuration
3548    #[serde(default)]
3549    pub payment_behavior: P2PPaymentBehaviorConfig,
3550    /// Rate of over-deliveries (quantity received exceeds PO quantity)
3551    #[serde(default)]
3552    pub over_delivery_rate: Option<f64>,
3553    /// Rate of early payment discounts being taken
3554    #[serde(default)]
3555    pub early_payment_discount_rate: Option<f64>,
3556}
3557
/// Serde fallback for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    0.95
}

/// Serde fallback for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    0.15
}

/// Serde fallback for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    0.08
}

/// Serde fallback for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    0.05
}

/// Serde fallback for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    0.05
}

/// Serde fallback for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    14
}

/// Serde fallback for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    5
}

/// Serde fallback for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    30
}
3589
3590impl Default for P2PFlowConfig {
3591    fn default() -> Self {
3592        Self {
3593            enabled: true,
3594            three_way_match_rate: default_three_way_match_rate(),
3595            partial_delivery_rate: default_partial_delivery_rate(),
3596            price_variance_rate: default_price_variance_rate(),
3597            max_price_variance_percent: default_max_price_variance(),
3598            quantity_variance_rate: default_quantity_variance_rate(),
3599            average_po_to_gr_days: default_po_to_gr_days(),
3600            average_gr_to_invoice_days: default_gr_to_invoice_days(),
3601            average_invoice_to_payment_days: default_invoice_to_payment_days(),
3602            line_count_distribution: DocumentLineCountDistribution::default(),
3603            payment_behavior: P2PPaymentBehaviorConfig::default(),
3604            over_delivery_rate: None,
3605            early_payment_discount_rate: None,
3606        }
3607    }
3608}
3609
3610// ============================================================================
3611// P2P Payment Behavior Configuration
3612// ============================================================================
3613
3614/// P2P payment behavior configuration.
3615#[derive(Debug, Clone, Serialize, Deserialize)]
3616pub struct P2PPaymentBehaviorConfig {
3617    /// Rate of late payments (beyond due date)
3618    #[serde(default = "default_p2p_late_payment_rate")]
3619    pub late_payment_rate: f64,
3620    /// Distribution of late payment days
3621    #[serde(default)]
3622    pub late_payment_days_distribution: LatePaymentDaysDistribution,
3623    /// Rate of partial payments
3624    #[serde(default = "default_p2p_partial_payment_rate")]
3625    pub partial_payment_rate: f64,
3626    /// Rate of payment corrections (NSF, chargebacks, reversals)
3627    #[serde(default = "default_p2p_payment_correction_rate")]
3628    pub payment_correction_rate: f64,
3629    /// Average days until partial payment remainder is paid
3630    #[serde(default = "default_p2p_avg_days_until_remainder")]
3631    pub avg_days_until_remainder: u32,
3632}
3633
/// Serde fallback for `P2PPaymentBehaviorConfig::late_payment_rate`.
fn default_p2p_late_payment_rate() -> f64 {
    0.15
}

/// Serde fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05
}

/// Serde fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02
}

/// Serde fallback for `P2PPaymentBehaviorConfig::avg_days_until_remainder`.
fn default_p2p_avg_days_until_remainder() -> u32 {
    30
}
3649
3650impl Default for P2PPaymentBehaviorConfig {
3651    fn default() -> Self {
3652        Self {
3653            late_payment_rate: default_p2p_late_payment_rate(),
3654            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
3655            partial_payment_rate: default_p2p_partial_payment_rate(),
3656            payment_correction_rate: default_p2p_payment_correction_rate(),
3657            avg_days_until_remainder: default_p2p_avg_days_until_remainder(),
3658        }
3659    }
3660}
3661
3662/// Distribution of late payment days for P2P.
3663#[derive(Debug, Clone, Serialize, Deserialize)]
3664pub struct LatePaymentDaysDistribution {
3665    /// 1-7 days late (slightly late)
3666    #[serde(default = "default_slightly_late")]
3667    pub slightly_late_1_to_7: f64,
3668    /// 8-14 days late
3669    #[serde(default = "default_late_8_14")]
3670    pub late_8_to_14: f64,
3671    /// 15-30 days late (very late)
3672    #[serde(default = "default_very_late")]
3673    pub very_late_15_to_30: f64,
3674    /// 31-60 days late (severely late)
3675    #[serde(default = "default_severely_late")]
3676    pub severely_late_31_to_60: f64,
3677    /// Over 60 days late (extremely late)
3678    #[serde(default = "default_extremely_late")]
3679    pub extremely_late_over_60: f64,
3680}
3681
/// Serde fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`.
fn default_slightly_late() -> f64 {
    0.50
}

/// Serde fallback for `LatePaymentDaysDistribution::late_8_to_14`.
fn default_late_8_14() -> f64 {
    0.25
}

/// Serde fallback for `LatePaymentDaysDistribution::very_late_15_to_30`.
fn default_very_late() -> f64 {
    0.15
}

/// Serde fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`.
fn default_severely_late() -> f64 {
    0.07
}

/// Serde fallback for `LatePaymentDaysDistribution::extremely_late_over_60`.
fn default_extremely_late() -> f64 {
    0.03
}
3701
3702impl Default for LatePaymentDaysDistribution {
3703    fn default() -> Self {
3704        Self {
3705            slightly_late_1_to_7: default_slightly_late(),
3706            late_8_to_14: default_late_8_14(),
3707            very_late_15_to_30: default_very_late(),
3708            severely_late_31_to_60: default_severely_late(),
3709            extremely_late_over_60: default_extremely_late(),
3710        }
3711    }
3712}
3713
3714/// O2C (Order-to-Cash) flow configuration.
3715#[derive(Debug, Clone, Serialize, Deserialize)]
3716pub struct O2CFlowConfig {
3717    /// Enable O2C document flow generation
3718    #[serde(default = "default_true")]
3719    pub enabled: bool,
3720    /// Credit check failure rate
3721    #[serde(default = "default_credit_check_failure_rate")]
3722    pub credit_check_failure_rate: f64,
3723    /// Rate of partial shipments
3724    #[serde(default = "default_partial_shipment_rate")]
3725    pub partial_shipment_rate: f64,
3726    /// Rate of returns
3727    #[serde(default = "default_return_rate")]
3728    pub return_rate: f64,
3729    /// Bad debt write-off rate
3730    #[serde(default = "default_bad_debt_rate")]
3731    pub bad_debt_rate: f64,
3732    /// Average days from SO to delivery
3733    #[serde(default = "default_so_to_delivery_days")]
3734    pub average_so_to_delivery_days: u32,
3735    /// Average days from delivery to invoice
3736    #[serde(default = "default_delivery_to_invoice_days")]
3737    pub average_delivery_to_invoice_days: u32,
3738    /// Average days from invoice to receipt
3739    #[serde(default = "default_invoice_to_receipt_days")]
3740    pub average_invoice_to_receipt_days: u32,
3741    /// SO line count distribution
3742    #[serde(default)]
3743    pub line_count_distribution: DocumentLineCountDistribution,
3744    /// Cash discount configuration
3745    #[serde(default)]
3746    pub cash_discount: CashDiscountConfig,
3747    /// Payment behavior configuration
3748    #[serde(default)]
3749    pub payment_behavior: O2CPaymentBehaviorConfig,
3750    /// Rate of late payments
3751    #[serde(default)]
3752    pub late_payment_rate: Option<f64>,
3753}
3754
/// Serde fallback for `O2CFlowConfig::credit_check_failure_rate`.
fn default_credit_check_failure_rate() -> f64 {
    0.02
}

/// Serde fallback for `O2CFlowConfig::partial_shipment_rate`.
fn default_partial_shipment_rate() -> f64 {
    0.10
}

/// Serde fallback for `O2CFlowConfig::return_rate`.
fn default_return_rate() -> f64 {
    0.03
}

/// Serde fallback for `O2CFlowConfig::bad_debt_rate`.
fn default_bad_debt_rate() -> f64 {
    0.01
}

/// Serde fallback for `O2CFlowConfig::average_so_to_delivery_days`.
fn default_so_to_delivery_days() -> u32 {
    7
}

/// Serde fallback for `O2CFlowConfig::average_delivery_to_invoice_days`.
fn default_delivery_to_invoice_days() -> u32 {
    1
}

/// Serde fallback for `O2CFlowConfig::average_invoice_to_receipt_days`.
fn default_invoice_to_receipt_days() -> u32 {
    45
}
3782
3783impl Default for O2CFlowConfig {
3784    fn default() -> Self {
3785        Self {
3786            enabled: true,
3787            credit_check_failure_rate: default_credit_check_failure_rate(),
3788            partial_shipment_rate: default_partial_shipment_rate(),
3789            return_rate: default_return_rate(),
3790            bad_debt_rate: default_bad_debt_rate(),
3791            average_so_to_delivery_days: default_so_to_delivery_days(),
3792            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
3793            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
3794            line_count_distribution: DocumentLineCountDistribution::default(),
3795            cash_discount: CashDiscountConfig::default(),
3796            payment_behavior: O2CPaymentBehaviorConfig::default(),
3797            late_payment_rate: None,
3798        }
3799    }
3800}
3801
3802// ============================================================================
3803// O2C Payment Behavior Configuration
3804// ============================================================================
3805
3806/// O2C payment behavior configuration.
3807#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3808pub struct O2CPaymentBehaviorConfig {
3809    /// Dunning (Mahnung) configuration
3810    #[serde(default)]
3811    pub dunning: DunningConfig,
3812    /// Partial payment configuration
3813    #[serde(default)]
3814    pub partial_payments: PartialPaymentConfig,
3815    /// Short payment configuration (unauthorized deductions)
3816    #[serde(default)]
3817    pub short_payments: ShortPaymentConfig,
3818    /// On-account payment configuration (unapplied payments)
3819    #[serde(default)]
3820    pub on_account_payments: OnAccountPaymentConfig,
3821    /// Payment correction configuration (NSF, chargebacks)
3822    #[serde(default)]
3823    pub payment_corrections: PaymentCorrectionConfig,
3824}
3825
3826/// Dunning (Mahnungen) configuration for AR collections.
3827#[derive(Debug, Clone, Serialize, Deserialize)]
3828pub struct DunningConfig {
3829    /// Enable dunning process
3830    #[serde(default)]
3831    pub enabled: bool,
3832    /// Days overdue for level 1 dunning (1st reminder)
3833    #[serde(default = "default_dunning_level_1_days")]
3834    pub level_1_days_overdue: u32,
3835    /// Days overdue for level 2 dunning (2nd reminder)
3836    #[serde(default = "default_dunning_level_2_days")]
3837    pub level_2_days_overdue: u32,
3838    /// Days overdue for level 3 dunning (final notice)
3839    #[serde(default = "default_dunning_level_3_days")]
3840    pub level_3_days_overdue: u32,
3841    /// Days overdue for collection handover
3842    #[serde(default = "default_collection_days")]
3843    pub collection_days_overdue: u32,
3844    /// Payment rates after each dunning level
3845    #[serde(default)]
3846    pub payment_after_dunning_rates: DunningPaymentRates,
3847    /// Rate of invoices blocked from dunning (disputes)
3848    #[serde(default = "default_dunning_block_rate")]
3849    pub dunning_block_rate: f64,
3850    /// Interest rate per year for overdue amounts
3851    #[serde(default = "default_dunning_interest_rate")]
3852    pub interest_rate_per_year: f64,
3853    /// Fixed dunning charge per letter
3854    #[serde(default = "default_dunning_charge")]
3855    pub dunning_charge: f64,
3856}
3857
/// Serde fallback for `DunningConfig::level_1_days_overdue`.
fn default_dunning_level_1_days() -> u32 {
    14
}

/// Serde fallback for `DunningConfig::level_2_days_overdue`.
fn default_dunning_level_2_days() -> u32 {
    28
}

/// Serde fallback for `DunningConfig::level_3_days_overdue`.
fn default_dunning_level_3_days() -> u32 {
    42
}

/// Serde fallback for `DunningConfig::collection_days_overdue`.
fn default_collection_days() -> u32 {
    60
}

/// Serde fallback for `DunningConfig::dunning_block_rate`.
fn default_dunning_block_rate() -> f64 {
    0.05
}

/// Serde fallback for `DunningConfig::interest_rate_per_year`.
fn default_dunning_interest_rate() -> f64 {
    0.09
}

/// Serde fallback for `DunningConfig::dunning_charge`.
fn default_dunning_charge() -> f64 {
    25.0
}
3885
3886impl Default for DunningConfig {
3887    fn default() -> Self {
3888        Self {
3889            enabled: false,
3890            level_1_days_overdue: default_dunning_level_1_days(),
3891            level_2_days_overdue: default_dunning_level_2_days(),
3892            level_3_days_overdue: default_dunning_level_3_days(),
3893            collection_days_overdue: default_collection_days(),
3894            payment_after_dunning_rates: DunningPaymentRates::default(),
3895            dunning_block_rate: default_dunning_block_rate(),
3896            interest_rate_per_year: default_dunning_interest_rate(),
3897            dunning_charge: default_dunning_charge(),
3898        }
3899    }
3900}
3901
3902/// Payment rates after each dunning level.
3903#[derive(Debug, Clone, Serialize, Deserialize)]
3904pub struct DunningPaymentRates {
3905    /// Rate that pays after level 1 reminder
3906    #[serde(default = "default_after_level_1")]
3907    pub after_level_1: f64,
3908    /// Rate that pays after level 2 reminder
3909    #[serde(default = "default_after_level_2")]
3910    pub after_level_2: f64,
3911    /// Rate that pays after level 3 final notice
3912    #[serde(default = "default_after_level_3")]
3913    pub after_level_3: f64,
3914    /// Rate that pays during collection
3915    #[serde(default = "default_during_collection")]
3916    pub during_collection: f64,
3917    /// Rate that never pays (becomes bad debt)
3918    #[serde(default = "default_never_pay")]
3919    pub never_pay: f64,
3920}
3921
/// Serde fallback for `DunningPaymentRates::after_level_1`.
fn default_after_level_1() -> f64 {
    0.40
}

/// Serde fallback for `DunningPaymentRates::after_level_2`.
fn default_after_level_2() -> f64 {
    0.30
}

/// Serde fallback for `DunningPaymentRates::after_level_3`.
fn default_after_level_3() -> f64 {
    0.15
}

/// Serde fallback for `DunningPaymentRates::during_collection`.
fn default_during_collection() -> f64 {
    0.05
}

/// Serde fallback for `DunningPaymentRates::never_pay`.
fn default_never_pay() -> f64 {
    0.10
}
3941
3942impl Default for DunningPaymentRates {
3943    fn default() -> Self {
3944        Self {
3945            after_level_1: default_after_level_1(),
3946            after_level_2: default_after_level_2(),
3947            after_level_3: default_after_level_3(),
3948            during_collection: default_during_collection(),
3949            never_pay: default_never_pay(),
3950        }
3951    }
3952}
3953
3954/// Partial payment configuration.
3955#[derive(Debug, Clone, Serialize, Deserialize)]
3956pub struct PartialPaymentConfig {
3957    /// Rate of invoices paid partially
3958    #[serde(default = "default_partial_payment_rate")]
3959    pub rate: f64,
3960    /// Distribution of partial payment percentages
3961    #[serde(default)]
3962    pub percentage_distribution: PartialPaymentPercentageDistribution,
3963    /// Average days until remainder is paid
3964    #[serde(default = "default_avg_days_until_remainder")]
3965    pub avg_days_until_remainder: u32,
3966}
3967
/// Serde fallback for `PartialPaymentConfig::rate`.
fn default_partial_payment_rate() -> f64 {
    0.08
}

/// Serde fallback for `PartialPaymentConfig::avg_days_until_remainder`.
fn default_avg_days_until_remainder() -> u32 {
    30
}
3975
3976impl Default for PartialPaymentConfig {
3977    fn default() -> Self {
3978        Self {
3979            rate: default_partial_payment_rate(),
3980            percentage_distribution: PartialPaymentPercentageDistribution::default(),
3981            avg_days_until_remainder: default_avg_days_until_remainder(),
3982        }
3983    }
3984}
3985
3986/// Distribution of partial payment percentages.
3987#[derive(Debug, Clone, Serialize, Deserialize)]
3988pub struct PartialPaymentPercentageDistribution {
3989    /// Pay 25% of invoice
3990    #[serde(default = "default_partial_25")]
3991    pub pay_25_percent: f64,
3992    /// Pay 50% of invoice
3993    #[serde(default = "default_partial_50")]
3994    pub pay_50_percent: f64,
3995    /// Pay 75% of invoice
3996    #[serde(default = "default_partial_75")]
3997    pub pay_75_percent: f64,
3998    /// Pay random percentage
3999    #[serde(default = "default_partial_random")]
4000    pub pay_random_percent: f64,
4001}
4002
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_25_percent`.
fn default_partial_25() -> f64 {
    0.15
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_50_percent`.
fn default_partial_50() -> f64 {
    0.50
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_75_percent`.
fn default_partial_75() -> f64 {
    0.25
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_random_percent`.
fn default_partial_random() -> f64 {
    0.10
}
4018
4019impl Default for PartialPaymentPercentageDistribution {
4020    fn default() -> Self {
4021        Self {
4022            pay_25_percent: default_partial_25(),
4023            pay_50_percent: default_partial_50(),
4024            pay_75_percent: default_partial_75(),
4025            pay_random_percent: default_partial_random(),
4026        }
4027    }
4028}
4029
4030/// Short payment configuration (unauthorized deductions).
4031#[derive(Debug, Clone, Serialize, Deserialize)]
4032pub struct ShortPaymentConfig {
4033    /// Rate of payments that are short
4034    #[serde(default = "default_short_payment_rate")]
4035    pub rate: f64,
4036    /// Distribution of short payment reasons
4037    #[serde(default)]
4038    pub reason_distribution: ShortPaymentReasonDistribution,
4039    /// Maximum percentage that can be short
4040    #[serde(default = "default_max_short_percent")]
4041    pub max_short_percent: f64,
4042}
4043
/// Serde fallback for `ShortPaymentConfig::rate`.
fn default_short_payment_rate() -> f64 {
    0.03
}

/// Serde fallback for `ShortPaymentConfig::max_short_percent`.
fn default_max_short_percent() -> f64 {
    0.10
}
4051
4052impl Default for ShortPaymentConfig {
4053    fn default() -> Self {
4054        Self {
4055            rate: default_short_payment_rate(),
4056            reason_distribution: ShortPaymentReasonDistribution::default(),
4057            max_short_percent: default_max_short_percent(),
4058        }
4059    }
4060}
4061
4062/// Distribution of short payment reasons.
4063#[derive(Debug, Clone, Serialize, Deserialize)]
4064pub struct ShortPaymentReasonDistribution {
4065    /// Pricing dispute
4066    #[serde(default = "default_pricing_dispute")]
4067    pub pricing_dispute: f64,
4068    /// Quality issue
4069    #[serde(default = "default_quality_issue")]
4070    pub quality_issue: f64,
4071    /// Quantity discrepancy
4072    #[serde(default = "default_quantity_discrepancy")]
4073    pub quantity_discrepancy: f64,
4074    /// Unauthorized deduction
4075    #[serde(default = "default_unauthorized_deduction")]
4076    pub unauthorized_deduction: f64,
4077    /// Early payment discount taken incorrectly
4078    #[serde(default = "default_incorrect_discount")]
4079    pub incorrect_discount: f64,
4080}
4081
/// Serde fallback for `ShortPaymentReasonDistribution::pricing_dispute`.
fn default_pricing_dispute() -> f64 {
    0.30
}

/// Serde fallback for `ShortPaymentReasonDistribution::quality_issue`.
fn default_quality_issue() -> f64 {
    0.20
}

/// Serde fallback for `ShortPaymentReasonDistribution::quantity_discrepancy`.
fn default_quantity_discrepancy() -> f64 {
    0.20
}

/// Serde fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`.
fn default_unauthorized_deduction() -> f64 {
    0.15
}

/// Serde fallback for `ShortPaymentReasonDistribution::incorrect_discount`.
fn default_incorrect_discount() -> f64 {
    0.15
}
4101
4102impl Default for ShortPaymentReasonDistribution {
4103    fn default() -> Self {
4104        Self {
4105            pricing_dispute: default_pricing_dispute(),
4106            quality_issue: default_quality_issue(),
4107            quantity_discrepancy: default_quantity_discrepancy(),
4108            unauthorized_deduction: default_unauthorized_deduction(),
4109            incorrect_discount: default_incorrect_discount(),
4110        }
4111    }
4112}
4113
4114/// On-account payment configuration (unapplied payments).
4115#[derive(Debug, Clone, Serialize, Deserialize)]
4116pub struct OnAccountPaymentConfig {
4117    /// Rate of payments that are on-account (unapplied)
4118    #[serde(default = "default_on_account_rate")]
4119    pub rate: f64,
4120    /// Average days until on-account payments are applied
4121    #[serde(default = "default_avg_days_until_applied")]
4122    pub avg_days_until_applied: u32,
4123}
4124
/// Serde fallback for `OnAccountPaymentConfig::rate`.
fn default_on_account_rate() -> f64 {
    0.02
}

/// Serde fallback for `OnAccountPaymentConfig::avg_days_until_applied`.
fn default_avg_days_until_applied() -> u32 {
    14
}
4132
4133impl Default for OnAccountPaymentConfig {
4134    fn default() -> Self {
4135        Self {
4136            rate: default_on_account_rate(),
4137            avg_days_until_applied: default_avg_days_until_applied(),
4138        }
4139    }
4140}
4141
4142/// Payment correction configuration.
4143#[derive(Debug, Clone, Serialize, Deserialize)]
4144pub struct PaymentCorrectionConfig {
4145    /// Rate of payments requiring correction
4146    #[serde(default = "default_payment_correction_rate")]
4147    pub rate: f64,
4148    /// Distribution of correction types
4149    #[serde(default)]
4150    pub type_distribution: PaymentCorrectionTypeDistribution,
4151}
4152
/// Serde fallback for `PaymentCorrectionConfig::rate`.
fn default_payment_correction_rate() -> f64 {
    0.02
}
4156
4157impl Default for PaymentCorrectionConfig {
4158    fn default() -> Self {
4159        Self {
4160            rate: default_payment_correction_rate(),
4161            type_distribution: PaymentCorrectionTypeDistribution::default(),
4162        }
4163    }
4164}
4165
4166/// Distribution of payment correction types.
4167#[derive(Debug, Clone, Serialize, Deserialize)]
4168pub struct PaymentCorrectionTypeDistribution {
4169    /// NSF (Non-sufficient funds) / bounced check
4170    #[serde(default = "default_nsf_rate")]
4171    pub nsf: f64,
4172    /// Chargeback
4173    #[serde(default = "default_chargeback_rate")]
4174    pub chargeback: f64,
4175    /// Wrong amount applied
4176    #[serde(default = "default_wrong_amount_rate")]
4177    pub wrong_amount: f64,
4178    /// Wrong customer applied
4179    #[serde(default = "default_wrong_customer_rate")]
4180    pub wrong_customer: f64,
4181    /// Duplicate payment
4182    #[serde(default = "default_duplicate_payment_rate")]
4183    pub duplicate_payment: f64,
4184}
4185
/// Serde fallback for `PaymentCorrectionTypeDistribution::nsf`.
fn default_nsf_rate() -> f64 {
    0.30
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::chargeback`.
fn default_chargeback_rate() -> f64 {
    0.20
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::wrong_amount`.
fn default_wrong_amount_rate() -> f64 {
    0.20
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::wrong_customer`.
fn default_wrong_customer_rate() -> f64 {
    0.15
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::duplicate_payment`.
fn default_duplicate_payment_rate() -> f64 {
    0.15
}
4205
4206impl Default for PaymentCorrectionTypeDistribution {
4207    fn default() -> Self {
4208        Self {
4209            nsf: default_nsf_rate(),
4210            chargeback: default_chargeback_rate(),
4211            wrong_amount: default_wrong_amount_rate(),
4212            wrong_customer: default_wrong_customer_rate(),
4213            duplicate_payment: default_duplicate_payment_rate(),
4214        }
4215    }
4216}
4217
4218/// Document line count distribution.
4219#[derive(Debug, Clone, Serialize, Deserialize)]
4220pub struct DocumentLineCountDistribution {
4221    /// Minimum number of lines
4222    #[serde(default = "default_min_lines")]
4223    pub min_lines: u32,
4224    /// Maximum number of lines
4225    #[serde(default = "default_max_lines")]
4226    pub max_lines: u32,
4227    /// Most common line count (mode)
4228    #[serde(default = "default_mode_lines")]
4229    pub mode_lines: u32,
4230}
4231
/// Serde fallback for `DocumentLineCountDistribution::min_lines`.
fn default_min_lines() -> u32 {
    1
}

/// Serde fallback for `DocumentLineCountDistribution::max_lines`.
fn default_max_lines() -> u32 {
    20
}

/// Serde fallback for `DocumentLineCountDistribution::mode_lines`.
fn default_mode_lines() -> u32 {
    3
}
4243
4244impl Default for DocumentLineCountDistribution {
4245    fn default() -> Self {
4246        Self {
4247            min_lines: default_min_lines(),
4248            max_lines: default_max_lines(),
4249            mode_lines: default_mode_lines(),
4250        }
4251    }
4252}
4253
4254/// Cash discount configuration.
4255#[derive(Debug, Clone, Serialize, Deserialize)]
4256pub struct CashDiscountConfig {
4257    /// Percentage of invoices eligible for cash discount
4258    #[serde(default = "default_discount_eligible_rate")]
4259    pub eligible_rate: f64,
4260    /// Rate at which customers take the discount
4261    #[serde(default = "default_discount_taken_rate")]
4262    pub taken_rate: f64,
4263    /// Standard discount percentage
4264    #[serde(default = "default_discount_percent")]
4265    pub discount_percent: f64,
4266    /// Days within which discount must be taken
4267    #[serde(default = "default_discount_days")]
4268    pub discount_days: u32,
4269}
4270
/// Serde fallback for `CashDiscountConfig::eligible_rate`.
fn default_discount_eligible_rate() -> f64 {
    0.30
}

/// Serde fallback for `CashDiscountConfig::taken_rate`.
fn default_discount_taken_rate() -> f64 {
    0.60
}

/// Serde fallback for `CashDiscountConfig::discount_percent`.
fn default_discount_percent() -> f64 {
    0.02
}

/// Serde fallback for `CashDiscountConfig::discount_days`.
fn default_discount_days() -> u32 {
    10
}
4286
4287impl Default for CashDiscountConfig {
4288    fn default() -> Self {
4289        Self {
4290            eligible_rate: default_discount_eligible_rate(),
4291            taken_rate: default_discount_taken_rate(),
4292            discount_percent: default_discount_percent(),
4293            discount_days: default_discount_days(),
4294        }
4295    }
4296}
4297
4298// ============================================================================
4299// Intercompany Configuration
4300// ============================================================================
4301
4302/// Intercompany transaction configuration.
4303#[derive(Debug, Clone, Serialize, Deserialize)]
4304pub struct IntercompanyConfig {
4305    /// Enable intercompany transaction generation
4306    #[serde(default)]
4307    pub enabled: bool,
4308    /// Rate of transactions that are intercompany
4309    #[serde(default = "default_ic_transaction_rate")]
4310    pub ic_transaction_rate: f64,
4311    /// Transfer pricing method
4312    #[serde(default)]
4313    pub transfer_pricing_method: TransferPricingMethod,
4314    /// Transfer pricing markup percentage (for cost-plus)
4315    #[serde(default = "default_markup_percent")]
4316    pub markup_percent: f64,
4317    /// Generate matched IC pairs (offsetting entries)
4318    #[serde(default = "default_true")]
4319    pub generate_matched_pairs: bool,
4320    /// IC transaction type distribution
4321    #[serde(default)]
4322    pub transaction_type_distribution: ICTransactionTypeDistribution,
4323    /// Generate elimination entries for consolidation
4324    #[serde(default)]
4325    pub generate_eliminations: bool,
4326}
4327
/// Serde fallback for `IntercompanyConfig::ic_transaction_rate`.
fn default_ic_transaction_rate() -> f64 {
    0.15
}

/// Serde fallback for `IntercompanyConfig::markup_percent`.
fn default_markup_percent() -> f64 {
    0.05
}
4335
4336impl Default for IntercompanyConfig {
4337    fn default() -> Self {
4338        Self {
4339            enabled: false,
4340            ic_transaction_rate: default_ic_transaction_rate(),
4341            transfer_pricing_method: TransferPricingMethod::default(),
4342            markup_percent: default_markup_percent(),
4343            generate_matched_pairs: true,
4344            transaction_type_distribution: ICTransactionTypeDistribution::default(),
4345            generate_eliminations: false,
4346        }
4347    }
4348}
4349
/// Transfer pricing method.
///
/// Serialized in snake_case (`cost_plus`, `comparable_uncontrolled`,
/// `resale_price`, `transactional_net_margin`, `profit_split`).
/// `CostPlus` is the default variant.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TransferPricingMethod {
    /// Cost plus a markup
    #[default]
    CostPlus,
    /// Comparable uncontrolled price
    ComparableUncontrolled,
    /// Resale price method
    ResalePrice,
    /// Transactional net margin method
    TransactionalNetMargin,
    /// Profit split method
    ProfitSplit,
}
4366
4367/// IC transaction type distribution.
4368#[derive(Debug, Clone, Serialize, Deserialize)]
4369pub struct ICTransactionTypeDistribution {
4370    /// Goods sales between entities
4371    pub goods_sale: f64,
4372    /// Services provided
4373    pub service_provided: f64,
4374    /// Intercompany loans
4375    pub loan: f64,
4376    /// Dividends
4377    pub dividend: f64,
4378    /// Management fees
4379    pub management_fee: f64,
4380    /// Royalties
4381    pub royalty: f64,
4382    /// Cost sharing
4383    pub cost_sharing: f64,
4384}
4385
4386impl Default for ICTransactionTypeDistribution {
4387    fn default() -> Self {
4388        Self {
4389            goods_sale: 0.35,
4390            service_provided: 0.20,
4391            loan: 0.10,
4392            dividend: 0.05,
4393            management_fee: 0.15,
4394            royalty: 0.10,
4395            cost_sharing: 0.05,
4396        }
4397    }
4398}
4399
4400// ============================================================================
4401// Balance Configuration
4402// ============================================================================
4403
4404/// Balance and trial balance configuration.
4405#[derive(Debug, Clone, Serialize, Deserialize)]
4406pub struct BalanceConfig {
4407    /// Generate opening balances
4408    #[serde(default)]
4409    pub generate_opening_balances: bool,
4410    /// Generate trial balances
4411    #[serde(default = "default_true")]
4412    pub generate_trial_balances: bool,
4413    /// Target gross margin (for revenue/COGS coherence)
4414    #[serde(default = "default_gross_margin")]
4415    pub target_gross_margin: f64,
4416    /// Target DSO (Days Sales Outstanding)
4417    #[serde(default = "default_dso")]
4418    pub target_dso_days: u32,
4419    /// Target DPO (Days Payable Outstanding)
4420    #[serde(default = "default_dpo")]
4421    pub target_dpo_days: u32,
4422    /// Target current ratio
4423    #[serde(default = "default_current_ratio")]
4424    pub target_current_ratio: f64,
4425    /// Target debt-to-equity ratio
4426    #[serde(default = "default_debt_equity")]
4427    pub target_debt_to_equity: f64,
4428    /// Validate balance sheet equation (A = L + E)
4429    #[serde(default = "default_true")]
4430    pub validate_balance_equation: bool,
4431    /// Reconcile subledgers to GL control accounts
4432    #[serde(default = "default_true")]
4433    pub reconcile_subledgers: bool,
4434}
4435
/// Serde fallback for `BalanceConfig::target_gross_margin`.
fn default_gross_margin() -> f64 {
    const GROSS_MARGIN: f64 = 0.35;
    GROSS_MARGIN
}
4439
/// Serde fallback for `BalanceConfig::target_dso_days`.
fn default_dso() -> u32 {
    const DSO_DAYS: u32 = 45;
    DSO_DAYS
}
4443
/// Serde fallback for `BalanceConfig::target_dpo_days`.
fn default_dpo() -> u32 {
    const DPO_DAYS: u32 = 30;
    DPO_DAYS
}
4447
/// Serde fallback for `BalanceConfig::target_current_ratio`.
fn default_current_ratio() -> f64 {
    const CURRENT_RATIO: f64 = 1.5;
    CURRENT_RATIO
}
4451
/// Serde fallback for `BalanceConfig::target_debt_to_equity`.
fn default_debt_equity() -> f64 {
    const DEBT_TO_EQUITY: f64 = 0.5;
    DEBT_TO_EQUITY
}
4455
4456impl Default for BalanceConfig {
4457    fn default() -> Self {
4458        Self {
4459            generate_opening_balances: false,
4460            generate_trial_balances: true,
4461            target_gross_margin: default_gross_margin(),
4462            target_dso_days: default_dso(),
4463            target_dpo_days: default_dpo(),
4464            target_current_ratio: default_current_ratio(),
4465            target_debt_to_equity: default_debt_equity(),
4466            validate_balance_equation: true,
4467            reconcile_subledgers: true,
4468        }
4469    }
4470}
4471
4472// ==========================================================================
4473// OCPM (Object-Centric Process Mining) Configuration
4474// ==========================================================================
4475
4476/// OCPM (Object-Centric Process Mining) configuration.
4477///
4478/// Controls generation of OCEL 2.0 compatible event logs with
4479/// many-to-many event-to-object relationships.
4480#[derive(Debug, Clone, Serialize, Deserialize)]
4481pub struct OcpmConfig {
4482    /// Enable OCPM event log generation
4483    #[serde(default)]
4484    pub enabled: bool,
4485
4486    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
4487    #[serde(default = "default_true")]
4488    pub generate_lifecycle_events: bool,
4489
4490    /// Include object-to-object relationships in output
4491    #[serde(default = "default_true")]
4492    pub include_object_relationships: bool,
4493
4494    /// Compute and export process variants
4495    #[serde(default = "default_true")]
4496    pub compute_variants: bool,
4497
4498    /// Maximum variants to track (0 = unlimited)
4499    #[serde(default)]
4500    pub max_variants: usize,
4501
4502    /// P2P process configuration
4503    #[serde(default)]
4504    pub p2p_process: OcpmProcessConfig,
4505
4506    /// O2C process configuration
4507    #[serde(default)]
4508    pub o2c_process: OcpmProcessConfig,
4509
4510    /// Output format configuration
4511    #[serde(default)]
4512    pub output: OcpmOutputConfig,
4513}
4514
4515impl Default for OcpmConfig {
4516    fn default() -> Self {
4517        Self {
4518            enabled: false,
4519            generate_lifecycle_events: true,
4520            include_object_relationships: true,
4521            compute_variants: true,
4522            max_variants: 0,
4523            p2p_process: OcpmProcessConfig::default(),
4524            o2c_process: OcpmProcessConfig::default(),
4525            output: OcpmOutputConfig::default(),
4526        }
4527    }
4528}
4529
4530/// Process-specific OCPM configuration.
4531#[derive(Debug, Clone, Serialize, Deserialize)]
4532pub struct OcpmProcessConfig {
4533    /// Rework probability (0.0-1.0)
4534    #[serde(default = "default_rework_probability")]
4535    pub rework_probability: f64,
4536
4537    /// Skip step probability (0.0-1.0)
4538    #[serde(default = "default_skip_probability")]
4539    pub skip_step_probability: f64,
4540
4541    /// Out-of-order step probability (0.0-1.0)
4542    #[serde(default = "default_out_of_order_probability")]
4543    pub out_of_order_probability: f64,
4544}
4545
4546// Defaults deliberately produce variant counts and Inductive-Miner fitness
4547// in the range seen in real ERP data (dozens of variants, ~0.7–0.9 fitness).
4548// Lowering them all to 0 yields a single-variant happy-path log.
/// Serde fallback for `OcpmProcessConfig::rework_probability`.
fn default_rework_probability() -> f64 {
    const REWORK_PROBABILITY: f64 = 0.15;
    REWORK_PROBABILITY
}
4552
/// Serde fallback for `OcpmProcessConfig::skip_step_probability`.
fn default_skip_probability() -> f64 {
    const SKIP_STEP_PROBABILITY: f64 = 0.10;
    SKIP_STEP_PROBABILITY
}
4556
/// Serde fallback for `OcpmProcessConfig::out_of_order_probability`.
fn default_out_of_order_probability() -> f64 {
    const OUT_OF_ORDER_PROBABILITY: f64 = 0.08;
    OUT_OF_ORDER_PROBABILITY
}
4560
4561impl Default for OcpmProcessConfig {
4562    fn default() -> Self {
4563        Self {
4564            rework_probability: default_rework_probability(),
4565            skip_step_probability: default_skip_probability(),
4566            out_of_order_probability: default_out_of_order_probability(),
4567        }
4568    }
4569}
4570
4571/// OCPM output format configuration.
4572#[derive(Debug, Clone, Serialize, Deserialize)]
4573pub struct OcpmOutputConfig {
4574    /// Export OCEL 2.0 JSON format
4575    #[serde(default = "default_true")]
4576    pub ocel_json: bool,
4577
4578    /// Export OCEL 2.0 XML format
4579    #[serde(default)]
4580    pub ocel_xml: bool,
4581
4582    /// Export XES 2.0 XML format (IEEE standard for process mining tools)
4583    #[serde(default)]
4584    pub xes: bool,
4585
4586    /// Include lifecycle transitions in XES output (start/complete pairs)
4587    #[serde(default = "default_true")]
4588    pub xes_include_lifecycle: bool,
4589
4590    /// Include resource attributes in XES output
4591    #[serde(default = "default_true")]
4592    pub xes_include_resources: bool,
4593
4594    /// Export flattened CSV for each object type
4595    #[serde(default = "default_true")]
4596    pub flattened_csv: bool,
4597
4598    /// Export event-object relationship table
4599    #[serde(default = "default_true")]
4600    pub event_object_csv: bool,
4601
4602    /// Export object-object relationship table
4603    #[serde(default = "default_true")]
4604    pub object_relationship_csv: bool,
4605
4606    /// Export process variants summary
4607    #[serde(default = "default_true")]
4608    pub variants_csv: bool,
4609
4610    /// Export reference process models (canonical P2P, O2C, R2R)
4611    #[serde(default)]
4612    pub export_reference_models: bool,
4613}
4614
4615impl Default for OcpmOutputConfig {
4616    fn default() -> Self {
4617        Self {
4618            ocel_json: true,
4619            ocel_xml: false,
4620            xes: false,
4621            xes_include_lifecycle: true,
4622            xes_include_resources: true,
4623            flattened_csv: true,
4624            event_object_csv: true,
4625            object_relationship_csv: true,
4626            variants_csv: true,
4627            export_reference_models: false,
4628        }
4629    }
4630}
4631
4632/// Audit engagement and workpaper generation configuration.
4633#[derive(Debug, Clone, Serialize, Deserialize)]
4634pub struct AuditGenerationConfig {
4635    /// Enable audit engagement generation
4636    #[serde(default)]
4637    pub enabled: bool,
4638
4639    /// Gate for workpaper generation (v3.3.2+).
4640    /// When `false`, workpapers and dependent evidence are skipped
4641    /// while engagements / risk assessments / findings still generate.
4642    #[serde(default = "default_true")]
4643    pub generate_workpapers: bool,
4644
4645    /// Engagement type distribution (v3.3.2+). Drives per-engagement
4646    /// type draw via `AuditEngagementGenerator::draw_engagement_type`.
4647    #[serde(default)]
4648    pub engagement_types: AuditEngagementTypesConfig,
4649
4650    /// Workpaper configuration (v3.3.2+). `average_per_phase` maps onto
4651    /// `WorkpaperGenerator.workpapers_per_section` as a ±50% band
4652    /// around the average. Sampling / ISA / cross-reference flags are
4653    /// surfaced for downstream formatting overlays.
4654    #[serde(default)]
4655    pub workpapers: WorkpaperConfig,
4656
4657    /// Audit team configuration (v3.3.2+). `min_team_size` /
4658    /// `max_team_size` map directly onto
4659    /// `AuditEngagementGenerator.team_size_range`.
4660    /// `specialist_probability` is reserved for v3.4 (explicit
4661    /// specialist-role support).
4662    #[serde(default)]
4663    pub team: AuditTeamConfig,
4664
4665    /// Review workflow configuration (v3.3.2+).
4666    /// `average_review_delay_days` drives both
4667    /// `first_review_delay_range` and `second_review_delay_range` as
4668    /// a ±1-day band around the average. `rework_probability` and
4669    /// `require_partner_signoff` are reserved for v3.4 workflow
4670    /// modeling.
4671    #[serde(default)]
4672    pub review: ReviewWorkflowConfig,
4673
4674    /// FSM-driven audit generation configuration.
4675    #[serde(default)]
4676    pub fsm: Option<AuditFsmConfig>,
4677
4678    /// v3.3.0: IT general controls (access logs, change management
4679    /// records) emitted alongside audit engagements. Requires both
4680    /// `audit.enabled = true` and `audit.it_controls.enabled = true`
4681    /// to take effect — the latter defaults to `false` so current
4682    /// archives are byte-identical to v3.2.1.
4683    #[serde(default)]
4684    pub it_controls: ItControlsConfig,
4685}
4686
4687/// IT general controls config (v3.3.0+).
4688#[derive(Debug, Clone, Serialize, Deserialize)]
4689pub struct ItControlsConfig {
4690    /// Master switch — when `false`, no access logs or change records
4691    /// are generated.
4692    #[serde(default)]
4693    pub enabled: bool,
4694    /// Number of access-log entries per engagement (approximate — the
4695    /// generator may round or scale based on company size).
4696    #[serde(default = "default_access_log_count")]
4697    pub access_logs_per_engagement: usize,
4698    /// Number of change-management records per engagement.
4699    #[serde(default = "default_change_record_count")]
4700    pub change_records_per_engagement: usize,
4701}
4702
/// Serde fallback for `ItControlsConfig::access_logs_per_engagement`.
fn default_access_log_count() -> usize {
    const ACCESS_LOG_COUNT: usize = 500;
    ACCESS_LOG_COUNT
}
/// Serde fallback for `ItControlsConfig::change_records_per_engagement`.
fn default_change_record_count() -> usize {
    const CHANGE_RECORD_COUNT: usize = 50;
    CHANGE_RECORD_COUNT
}
4709
4710impl Default for ItControlsConfig {
4711    fn default() -> Self {
4712        Self {
4713            enabled: false,
4714            access_logs_per_engagement: default_access_log_count(),
4715            change_records_per_engagement: default_change_record_count(),
4716        }
4717    }
4718}
4719
4720impl Default for AuditGenerationConfig {
4721    fn default() -> Self {
4722        Self {
4723            enabled: false,
4724            generate_workpapers: true,
4725            engagement_types: AuditEngagementTypesConfig::default(),
4726            workpapers: WorkpaperConfig::default(),
4727            team: AuditTeamConfig::default(),
4728            review: ReviewWorkflowConfig::default(),
4729            fsm: None,
4730            it_controls: ItControlsConfig::default(),
4731        }
4732    }
4733}
4734
4735/// FSM-driven audit generation configuration.
4736#[derive(Debug, Clone, Serialize, Deserialize)]
4737pub struct AuditFsmConfig {
4738    /// Enable FSM-driven audit generation.
4739    #[serde(default)]
4740    pub enabled: bool,
4741
4742    /// Blueprint source: "builtin:fsa", "builtin:ia", or a file path.
4743    #[serde(default = "default_audit_fsm_blueprint")]
4744    pub blueprint: String,
4745
4746    /// Overlay source: "builtin:default", "builtin:thorough", "builtin:rushed", or a file path.
4747    #[serde(default = "default_audit_fsm_overlay")]
4748    pub overlay: String,
4749
4750    /// Depth level override.
4751    #[serde(default)]
4752    pub depth: Option<String>,
4753
4754    /// Discriminator filter.
4755    #[serde(default)]
4756    pub discriminators: std::collections::HashMap<String, Vec<String>>,
4757
4758    /// Event trail output config.
4759    #[serde(default)]
4760    pub event_trail: AuditEventTrailConfig,
4761
4762    /// RNG seed override.
4763    #[serde(default)]
4764    pub seed: Option<u64>,
4765}
4766
4767impl Default for AuditFsmConfig {
4768    fn default() -> Self {
4769        Self {
4770            enabled: false,
4771            blueprint: default_audit_fsm_blueprint(),
4772            overlay: default_audit_fsm_overlay(),
4773            depth: None,
4774            discriminators: std::collections::HashMap::new(),
4775            event_trail: AuditEventTrailConfig::default(),
4776            seed: None,
4777        }
4778    }
4779}
4780
/// Serde fallback for `AuditFsmConfig::blueprint`.
fn default_audit_fsm_blueprint() -> String {
    String::from("builtin:fsa")
}
4784
/// Serde fallback for `AuditFsmConfig::overlay`.
fn default_audit_fsm_overlay() -> String {
    String::from("builtin:default")
}
4788
4789/// Event trail output configuration for FSM-driven audit generation.
4790#[derive(Debug, Clone, Serialize, Deserialize)]
4791pub struct AuditEventTrailConfig {
4792    /// Emit a flat event log.
4793    #[serde(default = "default_true")]
4794    pub flat_log: bool,
4795    /// Project events to OCEL 2.0 format.
4796    #[serde(default)]
4797    pub ocel_projection: bool,
4798}
4799
4800impl Default for AuditEventTrailConfig {
4801    fn default() -> Self {
4802        Self {
4803            flat_log: true,
4804            ocel_projection: false,
4805        }
4806    }
4807}
4808
4809/// Engagement type distribution configuration.
4810#[derive(Debug, Clone, Serialize, Deserialize)]
4811pub struct AuditEngagementTypesConfig {
4812    /// Financial statement audit probability
4813    #[serde(default = "default_financial_audit_prob")]
4814    pub financial_statement: f64,
4815    /// SOX/ICFR audit probability
4816    #[serde(default = "default_sox_audit_prob")]
4817    pub sox_icfr: f64,
4818    /// Integrated audit probability
4819    #[serde(default = "default_integrated_audit_prob")]
4820    pub integrated: f64,
4821    /// Review engagement probability
4822    #[serde(default = "default_review_prob")]
4823    pub review: f64,
4824    /// Agreed-upon procedures probability
4825    #[serde(default = "default_aup_prob")]
4826    pub agreed_upon_procedures: f64,
4827}
4828
/// Serde fallback for `AuditEngagementTypesConfig::financial_statement`.
fn default_financial_audit_prob() -> f64 {
    const FINANCIAL_STATEMENT_PROB: f64 = 0.40;
    FINANCIAL_STATEMENT_PROB
}
/// Serde fallback for `AuditEngagementTypesConfig::sox_icfr`.
fn default_sox_audit_prob() -> f64 {
    const SOX_ICFR_PROB: f64 = 0.20;
    SOX_ICFR_PROB
}
/// Serde fallback for `AuditEngagementTypesConfig::integrated`.
fn default_integrated_audit_prob() -> f64 {
    const INTEGRATED_PROB: f64 = 0.25;
    INTEGRATED_PROB
}
/// Serde fallback for `AuditEngagementTypesConfig::review`.
fn default_review_prob() -> f64 {
    const REVIEW_PROB: f64 = 0.10;
    REVIEW_PROB
}
/// Serde fallback for `AuditEngagementTypesConfig::agreed_upon_procedures`.
fn default_aup_prob() -> f64 {
    const AGREED_UPON_PROCEDURES_PROB: f64 = 0.05;
    AGREED_UPON_PROCEDURES_PROB
}
4844
4845impl Default for AuditEngagementTypesConfig {
4846    fn default() -> Self {
4847        Self {
4848            financial_statement: default_financial_audit_prob(),
4849            sox_icfr: default_sox_audit_prob(),
4850            integrated: default_integrated_audit_prob(),
4851            review: default_review_prob(),
4852            agreed_upon_procedures: default_aup_prob(),
4853        }
4854    }
4855}
4856
4857/// Workpaper generation configuration.
4858#[derive(Debug, Clone, Serialize, Deserialize)]
4859pub struct WorkpaperConfig {
4860    /// Average workpapers per engagement phase
4861    #[serde(default = "default_workpapers_per_phase")]
4862    pub average_per_phase: usize,
4863
4864    /// Include ISA compliance references
4865    #[serde(default = "default_true")]
4866    pub include_isa_references: bool,
4867
4868    /// Generate sample details
4869    #[serde(default = "default_true")]
4870    pub include_sample_details: bool,
4871
4872    /// Include cross-references between workpapers
4873    #[serde(default = "default_true")]
4874    pub include_cross_references: bool,
4875
4876    /// Sampling configuration
4877    #[serde(default)]
4878    pub sampling: SamplingConfig,
4879}
4880
/// Serde fallback for `WorkpaperConfig::average_per_phase`.
fn default_workpapers_per_phase() -> usize {
    const WORKPAPERS_PER_PHASE: usize = 5;
    WORKPAPERS_PER_PHASE
}
4884
4885impl Default for WorkpaperConfig {
4886    fn default() -> Self {
4887        Self {
4888            average_per_phase: default_workpapers_per_phase(),
4889            include_isa_references: true,
4890            include_sample_details: true,
4891            include_cross_references: true,
4892            sampling: SamplingConfig::default(),
4893        }
4894    }
4895}
4896
4897/// Sampling method configuration.
4898#[derive(Debug, Clone, Serialize, Deserialize)]
4899pub struct SamplingConfig {
4900    /// Statistical sampling rate (0.0-1.0)
4901    #[serde(default = "default_statistical_rate")]
4902    pub statistical_rate: f64,
4903    /// Judgmental sampling rate (0.0-1.0)
4904    #[serde(default = "default_judgmental_rate")]
4905    pub judgmental_rate: f64,
4906    /// Haphazard sampling rate (0.0-1.0)
4907    #[serde(default = "default_haphazard_rate")]
4908    pub haphazard_rate: f64,
4909    /// 100% examination rate (0.0-1.0)
4910    #[serde(default = "default_complete_examination_rate")]
4911    pub complete_examination_rate: f64,
4912}
4913
/// Serde fallback for `SamplingConfig::statistical_rate`.
fn default_statistical_rate() -> f64 {
    const STATISTICAL_RATE: f64 = 0.40;
    STATISTICAL_RATE
}
/// Serde fallback for `SamplingConfig::judgmental_rate`.
fn default_judgmental_rate() -> f64 {
    const JUDGMENTAL_RATE: f64 = 0.30;
    JUDGMENTAL_RATE
}
/// Serde fallback for `SamplingConfig::haphazard_rate`.
fn default_haphazard_rate() -> f64 {
    const HAPHAZARD_RATE: f64 = 0.20;
    HAPHAZARD_RATE
}
/// Serde fallback for `SamplingConfig::complete_examination_rate`.
fn default_complete_examination_rate() -> f64 {
    const COMPLETE_EXAMINATION_RATE: f64 = 0.10;
    COMPLETE_EXAMINATION_RATE
}
4926
4927impl Default for SamplingConfig {
4928    fn default() -> Self {
4929        Self {
4930            statistical_rate: default_statistical_rate(),
4931            judgmental_rate: default_judgmental_rate(),
4932            haphazard_rate: default_haphazard_rate(),
4933            complete_examination_rate: default_complete_examination_rate(),
4934        }
4935    }
4936}
4937
4938/// Audit team configuration.
4939#[derive(Debug, Clone, Serialize, Deserialize)]
4940pub struct AuditTeamConfig {
4941    /// Minimum team size
4942    #[serde(default = "default_min_team_size")]
4943    pub min_team_size: usize,
4944    /// Maximum team size
4945    #[serde(default = "default_max_team_size")]
4946    pub max_team_size: usize,
4947    /// Probability of having a specialist on the team
4948    #[serde(default = "default_specialist_probability")]
4949    pub specialist_probability: f64,
4950}
4951
/// Serde fallback for `AuditTeamConfig::min_team_size`.
fn default_min_team_size() -> usize {
    const MIN_TEAM_SIZE: usize = 3;
    MIN_TEAM_SIZE
}
/// Serde fallback for `AuditTeamConfig::max_team_size`.
fn default_max_team_size() -> usize {
    const MAX_TEAM_SIZE: usize = 8;
    MAX_TEAM_SIZE
}
/// Serde fallback for `AuditTeamConfig::specialist_probability`.
fn default_specialist_probability() -> f64 {
    const SPECIALIST_PROBABILITY: f64 = 0.30;
    SPECIALIST_PROBABILITY
}
4961
4962impl Default for AuditTeamConfig {
4963    fn default() -> Self {
4964        Self {
4965            min_team_size: default_min_team_size(),
4966            max_team_size: default_max_team_size(),
4967            specialist_probability: default_specialist_probability(),
4968        }
4969    }
4970}
4971
4972/// Review workflow configuration.
4973#[derive(Debug, Clone, Serialize, Deserialize)]
4974pub struct ReviewWorkflowConfig {
4975    /// Average days between preparer completion and first review
4976    #[serde(default = "default_review_delay_days")]
4977    pub average_review_delay_days: u32,
4978    /// Probability of review notes requiring rework
4979    #[serde(default = "default_rework_probability_review")]
4980    pub rework_probability: f64,
4981    /// Require partner sign-off for all workpapers
4982    #[serde(default = "default_true")]
4983    pub require_partner_signoff: bool,
4984}
4985
/// Serde fallback for `ReviewWorkflowConfig::average_review_delay_days`.
fn default_review_delay_days() -> u32 {
    const REVIEW_DELAY_DAYS: u32 = 2;
    REVIEW_DELAY_DAYS
}
/// Serde fallback for `ReviewWorkflowConfig::rework_probability`.
fn default_rework_probability_review() -> f64 {
    const REVIEW_REWORK_PROBABILITY: f64 = 0.15;
    REVIEW_REWORK_PROBABILITY
}
4992
4993impl Default for ReviewWorkflowConfig {
4994    fn default() -> Self {
4995        Self {
4996            average_review_delay_days: default_review_delay_days(),
4997            rework_probability: default_rework_probability_review(),
4998            require_partner_signoff: true,
4999        }
5000    }
5001}
5002
5003// =============================================================================
5004// Data Quality Configuration
5005// =============================================================================
5006
5007/// Data quality variation settings for realistic flakiness injection.
5008#[derive(Debug, Clone, Serialize, Deserialize)]
5009pub struct DataQualitySchemaConfig {
5010    /// Enable data quality variations
5011    #[serde(default)]
5012    pub enabled: bool,
5013    /// Preset to use (overrides individual settings if set)
5014    #[serde(default)]
5015    pub preset: DataQualityPreset,
5016    /// Missing value injection settings
5017    #[serde(default)]
5018    pub missing_values: MissingValuesSchemaConfig,
5019    /// Typo injection settings
5020    #[serde(default)]
5021    pub typos: TypoSchemaConfig,
5022    /// Format variation settings
5023    #[serde(default)]
5024    pub format_variations: FormatVariationSchemaConfig,
5025    /// Duplicate injection settings
5026    #[serde(default)]
5027    pub duplicates: DuplicateSchemaConfig,
5028    /// Encoding issue settings
5029    #[serde(default)]
5030    pub encoding_issues: EncodingIssueSchemaConfig,
5031    /// Generate quality issue labels for ML training
5032    #[serde(default)]
5033    pub generate_labels: bool,
5034    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
5035    #[serde(default)]
5036    pub sink_profiles: SinkQualityProfiles,
5037}
5038
5039impl Default for DataQualitySchemaConfig {
5040    fn default() -> Self {
5041        Self {
5042            enabled: false,
5043            preset: DataQualityPreset::None,
5044            missing_values: MissingValuesSchemaConfig::default(),
5045            typos: TypoSchemaConfig::default(),
5046            format_variations: FormatVariationSchemaConfig::default(),
5047            duplicates: DuplicateSchemaConfig::default(),
5048            encoding_issues: EncodingIssueSchemaConfig::default(),
5049            generate_labels: true,
5050            sink_profiles: SinkQualityProfiles::default(),
5051        }
5052    }
5053}
5054
5055impl DataQualitySchemaConfig {
5056    /// Creates a config for a specific preset profile.
5057    pub fn with_preset(preset: DataQualityPreset) -> Self {
5058        let mut config = Self {
5059            preset,
5060            ..Default::default()
5061        };
5062        config.apply_preset();
5063        config
5064    }
5065
5066    /// Applies the preset settings to the individual configuration fields.
5067    /// Call this after deserializing if preset is not Custom or None.
5068    pub fn apply_preset(&mut self) {
5069        if !self.preset.overrides_settings() {
5070            return;
5071        }
5072
5073        self.enabled = true;
5074
5075        // Missing values
5076        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
5077        self.missing_values.rate = self.preset.missing_rate();
5078
5079        // Typos
5080        self.typos.enabled = self.preset.typo_rate() > 0.0;
5081        self.typos.char_error_rate = self.preset.typo_rate();
5082
5083        // Duplicates
5084        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
5085        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5086        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5087        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
5088
5089        // Format variations
5090        self.format_variations.enabled = self.preset.format_variations_enabled();
5091
5092        // Encoding issues
5093        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
5094        self.encoding_issues.rate = self.preset.encoding_issue_rate();
5095
5096        // OCR errors for typos in legacy preset
5097        if self.preset.ocr_errors_enabled() {
5098            self.typos.type_weights.ocr_errors = 0.3;
5099        }
5100    }
5101
5102    /// Returns the effective missing value rate (considering preset).
5103    pub fn effective_missing_rate(&self) -> f64 {
5104        if self.preset.overrides_settings() {
5105            self.preset.missing_rate()
5106        } else {
5107            self.missing_values.rate
5108        }
5109    }
5110
5111    /// Returns the effective typo rate (considering preset).
5112    pub fn effective_typo_rate(&self) -> f64 {
5113        if self.preset.overrides_settings() {
5114            self.preset.typo_rate()
5115        } else {
5116            self.typos.char_error_rate
5117        }
5118    }
5119
5120    /// Returns the effective duplicate rate (considering preset).
5121    pub fn effective_duplicate_rate(&self) -> f64 {
5122        if self.preset.overrides_settings() {
5123            self.preset.duplicate_rate()
5124        } else {
5125            self.duplicates.exact_duplicate_ratio
5126                + self.duplicates.near_duplicate_ratio
5127                + self.duplicates.fuzzy_duplicate_ratio
5128        }
5129    }
5130
5131    /// Creates a clean profile config.
5132    pub fn clean() -> Self {
5133        Self::with_preset(DataQualityPreset::Clean)
5134    }
5135
5136    /// Creates a noisy profile config.
5137    pub fn noisy() -> Self {
5138        Self::with_preset(DataQualityPreset::Noisy)
5139    }
5140
5141    /// Creates a legacy profile config.
5142    pub fn legacy() -> Self {
5143        Self::with_preset(DataQualityPreset::Legacy)
5144    }
5145}
5146
/// Preset configurations for common data quality scenarios.
///
/// Serialized in snake_case (`none`, `minimal`, `normal`, `high`,
/// `custom`, `clean`, `noisy`, `legacy`). `None` is the default variant.
/// The concrete rates for each preset come from the accessor methods in
/// `impl DataQualityPreset`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DataQualityPreset {
    /// No data quality variations (clean data)
    #[default]
    None,
    /// Minimal variations (very clean data with rare issues)
    /// Missing: 0.5%, Typos: 0.05%, Duplicates: 0.1%
    Minimal,
    /// Normal variations (realistic enterprise data quality)
    /// Missing: 2%, Typos: 0.2%, Duplicates: 0.5%
    Normal,
    /// High variations (messy data for stress testing)
    /// Missing: 8%, Typos: 1%, Duplicates: 2%
    High,
    /// Custom (use individual settings)
    Custom,

    // ========================================
    // ML-Oriented Profiles (Phase 2.1)
    // ========================================
    /// Clean profile for ML training - minimal data quality issues
    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
    Clean,
    /// Noisy profile simulating typical production data issues
    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
    Noisy,
    /// Legacy profile simulating migrated/OCR'd historical data
    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
    Legacy,
}
5176
5177impl DataQualityPreset {
5178    /// Returns the missing value rate for this preset.
5179    pub fn missing_rate(&self) -> f64 {
5180        match self {
5181            DataQualityPreset::None => 0.0,
5182            DataQualityPreset::Minimal => 0.005,
5183            DataQualityPreset::Normal => 0.02,
5184            DataQualityPreset::High => 0.08,
5185            DataQualityPreset::Custom => 0.01, // Use config value
5186            DataQualityPreset::Clean => 0.001,
5187            DataQualityPreset::Noisy => 0.05,
5188            DataQualityPreset::Legacy => 0.10,
5189        }
5190    }
5191
5192    /// Returns the typo rate for this preset.
5193    pub fn typo_rate(&self) -> f64 {
5194        match self {
5195            DataQualityPreset::None => 0.0,
5196            DataQualityPreset::Minimal => 0.0005,
5197            DataQualityPreset::Normal => 0.002,
5198            DataQualityPreset::High => 0.01,
5199            DataQualityPreset::Custom => 0.001, // Use config value
5200            DataQualityPreset::Clean => 0.0005,
5201            DataQualityPreset::Noisy => 0.02,
5202            DataQualityPreset::Legacy => 0.05,
5203        }
5204    }
5205
5206    /// Returns the duplicate rate for this preset.
5207    pub fn duplicate_rate(&self) -> f64 {
5208        match self {
5209            DataQualityPreset::None => 0.0,
5210            DataQualityPreset::Minimal => 0.001,
5211            DataQualityPreset::Normal => 0.005,
5212            DataQualityPreset::High => 0.02,
5213            DataQualityPreset::Custom => 0.0, // Use config value
5214            DataQualityPreset::Clean => 0.0,
5215            DataQualityPreset::Noisy => 0.01,
5216            DataQualityPreset::Legacy => 0.03,
5217        }
5218    }
5219
5220    /// Returns whether format variations are enabled for this preset.
5221    pub fn format_variations_enabled(&self) -> bool {
5222        match self {
5223            DataQualityPreset::None | DataQualityPreset::Clean => false,
5224            DataQualityPreset::Minimal => true,
5225            DataQualityPreset::Normal => true,
5226            DataQualityPreset::High => true,
5227            DataQualityPreset::Custom => true,
5228            DataQualityPreset::Noisy => true,
5229            DataQualityPreset::Legacy => true,
5230        }
5231    }
5232
5233    /// Returns whether OCR-style errors are enabled for this preset.
5234    pub fn ocr_errors_enabled(&self) -> bool {
5235        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
5236    }
5237
5238    /// Returns whether encoding issues are enabled for this preset.
5239    pub fn encoding_issues_enabled(&self) -> bool {
5240        matches!(
5241            self,
5242            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
5243        )
5244    }
5245
5246    /// Returns the encoding issue rate for this preset.
5247    pub fn encoding_issue_rate(&self) -> f64 {
5248        match self {
5249            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
5250            DataQualityPreset::Normal => 0.002,
5251            DataQualityPreset::High => 0.01,
5252            DataQualityPreset::Custom => 0.0,
5253            DataQualityPreset::Noisy => 0.005,
5254            DataQualityPreset::Legacy => 0.02,
5255        }
5256    }
5257
5258    /// Returns true if this preset overrides individual settings.
5259    pub fn overrides_settings(&self) -> bool {
5260        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
5261    }
5262
5263    /// Returns a human-readable description of this preset.
5264    pub fn description(&self) -> &'static str {
5265        match self {
5266            DataQualityPreset::None => "No data quality issues (pristine data)",
5267            DataQualityPreset::Minimal => "Very rare data quality issues",
5268            DataQualityPreset::Normal => "Realistic enterprise data quality",
5269            DataQualityPreset::High => "Messy data for stress testing",
5270            DataQualityPreset::Custom => "Custom settings from configuration",
5271            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
5272            DataQualityPreset::Noisy => "Typical production data with moderate issues",
5273            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
5274        }
5275    }
5276}
5277
5278/// Missing value injection configuration.
5279#[derive(Debug, Clone, Serialize, Deserialize)]
5280pub struct MissingValuesSchemaConfig {
5281    /// Enable missing value injection
5282    #[serde(default)]
5283    pub enabled: bool,
5284    /// Global missing rate (0.0 to 1.0)
5285    #[serde(default = "default_missing_rate")]
5286    pub rate: f64,
5287    /// Missing value strategy
5288    #[serde(default)]
5289    pub strategy: MissingValueStrategy,
5290    /// Field-specific rates (field name -> rate)
5291    #[serde(default)]
5292    pub field_rates: std::collections::HashMap<String, f64>,
5293    /// Fields that should never have missing values
5294    #[serde(default)]
5295    pub protected_fields: Vec<String>,
5296}
5297
/// Fallback global missing-value rate used when the config omits `rate`.
fn default_missing_rate() -> f64 {
    const MISSING_RATE: f64 = 0.01;
    MISSING_RATE
}
5301
5302impl Default for MissingValuesSchemaConfig {
5303    fn default() -> Self {
5304        Self {
5305            enabled: false,
5306            rate: default_missing_rate(),
5307            strategy: MissingValueStrategy::Mcar,
5308            field_rates: std::collections::HashMap::new(),
5309            protected_fields: vec![
5310                "document_id".to_string(),
5311                "company_code".to_string(),
5312                "posting_date".to_string(),
5313            ],
5314        }
5315    }
5316}
5317
/// Missing value strategy types.
///
/// Serialized in snake_case (`mcar`, `mar`, `mnar`, `systematic`);
/// `Mcar` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MissingValueStrategy {
    /// Missing Completely At Random - equal probability for all values
    #[default]
    Mcar,
    /// Missing At Random - depends on other observed values
    Mar,
    /// Missing Not At Random - depends on the value itself
    Mnar,
    /// Systematic - entire field groups missing together
    Systematic,
}
5332
5333/// Typo injection configuration.
5334#[derive(Debug, Clone, Serialize, Deserialize)]
5335pub struct TypoSchemaConfig {
5336    /// Enable typo injection
5337    #[serde(default)]
5338    pub enabled: bool,
5339    /// Character error rate (per character, not per field)
5340    #[serde(default = "default_typo_rate")]
5341    pub char_error_rate: f64,
5342    /// Typo type weights
5343    #[serde(default)]
5344    pub type_weights: TypoTypeWeights,
5345    /// Fields that should never have typos
5346    #[serde(default)]
5347    pub protected_fields: Vec<String>,
5348}
5349
/// Fallback per-character typo rate used when `char_error_rate` is omitted.
fn default_typo_rate() -> f64 {
    const CHAR_ERROR_RATE: f64 = 0.001;
    CHAR_ERROR_RATE
}
5353
5354impl Default for TypoSchemaConfig {
5355    fn default() -> Self {
5356        Self {
5357            enabled: false,
5358            char_error_rate: default_typo_rate(),
5359            type_weights: TypoTypeWeights::default(),
5360            protected_fields: vec![
5361                "document_id".to_string(),
5362                "gl_account".to_string(),
5363                "company_code".to_string(),
5364            ],
5365        }
5366    }
5367}
5368
5369/// Weights for different typo types.
5370#[derive(Debug, Clone, Serialize, Deserialize)]
5371pub struct TypoTypeWeights {
5372    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
5373    #[serde(default = "default_substitution_weight")]
5374    pub substitution: f64,
5375    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
5376    #[serde(default = "default_transposition_weight")]
5377    pub transposition: f64,
5378    /// Character insertion
5379    #[serde(default = "default_insertion_weight")]
5380    pub insertion: f64,
5381    /// Character deletion
5382    #[serde(default = "default_deletion_weight")]
5383    pub deletion: f64,
5384    /// OCR-style errors (e.g., '0' -> 'O')
5385    #[serde(default = "default_ocr_weight")]
5386    pub ocr_errors: f64,
5387    /// Homophone substitution (e.g., 'their' -> 'there')
5388    #[serde(default = "default_homophone_weight")]
5389    pub homophones: f64,
5390}
5391
/// Default weight for keyboard-adjacent substitutions.
fn default_substitution_weight() -> f64 {
    const WEIGHT: f64 = 0.35;
    WEIGHT
}

/// Default weight for adjacent-character transpositions.
fn default_transposition_weight() -> f64 {
    const WEIGHT: f64 = 0.25;
    WEIGHT
}

/// Default weight for character insertions.
fn default_insertion_weight() -> f64 {
    const WEIGHT: f64 = 0.10;
    WEIGHT
}

/// Default weight for character deletions.
fn default_deletion_weight() -> f64 {
    const WEIGHT: f64 = 0.15;
    WEIGHT
}

/// Default weight for OCR-style errors.
fn default_ocr_weight() -> f64 {
    const WEIGHT: f64 = 0.10;
    WEIGHT
}

/// Default weight for homophone substitutions.
fn default_homophone_weight() -> f64 {
    const WEIGHT: f64 = 0.05;
    WEIGHT
}
5410
5411impl Default for TypoTypeWeights {
5412    fn default() -> Self {
5413        Self {
5414            substitution: default_substitution_weight(),
5415            transposition: default_transposition_weight(),
5416            insertion: default_insertion_weight(),
5417            deletion: default_deletion_weight(),
5418            ocr_errors: default_ocr_weight(),
5419            homophones: default_homophone_weight(),
5420        }
5421    }
5422}
5423
/// Format variation configuration.
///
/// Groups the date, amount and identifier variation settings under one
/// switch. Every field is optional on input and takes its type's default
/// when omitted; the derived `Default` builds the struct the same way.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FormatVariationSchemaConfig {
    /// Enable format variations
    #[serde(default)]
    pub enabled: bool,
    /// Date format variation settings
    #[serde(default)]
    pub dates: DateFormatVariationConfig,
    /// Amount format variation settings
    #[serde(default)]
    pub amounts: AmountFormatVariationConfig,
    /// Identifier format variation settings
    #[serde(default)]
    pub identifiers: IdentifierFormatVariationConfig,
}
5440
5441/// Date format variation configuration.
5442#[derive(Debug, Clone, Serialize, Deserialize)]
5443pub struct DateFormatVariationConfig {
5444    /// Enable date format variations
5445    #[serde(default)]
5446    pub enabled: bool,
5447    /// Overall variation rate
5448    #[serde(default = "default_date_variation_rate")]
5449    pub rate: f64,
5450    /// Include ISO format (2024-01-15)
5451    #[serde(default = "default_true")]
5452    pub iso_format: bool,
5453    /// Include US format (01/15/2024)
5454    #[serde(default)]
5455    pub us_format: bool,
5456    /// Include EU format (15.01.2024)
5457    #[serde(default)]
5458    pub eu_format: bool,
5459    /// Include long format (January 15, 2024)
5460    #[serde(default)]
5461    pub long_format: bool,
5462}
5463
/// Fallback variation rate for dates when `rate` is omitted.
fn default_date_variation_rate() -> f64 {
    const DATE_VARIATION_RATE: f64 = 0.05;
    DATE_VARIATION_RATE
}
5467
5468impl Default for DateFormatVariationConfig {
5469    fn default() -> Self {
5470        Self {
5471            enabled: false,
5472            rate: default_date_variation_rate(),
5473            iso_format: true,
5474            us_format: false,
5475            eu_format: false,
5476            long_format: false,
5477        }
5478    }
5479}
5480
5481/// Amount format variation configuration.
5482#[derive(Debug, Clone, Serialize, Deserialize)]
5483pub struct AmountFormatVariationConfig {
5484    /// Enable amount format variations
5485    #[serde(default)]
5486    pub enabled: bool,
5487    /// Overall variation rate
5488    #[serde(default = "default_amount_variation_rate")]
5489    pub rate: f64,
5490    /// Include US comma format (1,234.56)
5491    #[serde(default)]
5492    pub us_comma_format: bool,
5493    /// Include EU format (1.234,56)
5494    #[serde(default)]
5495    pub eu_format: bool,
5496    /// Include currency prefix ($1,234.56)
5497    #[serde(default)]
5498    pub currency_prefix: bool,
5499    /// Include accounting format with parentheses for negatives
5500    #[serde(default)]
5501    pub accounting_format: bool,
5502}
5503
/// Fallback variation rate for amounts when `rate` is omitted.
fn default_amount_variation_rate() -> f64 {
    const AMOUNT_VARIATION_RATE: f64 = 0.02;
    AMOUNT_VARIATION_RATE
}
5507
5508impl Default for AmountFormatVariationConfig {
5509    fn default() -> Self {
5510        Self {
5511            enabled: false,
5512            rate: default_amount_variation_rate(),
5513            us_comma_format: false,
5514            eu_format: false,
5515            currency_prefix: false,
5516            accounting_format: false,
5517        }
5518    }
5519}
5520
5521/// Identifier format variation configuration.
5522#[derive(Debug, Clone, Serialize, Deserialize)]
5523pub struct IdentifierFormatVariationConfig {
5524    /// Enable identifier format variations
5525    #[serde(default)]
5526    pub enabled: bool,
5527    /// Overall variation rate
5528    #[serde(default = "default_identifier_variation_rate")]
5529    pub rate: f64,
5530    /// Case variations (uppercase, lowercase, mixed)
5531    #[serde(default)]
5532    pub case_variations: bool,
5533    /// Padding variations (leading zeros)
5534    #[serde(default)]
5535    pub padding_variations: bool,
5536    /// Separator variations (dash vs underscore)
5537    #[serde(default)]
5538    pub separator_variations: bool,
5539}
5540
/// Fallback variation rate for identifiers when `rate` is omitted.
fn default_identifier_variation_rate() -> f64 {
    const IDENTIFIER_VARIATION_RATE: f64 = 0.02;
    IDENTIFIER_VARIATION_RATE
}
5544
5545impl Default for IdentifierFormatVariationConfig {
5546    fn default() -> Self {
5547        Self {
5548            enabled: false,
5549            rate: default_identifier_variation_rate(),
5550            case_variations: false,
5551            padding_variations: false,
5552            separator_variations: false,
5553        }
5554    }
5555}
5556
5557/// Duplicate injection configuration.
5558#[derive(Debug, Clone, Serialize, Deserialize)]
5559pub struct DuplicateSchemaConfig {
5560    /// Enable duplicate injection
5561    #[serde(default)]
5562    pub enabled: bool,
5563    /// Overall duplicate rate
5564    #[serde(default = "default_duplicate_rate")]
5565    pub rate: f64,
5566    /// Exact duplicate proportion (out of duplicates)
5567    #[serde(default = "default_exact_duplicate_ratio")]
5568    pub exact_duplicate_ratio: f64,
5569    /// Near duplicate proportion (slight variations)
5570    #[serde(default = "default_near_duplicate_ratio")]
5571    pub near_duplicate_ratio: f64,
5572    /// Fuzzy duplicate proportion (typos in key fields)
5573    #[serde(default = "default_fuzzy_duplicate_ratio")]
5574    pub fuzzy_duplicate_ratio: f64,
5575    /// Maximum date offset for near/fuzzy duplicates (days)
5576    #[serde(default = "default_max_date_offset")]
5577    pub max_date_offset_days: u32,
5578    /// Maximum amount variance for near duplicates (fraction)
5579    #[serde(default = "default_max_amount_variance")]
5580    pub max_amount_variance: f64,
5581}
5582
/// Default overall duplicate rate.
fn default_duplicate_rate() -> f64 {
    const RATE: f64 = 0.005;
    RATE
}

/// Default share of duplicates that are exact copies.
fn default_exact_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.4;
    RATIO
}

/// Default share of duplicates that are near duplicates.
fn default_near_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.35;
    RATIO
}

/// Default share of duplicates that are fuzzy duplicates.
fn default_fuzzy_duplicate_ratio() -> f64 {
    const RATIO: f64 = 0.25;
    RATIO
}

/// Default maximum date offset, in days, for near/fuzzy duplicates.
fn default_max_date_offset() -> u32 {
    const DAYS: u32 = 3;
    DAYS
}

/// Default maximum amount variance (as a fraction) for near duplicates.
fn default_max_amount_variance() -> f64 {
    const VARIANCE: f64 = 0.01;
    VARIANCE
}
5601
5602impl Default for DuplicateSchemaConfig {
5603    fn default() -> Self {
5604        Self {
5605            enabled: false,
5606            rate: default_duplicate_rate(),
5607            exact_duplicate_ratio: default_exact_duplicate_ratio(),
5608            near_duplicate_ratio: default_near_duplicate_ratio(),
5609            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
5610            max_date_offset_days: default_max_date_offset(),
5611            max_amount_variance: default_max_amount_variance(),
5612        }
5613    }
5614}
5615
5616/// Encoding issue configuration.
5617#[derive(Debug, Clone, Serialize, Deserialize)]
5618pub struct EncodingIssueSchemaConfig {
5619    /// Enable encoding issue injection
5620    #[serde(default)]
5621    pub enabled: bool,
5622    /// Overall encoding issue rate
5623    #[serde(default = "default_encoding_rate")]
5624    pub rate: f64,
5625    /// Include mojibake (UTF-8/Latin-1 confusion)
5626    #[serde(default)]
5627    pub mojibake: bool,
5628    /// Include HTML entity corruption
5629    #[serde(default)]
5630    pub html_entities: bool,
5631    /// Include BOM issues
5632    #[serde(default)]
5633    pub bom_issues: bool,
5634}
5635
/// Fallback encoding issue rate used when `rate` is omitted.
fn default_encoding_rate() -> f64 {
    const ENCODING_RATE: f64 = 0.001;
    ENCODING_RATE
}
5639
5640impl Default for EncodingIssueSchemaConfig {
5641    fn default() -> Self {
5642        Self {
5643            enabled: false,
5644            rate: default_encoding_rate(),
5645            mojibake: false,
5646            html_entities: false,
5647            bom_issues: false,
5648        }
5649    }
5650}
5651
/// Per-sink quality profiles for different output formats.
///
/// Each entry is optional; `None` (the default, and the value used when the
/// key is omitted) means no override is configured for that sink.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SinkQualityProfiles {
    /// CSV-specific quality settings
    #[serde(default)]
    pub csv: Option<SinkQualityOverride>,
    /// JSON-specific quality settings
    #[serde(default)]
    pub json: Option<SinkQualityOverride>,
    /// Parquet-specific quality settings
    #[serde(default)]
    pub parquet: Option<SinkQualityOverride>,
}
5665
5666/// Quality setting overrides for a specific sink type.
5667#[derive(Debug, Clone, Serialize, Deserialize)]
5668pub struct SinkQualityOverride {
5669    /// Override enabled state
5670    pub enabled: Option<bool>,
5671    /// Override missing value rate
5672    pub missing_rate: Option<f64>,
5673    /// Override typo rate
5674    pub typo_rate: Option<f64>,
5675    /// Override format variation rate
5676    pub format_variation_rate: Option<f64>,
5677    /// Override duplicate rate
5678    pub duplicate_rate: Option<f64>,
5679}
5680
5681// =============================================================================
5682// Accounting Standards Configuration
5683// =============================================================================
5684
5685/// Accounting standards framework configuration for generating standards-compliant data.
5686///
5687/// Supports US GAAP, IFRS, and French GAAP (PCG) frameworks with specific standards:
5688/// - ASC 606/IFRS 15/PCG: Revenue Recognition
5689/// - ASC 842/IFRS 16/PCG: Leases
5690/// - ASC 820/IFRS 13/PCG: Fair Value Measurement
5691/// - ASC 360/IAS 36/PCG: Impairment
5692#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5693pub struct AccountingStandardsConfig {
5694    /// Enable accounting standards generation
5695    #[serde(default)]
5696    pub enabled: bool,
5697
5698    /// Accounting framework to use.
5699    /// When `None`, the country pack's `accounting.framework` is used as fallback;
5700    /// if that is also absent the orchestrator defaults to US GAAP.
5701    #[serde(default, skip_serializing_if = "Option::is_none")]
5702    pub framework: Option<AccountingFrameworkConfig>,
5703
5704    /// Revenue recognition configuration (ASC 606/IFRS 15)
5705    #[serde(default)]
5706    pub revenue_recognition: RevenueRecognitionConfig,
5707
5708    /// Lease accounting configuration (ASC 842/IFRS 16)
5709    #[serde(default)]
5710    pub leases: LeaseAccountingConfig,
5711
5712    /// Fair value measurement configuration (ASC 820/IFRS 13)
5713    #[serde(default)]
5714    pub fair_value: FairValueConfig,
5715
5716    /// Impairment testing configuration (ASC 360/IAS 36)
5717    #[serde(default)]
5718    pub impairment: ImpairmentConfig,
5719
5720    /// Business combination configuration (IFRS 3 / ASC 805)
5721    #[serde(default)]
5722    pub business_combinations: BusinessCombinationsConfig,
5723
5724    /// Expected Credit Loss configuration (IFRS 9 / ASC 326)
5725    #[serde(default)]
5726    pub expected_credit_loss: EclConfig,
5727
5728    /// Generate framework differences for dual reporting
5729    #[serde(default)]
5730    pub generate_differences: bool,
5731}
5732
/// Accounting framework selection.
///
/// Serialized in snake_case (`us_gaap`, `ifrs`, `dual_reporting`,
/// `french_gaap`, `german_gaap`); `UsGaap` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum AccountingFrameworkConfig {
    /// US Generally Accepted Accounting Principles
    #[default]
    UsGaap,
    /// International Financial Reporting Standards
    Ifrs,
    /// Generate data for both frameworks with reconciliation
    DualReporting,
    /// French GAAP (Plan Comptable Général – PCG)
    FrenchGaap,
    /// German GAAP (Handelsgesetzbuch – HGB, §238-263)
    GermanGaap,
}
5749
5750/// Revenue recognition configuration (ASC 606/IFRS 15).
5751#[derive(Debug, Clone, Serialize, Deserialize)]
5752pub struct RevenueRecognitionConfig {
5753    /// Enable revenue recognition generation
5754    #[serde(default)]
5755    pub enabled: bool,
5756
5757    /// Generate customer contracts
5758    #[serde(default = "default_true")]
5759    pub generate_contracts: bool,
5760
5761    /// Average number of performance obligations per contract
5762    #[serde(default = "default_avg_obligations")]
5763    pub avg_obligations_per_contract: f64,
5764
5765    /// Rate of contracts with variable consideration
5766    #[serde(default = "default_variable_consideration_rate")]
5767    pub variable_consideration_rate: f64,
5768
5769    /// Rate of over-time revenue recognition (vs point-in-time)
5770    #[serde(default = "default_over_time_rate")]
5771    pub over_time_recognition_rate: f64,
5772
5773    /// Number of contracts to generate
5774    #[serde(default = "default_contract_count")]
5775    pub contract_count: usize,
5776}
5777
/// Default average number of performance obligations per contract.
fn default_avg_obligations() -> f64 {
    const AVG_OBLIGATIONS: f64 = 2.0;
    AVG_OBLIGATIONS
}

/// Default rate of contracts carrying variable consideration.
fn default_variable_consideration_rate() -> f64 {
    const RATE: f64 = 0.15;
    RATE
}

/// Default rate of over-time (vs point-in-time) revenue recognition.
fn default_over_time_rate() -> f64 {
    const RATE: f64 = 0.30;
    RATE
}

/// Default number of contracts to generate.
fn default_contract_count() -> usize {
    const CONTRACTS: usize = 100;
    CONTRACTS
}
5793
5794impl Default for RevenueRecognitionConfig {
5795    fn default() -> Self {
5796        Self {
5797            enabled: false,
5798            generate_contracts: true,
5799            avg_obligations_per_contract: default_avg_obligations(),
5800            variable_consideration_rate: default_variable_consideration_rate(),
5801            over_time_recognition_rate: default_over_time_rate(),
5802            contract_count: default_contract_count(),
5803        }
5804    }
5805}
5806
5807/// Lease accounting configuration (ASC 842/IFRS 16).
5808#[derive(Debug, Clone, Serialize, Deserialize)]
5809pub struct LeaseAccountingConfig {
5810    /// Enable lease accounting generation
5811    #[serde(default)]
5812    pub enabled: bool,
5813
5814    /// Number of leases to generate
5815    #[serde(default = "default_lease_count")]
5816    pub lease_count: usize,
5817
5818    /// Percentage of finance leases (vs operating)
5819    #[serde(default = "default_finance_lease_pct")]
5820    pub finance_lease_percent: f64,
5821
5822    /// Average lease term in months
5823    #[serde(default = "default_avg_lease_term")]
5824    pub avg_lease_term_months: u32,
5825
5826    /// Generate amortization schedules
5827    #[serde(default = "default_true")]
5828    pub generate_amortization: bool,
5829
5830    /// Real estate lease percentage
5831    #[serde(default = "default_real_estate_pct")]
5832    pub real_estate_percent: f64,
5833}
5834
/// Default number of leases to generate.
fn default_lease_count() -> usize {
    const LEASES: usize = 50;
    LEASES
}

/// Default share of finance (vs operating) leases.
fn default_finance_lease_pct() -> f64 {
    const SHARE: f64 = 0.30;
    SHARE
}

/// Default average lease term, in months.
fn default_avg_lease_term() -> u32 {
    const MONTHS: u32 = 60;
    MONTHS
}

/// Default share of real estate leases.
fn default_real_estate_pct() -> f64 {
    const SHARE: f64 = 0.40;
    SHARE
}
5850
5851impl Default for LeaseAccountingConfig {
5852    fn default() -> Self {
5853        Self {
5854            enabled: false,
5855            lease_count: default_lease_count(),
5856            finance_lease_percent: default_finance_lease_pct(),
5857            avg_lease_term_months: default_avg_lease_term(),
5858            generate_amortization: true,
5859            real_estate_percent: default_real_estate_pct(),
5860        }
5861    }
5862}
5863
5864/// Fair value measurement configuration (ASC 820/IFRS 13).
5865#[derive(Debug, Clone, Serialize, Deserialize)]
5866pub struct FairValueConfig {
5867    /// Enable fair value measurement generation
5868    #[serde(default)]
5869    pub enabled: bool,
5870
5871    /// Number of fair value measurements to generate
5872    #[serde(default = "default_fv_count")]
5873    pub measurement_count: usize,
5874
5875    /// Level 1 (quoted prices) percentage
5876    #[serde(default = "default_level1_pct")]
5877    pub level1_percent: f64,
5878
5879    /// Level 2 (observable inputs) percentage
5880    #[serde(default = "default_level2_pct")]
5881    pub level2_percent: f64,
5882
5883    /// Level 3 (unobservable inputs) percentage
5884    #[serde(default = "default_level3_pct")]
5885    pub level3_percent: f64,
5886
5887    /// Include sensitivity analysis for Level 3
5888    #[serde(default)]
5889    pub include_sensitivity_analysis: bool,
5890}
5891
/// Default number of fair value measurements to generate.
fn default_fv_count() -> usize {
    const MEASUREMENTS: usize = 25;
    MEASUREMENTS
}

/// Default share of Level 1 (quoted prices) measurements.
fn default_level1_pct() -> f64 {
    const SHARE: f64 = 0.40;
    SHARE
}

/// Default share of Level 2 (observable inputs) measurements.
fn default_level2_pct() -> f64 {
    const SHARE: f64 = 0.35;
    SHARE
}

/// Default share of Level 3 (unobservable inputs) measurements.
fn default_level3_pct() -> f64 {
    const SHARE: f64 = 0.25;
    SHARE
}
5907
5908impl Default for FairValueConfig {
5909    fn default() -> Self {
5910        Self {
5911            enabled: false,
5912            measurement_count: default_fv_count(),
5913            level1_percent: default_level1_pct(),
5914            level2_percent: default_level2_pct(),
5915            level3_percent: default_level3_pct(),
5916            include_sensitivity_analysis: false,
5917        }
5918    }
5919}
5920
5921/// Impairment testing configuration (ASC 360/IAS 36).
5922#[derive(Debug, Clone, Serialize, Deserialize)]
5923pub struct ImpairmentConfig {
5924    /// Enable impairment testing generation
5925    #[serde(default)]
5926    pub enabled: bool,
5927
5928    /// Number of impairment tests to generate
5929    #[serde(default = "default_impairment_count")]
5930    pub test_count: usize,
5931
5932    /// Rate of tests resulting in impairment
5933    #[serde(default = "default_impairment_rate")]
5934    pub impairment_rate: f64,
5935
5936    /// Generate cash flow projections
5937    #[serde(default = "default_true")]
5938    pub generate_projections: bool,
5939
5940    /// Include goodwill impairment tests
5941    #[serde(default)]
5942    pub include_goodwill: bool,
5943}
5944
/// Default number of impairment tests to generate.
fn default_impairment_count() -> usize {
    const TESTS: usize = 15;
    TESTS
}

/// Default rate of tests that result in an impairment.
fn default_impairment_rate() -> f64 {
    const RATE: f64 = 0.10;
    RATE
}
5952
5953impl Default for ImpairmentConfig {
5954    fn default() -> Self {
5955        Self {
5956            enabled: false,
5957            test_count: default_impairment_count(),
5958            impairment_rate: default_impairment_rate(),
5959            generate_projections: true,
5960            include_goodwill: false,
5961        }
5962    }
5963}
5964
5965// =============================================================================
5966// Business Combinations Configuration (IFRS 3 / ASC 805)
5967// =============================================================================
5968
5969/// Configuration for generating business combination (acquisition) data.
5970#[derive(Debug, Clone, Serialize, Deserialize)]
5971pub struct BusinessCombinationsConfig {
5972    /// Enable business combination generation
5973    #[serde(default)]
5974    pub enabled: bool,
5975
5976    /// Number of acquisitions to generate per company (1-5)
5977    #[serde(default = "default_bc_acquisition_count")]
5978    pub acquisition_count: usize,
5979}
5980
/// Default number of acquisitions generated per company.
fn default_bc_acquisition_count() -> usize {
    const ACQUISITIONS: usize = 2;
    ACQUISITIONS
}
5984
5985impl Default for BusinessCombinationsConfig {
5986    fn default() -> Self {
5987        Self {
5988            enabled: false,
5989            acquisition_count: default_bc_acquisition_count(),
5990        }
5991    }
5992}
5993
5994// =============================================================================
5995// ECL Configuration (IFRS 9 / ASC 326)
5996// =============================================================================
5997
5998/// Configuration for Expected Credit Loss generation.
5999#[derive(Debug, Clone, Serialize, Deserialize)]
6000pub struct EclConfig {
6001    /// Enable ECL generation.
6002    #[serde(default)]
6003    pub enabled: bool,
6004
6005    /// Weight for base economic scenario (0–1).
6006    #[serde(default = "default_ecl_base_weight")]
6007    pub base_scenario_weight: f64,
6008
6009    /// Multiplier for base scenario (typically 1.0).
6010    #[serde(default = "default_ecl_base_multiplier")]
6011    pub base_scenario_multiplier: f64,
6012
6013    /// Weight for optimistic economic scenario (0–1).
6014    #[serde(default = "default_ecl_optimistic_weight")]
6015    pub optimistic_scenario_weight: f64,
6016
6017    /// Multiplier for optimistic scenario (< 1.0 means lower losses).
6018    #[serde(default = "default_ecl_optimistic_multiplier")]
6019    pub optimistic_scenario_multiplier: f64,
6020
6021    /// Weight for pessimistic economic scenario (0–1).
6022    #[serde(default = "default_ecl_pessimistic_weight")]
6023    pub pessimistic_scenario_weight: f64,
6024
6025    /// Multiplier for pessimistic scenario (> 1.0 means higher losses).
6026    #[serde(default = "default_ecl_pessimistic_multiplier")]
6027    pub pessimistic_scenario_multiplier: f64,
6028}
6029
/// Default weight of the base economic scenario.
fn default_ecl_base_weight() -> f64 {
    const WEIGHT: f64 = 0.50;
    WEIGHT
}

/// Default multiplier of the base economic scenario.
fn default_ecl_base_multiplier() -> f64 {
    const MULTIPLIER: f64 = 1.0;
    MULTIPLIER
}

/// Default weight of the optimistic economic scenario.
fn default_ecl_optimistic_weight() -> f64 {
    const WEIGHT: f64 = 0.30;
    WEIGHT
}

/// Default multiplier of the optimistic economic scenario.
fn default_ecl_optimistic_multiplier() -> f64 {
    const MULTIPLIER: f64 = 0.8;
    MULTIPLIER
}

/// Default weight of the pessimistic economic scenario.
fn default_ecl_pessimistic_weight() -> f64 {
    const WEIGHT: f64 = 0.20;
    WEIGHT
}

/// Default multiplier of the pessimistic economic scenario.
fn default_ecl_pessimistic_multiplier() -> f64 {
    const MULTIPLIER: f64 = 1.4;
    MULTIPLIER
}
6048
6049impl Default for EclConfig {
6050    fn default() -> Self {
6051        Self {
6052            enabled: false,
6053            base_scenario_weight: default_ecl_base_weight(),
6054            base_scenario_multiplier: default_ecl_base_multiplier(),
6055            optimistic_scenario_weight: default_ecl_optimistic_weight(),
6056            optimistic_scenario_multiplier: default_ecl_optimistic_multiplier(),
6057            pessimistic_scenario_weight: default_ecl_pessimistic_weight(),
6058            pessimistic_scenario_multiplier: default_ecl_pessimistic_multiplier(),
6059        }
6060    }
6061}
6062
6063// =============================================================================
6064// Audit Standards Configuration
6065// =============================================================================
6066
6067/// Audit standards framework configuration for generating standards-compliant audit data.
6068///
6069/// Supports ISA (International Standards on Auditing) and PCAOB standards:
6070/// - ISA 200-720: Complete coverage of audit standards
6071/// - ISA 520: Analytical Procedures
6072/// - ISA 505: External Confirmations
6073/// - ISA 700/705/706/701: Audit Reports
6074/// - PCAOB AS 2201: ICFR Auditing
6075#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6076pub struct AuditStandardsConfig {
6077    /// Enable audit standards generation
6078    #[serde(default)]
6079    pub enabled: bool,
6080
6081    /// ISA compliance configuration
6082    #[serde(default)]
6083    pub isa_compliance: IsaComplianceConfig,
6084
6085    /// Analytical procedures configuration (ISA 520)
6086    #[serde(default)]
6087    pub analytical_procedures: AnalyticalProceduresConfig,
6088
6089    /// External confirmations configuration (ISA 505)
6090    #[serde(default)]
6091    pub confirmations: ConfirmationsConfig,
6092
6093    /// Audit opinion configuration (ISA 700/705/706/701)
6094    #[serde(default)]
6095    pub opinion: AuditOpinionConfig,
6096
6097    /// Generate complete audit trail with traceability
6098    #[serde(default)]
6099    pub generate_audit_trail: bool,
6100
6101    /// SOX 302/404 compliance configuration
6102    #[serde(default)]
6103    pub sox: SoxComplianceConfig,
6104
6105    /// PCAOB-specific configuration
6106    #[serde(default)]
6107    pub pcaob: PcaobConfig,
6108}
6109
6110/// ISA compliance level configuration.
6111#[derive(Debug, Clone, Serialize, Deserialize)]
6112pub struct IsaComplianceConfig {
6113    /// Enable ISA compliance tracking
6114    #[serde(default)]
6115    pub enabled: bool,
6116
6117    /// Compliance level: "basic", "standard", "comprehensive"
6118    #[serde(default = "default_compliance_level")]
6119    pub compliance_level: String,
6120
6121    /// Generate ISA requirement mappings
6122    #[serde(default = "default_true")]
6123    pub generate_isa_mappings: bool,
6124
6125    /// Generate ISA coverage summary
6126    #[serde(default = "default_true")]
6127    pub generate_coverage_summary: bool,
6128
6129    /// Include PCAOB standard mappings (for dual framework)
6130    #[serde(default)]
6131    pub include_pcaob: bool,
6132
6133    /// Framework to use: "isa", "pcaob", "dual"
6134    #[serde(default = "default_audit_framework")]
6135    pub framework: String,
6136}
6137
/// Default ISA compliance level name.
fn default_compliance_level() -> String {
    String::from("standard")
}

/// Default audit framework name.
fn default_audit_framework() -> String {
    String::from("isa")
}
6145
6146impl Default for IsaComplianceConfig {
6147    fn default() -> Self {
6148        Self {
6149            enabled: false,
6150            compliance_level: default_compliance_level(),
6151            generate_isa_mappings: true,
6152            generate_coverage_summary: true,
6153            include_pcaob: false,
6154            framework: default_audit_framework(),
6155        }
6156    }
6157}
6158
6159/// Analytical procedures configuration (ISA 520).
6160#[derive(Debug, Clone, Serialize, Deserialize)]
6161pub struct AnalyticalProceduresConfig {
6162    /// Enable analytical procedures generation
6163    #[serde(default)]
6164    pub enabled: bool,
6165
6166    /// Number of procedures per account/area
6167    #[serde(default = "default_procedures_per_account")]
6168    pub procedures_per_account: usize,
6169
6170    /// Probability of variance exceeding threshold
6171    #[serde(default = "default_variance_probability")]
6172    pub variance_probability: f64,
6173
6174    /// Include variance investigations
6175    #[serde(default = "default_true")]
6176    pub generate_investigations: bool,
6177
6178    /// Include financial ratio analysis
6179    #[serde(default = "default_true")]
6180    pub include_ratio_analysis: bool,
6181}
6182
/// Default number of analytical procedures per account/area.
fn default_procedures_per_account() -> usize {
    const PROCEDURES: usize = 3;
    PROCEDURES
}

/// Default probability of a variance exceeding its threshold.
fn default_variance_probability() -> f64 {
    const PROBABILITY: f64 = 0.20;
    PROBABILITY
}
6190
6191impl Default for AnalyticalProceduresConfig {
6192    fn default() -> Self {
6193        Self {
6194            enabled: false,
6195            procedures_per_account: default_procedures_per_account(),
6196            variance_probability: default_variance_probability(),
6197            generate_investigations: true,
6198            include_ratio_analysis: true,
6199        }
6200    }
6201}
6202
/// External confirmations configuration (ISA 505).
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfirmationsConfig {
    /// Enable confirmation generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Number of confirmations to generate. Defaults to `50`
    /// (`default_confirmation_count`).
    #[serde(default = "default_confirmation_count")]
    pub confirmation_count: usize,

    /// Positive response rate. Defaults to `0.85`
    /// (`default_positive_response_rate`).
    #[serde(default = "default_positive_response_rate")]
    pub positive_response_rate: f64,

    /// Exception rate (responses with differences). Defaults to `0.10`
    /// (`default_exception_rate_confirm`).
    #[serde(default = "default_exception_rate_confirm")]
    pub exception_rate: f64,

    /// Non-response rate. Defaults to `0.05` (`default_non_response_rate`).
    #[serde(default = "default_non_response_rate")]
    pub non_response_rate: f64,

    /// Generate alternative procedures for non-responses.
    /// `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub generate_alternative_procedures: bool,
}
6230
/// Serde default for `ConfirmationsConfig::confirmation_count`.
fn default_confirmation_count() -> usize {
    const CONFIRMATION_COUNT: usize = 50;
    CONFIRMATION_COUNT
}

/// Serde default for `ConfirmationsConfig::positive_response_rate`.
fn default_positive_response_rate() -> f64 {
    const POSITIVE_RESPONSE_RATE: f64 = 0.85;
    POSITIVE_RESPONSE_RATE
}

/// Serde default for `ConfirmationsConfig::exception_rate`.
fn default_exception_rate_confirm() -> f64 {
    const EXCEPTION_RATE: f64 = 0.10;
    EXCEPTION_RATE
}

/// Serde default for `ConfirmationsConfig::non_response_rate`.
fn default_non_response_rate() -> f64 {
    const NON_RESPONSE_RATE: f64 = 0.05;
    NON_RESPONSE_RATE
}
6246
6247impl Default for ConfirmationsConfig {
6248    fn default() -> Self {
6249        Self {
6250            enabled: false,
6251            confirmation_count: default_confirmation_count(),
6252            positive_response_rate: default_positive_response_rate(),
6253            exception_rate: default_exception_rate_confirm(),
6254            non_response_rate: default_non_response_rate(),
6255            generate_alternative_procedures: true,
6256        }
6257    }
6258}
6259
/// Audit opinion configuration (ISA 700/705/706/701).
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditOpinionConfig {
    /// Enable audit opinion generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate Key Audit Matters (KAM) / Critical Audit Matters (CAM).
    /// `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub generate_kam: bool,

    /// Average number of KAMs/CAMs per opinion. Defaults to `3`
    /// (`default_kam_count`).
    #[serde(default = "default_kam_count")]
    pub average_kam_count: usize,

    /// Rate of modified opinions. Defaults to `0.05`
    /// (`default_modified_opinion_rate`).
    #[serde(default = "default_modified_opinion_rate")]
    pub modified_opinion_rate: f64,

    /// Include emphasis of matter paragraphs. Defaults to `false`.
    #[serde(default)]
    pub include_emphasis_of_matter: bool,

    /// Include going concern conclusions. `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub include_going_concern: bool,
}
6287
/// Serde default for `AuditOpinionConfig::average_kam_count`.
fn default_kam_count() -> usize {
    const AVERAGE_KAM_COUNT: usize = 3;
    AVERAGE_KAM_COUNT
}

/// Serde default for `AuditOpinionConfig::modified_opinion_rate`.
fn default_modified_opinion_rate() -> f64 {
    const MODIFIED_OPINION_RATE: f64 = 0.05;
    MODIFIED_OPINION_RATE
}
6295
6296impl Default for AuditOpinionConfig {
6297    fn default() -> Self {
6298        Self {
6299            enabled: false,
6300            generate_kam: true,
6301            average_kam_count: default_kam_count(),
6302            modified_opinion_rate: default_modified_opinion_rate(),
6303            include_emphasis_of_matter: false,
6304            include_going_concern: true,
6305        }
6306    }
6307}
6308
/// SOX compliance configuration (Sections 302/404).
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoxComplianceConfig {
    /// Enable SOX compliance generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate Section 302 CEO/CFO certifications.
    /// `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub generate_302_certifications: bool,

    /// Generate Section 404 ICFR assessments.
    /// `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub generate_404_assessments: bool,

    /// Materiality threshold for SOX testing. The default comes from
    /// `default_sox_materiality_threshold`.
    #[serde(default = "default_sox_materiality_threshold")]
    pub materiality_threshold: f64,

    /// Rate of material weaknesses. Defaults to `0.02`
    /// (`default_material_weakness_rate`).
    #[serde(default = "default_material_weakness_rate")]
    pub material_weakness_rate: f64,

    /// Rate of significant deficiencies. Defaults to `0.08`
    /// (`default_significant_deficiency_rate`).
    #[serde(default = "default_significant_deficiency_rate")]
    pub significant_deficiency_rate: f64,
}
6336
/// Serde default for `SoxComplianceConfig::material_weakness_rate`.
fn default_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}

/// Serde default for `SoxComplianceConfig::significant_deficiency_rate`.
fn default_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
6344
6345impl Default for SoxComplianceConfig {
6346    fn default() -> Self {
6347        Self {
6348            enabled: false,
6349            generate_302_certifications: true,
6350            generate_404_assessments: true,
6351            materiality_threshold: default_sox_materiality_threshold(),
6352            material_weakness_rate: default_material_weakness_rate(),
6353            significant_deficiency_rate: default_significant_deficiency_rate(),
6354        }
6355    }
6356}
6357
/// PCAOB-specific configuration.
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PcaobConfig {
    /// Enable PCAOB-specific elements. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Treat as PCAOB audit (vs ISA-only). Defaults to `false`.
    #[serde(default)]
    pub is_pcaob_audit: bool,

    /// Generate Critical Audit Matters (CAM).
    /// `Default::default()` sets this to `true`.
    #[serde(default = "default_true")]
    pub generate_cam: bool,

    /// Include ICFR opinion (for integrated audits). Defaults to `false`.
    #[serde(default)]
    pub include_icfr_opinion: bool,

    /// Generate PCAOB-ISA standard mappings. Defaults to `false`.
    #[serde(default)]
    pub generate_standard_mappings: bool,
}
6381
6382impl Default for PcaobConfig {
6383    fn default() -> Self {
6384        Self {
6385            enabled: false,
6386            is_pcaob_audit: false,
6387            generate_cam: true,
6388            include_icfr_opinion: false,
6389            generate_standard_mappings: false,
6390        }
6391    }
6392}
6393
6394// =============================================================================
6395// Advanced Distribution Configuration
6396// =============================================================================
6397
/// Advanced distribution configuration for realistic data generation.
///
/// This section enables sophisticated distribution models including:
/// - Mixture models (multi-modal distributions)
/// - Cross-field correlations
/// - Conditional distributions
/// - Regime changes and economic cycles
/// - Statistical validation
///
/// `Default` is derived and every field is `#[serde(default)]`, so an empty
/// section deserializes to each sub-config's own default with `enabled`
/// set to `false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AdvancedDistributionConfig {
    /// Enable advanced distribution features.
    #[serde(default)]
    pub enabled: bool,

    /// Mixture model configuration for amounts.
    #[serde(default)]
    pub amounts: MixtureDistributionSchemaConfig,

    /// Cross-field correlation configuration.
    #[serde(default)]
    pub correlations: CorrelationSchemaConfig,

    /// Conditional distribution configurations. Empty when omitted.
    #[serde(default)]
    pub conditional: Vec<ConditionalDistributionSchemaConfig>,

    /// Regime change configuration.
    #[serde(default)]
    pub regime_changes: RegimeChangeSchemaConfig,

    /// Industry-specific distribution profile.
    ///
    /// Accepts either the legacy bare-name form (`industry_profile: retail`) or
    /// the SP3 extended struct form with optional `priors` sub-section.
    /// `None` when the key is omitted.
    #[serde(default)]
    pub industry_profile: Option<IndustryProfileField>,

    /// Statistical validation configuration.
    #[serde(default)]
    pub validation: StatisticalValidationSchemaConfig,

    /// v3.4.4+ — Pareto heavy-tailed distribution for monetary amounts.
    /// When set and `enabled`, overrides `amounts` mixture model for the
    /// non-fraud amount-sampling path (fraud patterns remain orthogonal).
    /// Useful for capex, strategic contracts, and any domain where a small
    /// number of very large values dominates the tail.
    /// `None` when the key is omitted.
    #[serde(default)]
    pub pareto: Option<ParetoSchemaConfig>,
}
6447
/// Schema-level Pareto distribution configuration (v3.4.4+).
///
/// Thin wrapper around `datasynth_core::distributions::ParetoConfig` that
/// adds an `enabled` gate and serde-friendly field names.
///
/// `to_core_config` copies every field except `enabled` into the core type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParetoSchemaConfig {
    /// Enable Pareto sampling. When true, replaces the `amounts` mixture
    /// model for the non-fraud amount-sampling path. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Shape parameter (tail heaviness). Lower values → heavier tail.
    /// Typical range: 1.5-3.0. Default: 2.0.
    #[serde(default = "default_pareto_alpha")]
    pub alpha: f64,

    /// Scale / minimum value. All samples are >= x_min.
    /// Typical: 1000 (for capex) to 100,000 (for large contracts). Default: 100.
    #[serde(default = "default_pareto_x_min")]
    pub x_min: f64,

    /// Optional upper clamp. `None` = unbounded (recommended for realistic
    /// heavy tails). `None` is also the value used when the key is omitted.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Default: 2.
    #[serde(default = "default_pareto_decimal_places")]
    pub decimal_places: u8,
}
6478
/// Serde default for `ParetoSchemaConfig::alpha`.
fn default_pareto_alpha() -> f64 {
    const ALPHA: f64 = 2.0;
    ALPHA
}

/// Serde default for `ParetoSchemaConfig::x_min`.
fn default_pareto_x_min() -> f64 {
    const X_MIN: f64 = 100.0;
    X_MIN
}

/// Serde default for `ParetoSchemaConfig::decimal_places`.
fn default_pareto_decimal_places() -> u8 {
    const DECIMAL_PLACES: u8 = 2;
    DECIMAL_PLACES
}
6490
6491impl Default for ParetoSchemaConfig {
6492    fn default() -> Self {
6493        Self {
6494            enabled: false,
6495            alpha: default_pareto_alpha(),
6496            x_min: default_pareto_x_min(),
6497            max_value: None,
6498            decimal_places: default_pareto_decimal_places(),
6499        }
6500    }
6501}
6502
6503impl ParetoSchemaConfig {
6504    /// Convert this schema config into a `datasynth_core::distributions::ParetoConfig`.
6505    pub fn to_core_config(&self) -> datasynth_core::distributions::ParetoConfig {
6506        datasynth_core::distributions::ParetoConfig {
6507            alpha: self.alpha,
6508            x_min: self.x_min,
6509            max_value: self.max_value,
6510            decimal_places: self.decimal_places,
6511        }
6512    }
6513}
6514
/// Industry profile types for pre-configured distribution settings.
///
/// Variants (de)serialize in snake_case: `retail`, `manufacturing`,
/// `financial_services`, `healthcare`, `technology`. These config names are
/// not always the same as the bundled-priors slug returned by `slug()`
/// (`Healthcare` serializes as `healthcare` but its slug is `health`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndustryProfileType {
    /// Retail industry profile (POS sales, inventory, seasonal)
    Retail,
    /// Manufacturing industry profile (raw materials, maintenance, capital)
    Manufacturing,
    /// Financial services profile (wire transfers, ACH, fee income)
    FinancialServices,
    /// Healthcare profile (claims, procedures, supplies)
    Healthcare,
    /// Technology profile (subscriptions, services, R&D)
    Technology,
}
6530
6531impl IndustryProfileType {
6532    /// Return the lowercase ASCII slug used for bundled-priors filenames.
6533    ///
6534    /// E.g. `IndustryProfileType::FinancialServices => "financial_services"`.
6535    pub fn slug(self) -> &'static str {
6536        match self {
6537            Self::Retail => "retail",
6538            Self::Manufacturing => "manufacturing",
6539            Self::FinancialServices => "financial_services",
6540            // Matches SP2's bundle naming (corpus uses "Health", not "Healthcare").
6541            Self::Healthcare => "health",
6542            Self::Technology => "technology",
6543        }
6544    }
6545}
6546
6547// ---------------------------------------------------------------------------
6548// SP3 — IndustryProfileField: backward-compatible wrapper
6549// ---------------------------------------------------------------------------
6550
/// The value of `distributions.industry_profile` in config YAML.
///
/// Accepts both the legacy bare-name form:
/// ```yaml
/// distributions:
///   industry_profile: retail
/// ```
/// and the new SP3 extended struct form with optional `priors` sub-section:
/// ```yaml
/// distributions:
///   industry_profile:
///     name: retail
///     priors:
///       enabled: true
///       source: bundled
/// ```
///
/// The enum is `#[serde(untagged)]`: no discriminator key is written or
/// expected, and deserialization tries the variants in declaration order
/// (`Name` first, then `Full`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum IndustryProfileField {
    /// Legacy form: `industry_profile: retail`.
    Name(IndustryProfileType),
    /// New form: `industry_profile: { name: retail, priors: { ... } }`.
    Full(IndustryProfileFull),
}
6575
6576impl IndustryProfileField {
6577    /// Return the bare `IndustryProfileType` regardless of which form was used.
6578    pub fn profile_type(&self) -> IndustryProfileType {
6579        match self {
6580            IndustryProfileField::Name(t) => *t,
6581            IndustryProfileField::Full(f) => f.name,
6582        }
6583    }
6584
6585    /// Return the optional `priors` sub-section, if present.
6586    pub fn priors(&self) -> Option<&IndustryPriorsConfig> {
6587        match self {
6588            IndustryProfileField::Name(_) => None,
6589            IndustryProfileField::Full(f) => f.priors.as_ref(),
6590        }
6591    }
6592}
6593
/// Extended industry profile struct used when `priors` is needed (SP3).
///
/// This is the payload of `IndustryProfileField::Full`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndustryProfileFull {
    /// The industry variant (same values as the bare-name legacy form).
    /// Required: this field has no serde default.
    pub name: IndustryProfileType,
    /// Optional SP3 priors sub-section. `None` when omitted, and left out of
    /// serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub priors: Option<IndustryPriorsConfig>,
}
6603
/// SP3 — configuration for industry-prior injection.
///
/// When `enabled = true`, the generator uses pre-baked statistical priors
/// for the given industry. `source` selects whether to use bundled priors or
/// load from a user-supplied file (requires `path`).
///
/// `Default` is derived: disabled, `source = bundled`, no `path`, and
/// velocity calibration off.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct IndustryPriorsConfig {
    /// Enable prior injection. When false the rest of the struct is ignored.
    #[serde(default)]
    pub enabled: bool,

    /// Where to load the priors from. Defaults to `PriorsSource::Bundled`.
    #[serde(default)]
    pub source: PriorsSource,

    /// Path to the priors file. Required when `source = file`.
    /// Left out of serialized output when `None`.
    // NOTE(review): nothing in this struct enforces the `source = file` ⇒
    // `path` requirement; presumably it is validated elsewhere — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<std::path::PathBuf>,

    /// SP3.4 — enable online velocity-rule calibrator. Adds per-line overhead
    /// when `true`; default `false` keeps v5.12/v5.13-without-calibration behavior.
    #[serde(default)]
    pub velocity_calibration: bool,
}
6628
/// Source of industry priors.
///
/// Variants (de)serialize in lowercase: `bundled` and `file`.
#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum PriorsSource {
    /// Use the priors bundled with the binary (default).
    #[default]
    Bundled,
    /// Load priors from a user-supplied file (requires `path`).
    File,
}
6639
/// Mixture model distribution configuration.
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureDistributionSchemaConfig {
    /// Enable mixture model for amount generation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Distribution type: "gaussian" or "log_normal" (variant names are
    /// serialized in snake_case). Defaults to `log_normal`.
    #[serde(default = "default_mixture_type")]
    pub distribution_type: MixtureDistributionType,

    /// Mixture components with weights. Empty when omitted, in which case
    /// the `to_*_config` converters return `None`.
    #[serde(default)]
    pub components: Vec<MixtureComponentConfig>,

    /// Minimum value constraint. Defaults to `0.01`.
    #[serde(default = "default_min_amount")]
    pub min_value: f64,

    /// Maximum value constraint (optional). `None` when omitted.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Defaults to `2`.
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
6667
6668fn default_mixture_type() -> MixtureDistributionType {
6669    MixtureDistributionType::LogNormal
6670}
6671
6672fn default_min_amount() -> f64 {
6673    0.01
6674}
6675
6676fn default_decimal_places() -> u8 {
6677    2
6678}
6679
6680impl Default for MixtureDistributionSchemaConfig {
6681    fn default() -> Self {
6682        Self {
6683            enabled: false,
6684            distribution_type: MixtureDistributionType::LogNormal,
6685            components: Vec::new(),
6686            min_value: 0.01,
6687            max_value: None,
6688            decimal_places: 2,
6689        }
6690    }
6691}
6692
6693impl MixtureDistributionSchemaConfig {
6694    /// Convert this schema-level config into a `LogNormalMixtureConfig`
6695    /// suitable for `LogNormalMixtureSampler::new`. Returns `None` if there
6696    /// are no components (schema default is an empty list, which cannot
6697    /// drive a sampler).
6698    ///
6699    /// Callers should gate this with `self.enabled` before invoking.
6700    pub fn to_log_normal_config(
6701        &self,
6702    ) -> Option<datasynth_core::distributions::LogNormalMixtureConfig> {
6703        if self.components.is_empty() {
6704            return None;
6705        }
6706        Some(datasynth_core::distributions::LogNormalMixtureConfig {
6707            components: self
6708                .components
6709                .iter()
6710                .map(|c| match &c.label {
6711                    Some(lbl) => datasynth_core::distributions::LogNormalComponent::with_label(
6712                        c.weight,
6713                        c.mu,
6714                        c.sigma,
6715                        lbl.clone(),
6716                    ),
6717                    None => datasynth_core::distributions::LogNormalComponent::new(
6718                        c.weight, c.mu, c.sigma,
6719                    ),
6720                })
6721                .collect(),
6722            min_value: self.min_value,
6723            max_value: self.max_value,
6724            decimal_places: self.decimal_places,
6725        })
6726    }
6727
6728    /// Convert this schema-level config into a `GaussianMixtureConfig`.
6729    /// Returns `None` if there are no components.
6730    pub fn to_gaussian_config(
6731        &self,
6732    ) -> Option<datasynth_core::distributions::GaussianMixtureConfig> {
6733        if self.components.is_empty() {
6734            return None;
6735        }
6736        Some(datasynth_core::distributions::GaussianMixtureConfig {
6737            components: self
6738                .components
6739                .iter()
6740                .map(|c| {
6741                    datasynth_core::distributions::GaussianComponent::new(c.weight, c.mu, c.sigma)
6742                })
6743                .collect(),
6744            allow_negative: true,
6745            min_value: Some(self.min_value),
6746            max_value: self.max_value,
6747        })
6748    }
6749}
6750
/// Mixture distribution type.
///
/// Variants (de)serialize in snake_case: `gaussian` and `log_normal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MixtureDistributionType {
    /// Gaussian (normal) mixture
    Gaussian,
    /// Log-normal mixture (for positive amounts). This is the default variant.
    #[default]
    LogNormal,
}
6761
/// Configuration for a single mixture component.
///
/// `weight`, `mu`, and `sigma` have no serde default and must be present;
/// only `label` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponentConfig {
    /// Weight of this component (must sum to 1.0 across all components).
    // NOTE(review): the sum-to-1.0 requirement is not checked in this
    // struct; presumably validated downstream — confirm.
    pub weight: f64,

    /// Location parameter (mean for Gaussian, mu for log-normal).
    pub mu: f64,

    /// Scale parameter (std dev for Gaussian, sigma for log-normal).
    pub sigma: f64,

    /// Optional label for this component (e.g., "routine", "significant", "major").
    /// `None` when omitted.
    #[serde(default)]
    pub label: Option<String>,
}
6778
/// Cross-field correlation configuration.
///
/// Every field carries a serde default, so any subset of keys may be omitted
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationSchemaConfig {
    /// Enable correlation modeling. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Copula type for dependency modeling. Defaults to `gaussian`.
    #[serde(default)]
    pub copula_type: CopulaSchemaType,

    /// Field definitions for correlation. The position of a field in this
    /// list is its row/column index in `matrix`.
    #[serde(default)]
    pub fields: Vec<CorrelatedFieldConfig>,

    /// Correlation matrix (upper triangular, row-major).
    /// For n fields, this should have n*(n-1)/2 values.
    ///
    /// `correlation_between` also accepts a full n×n matrix (n*n values,
    /// row-major); any other length makes it return `None`.
    #[serde(default)]
    pub matrix: Vec<f64>,

    /// Expected correlations for validation.
    #[serde(default)]
    pub expected_correlations: Vec<ExpectedCorrelationConfig>,
}
6803
6804impl Default for CorrelationSchemaConfig {
6805    fn default() -> Self {
6806        Self {
6807            enabled: false,
6808            copula_type: CopulaSchemaType::Gaussian,
6809            fields: Vec::new(),
6810            matrix: Vec::new(),
6811            expected_correlations: Vec::new(),
6812        }
6813    }
6814}
6815
6816impl CorrelationSchemaConfig {
6817    /// v3.5.4+: extract the correlation for a specific field pair from
6818    /// either the upper-triangular flat matrix (n*(n-1)/2 values) or a
6819    /// full symmetric n×n matrix (n*n values). Returns `None` when the
6820    /// named fields aren't both present or the matrix shape doesn't
6821    /// match.
6822    pub fn correlation_between(&self, field_a: &str, field_b: &str) -> Option<f64> {
6823        let idx_a = self.fields.iter().position(|f| f.name == field_a)?;
6824        let idx_b = self.fields.iter().position(|f| f.name == field_b)?;
6825        if idx_a == idx_b {
6826            return Some(1.0);
6827        }
6828        let (i, j) = if idx_a < idx_b {
6829            (idx_a, idx_b)
6830        } else {
6831            (idx_b, idx_a)
6832        };
6833        let n = self.fields.len();
6834        // Full n×n symmetric matrix?
6835        if self.matrix.len() == n * n {
6836            return self.matrix.get(idx_a * n + idx_b).copied();
6837        }
6838        // Upper triangular flat (row-major, excluding diagonal)?
6839        let expected_tri = n * (n - 1) / 2;
6840        if self.matrix.len() == expected_tri {
6841            // Row i, col j where j > i: flat index is
6842            //   sum_{k=0..i}((n-1-k)) + (j - i - 1)
6843            // = i*(n-1) - i*(i-1)/2 + (j - i - 1)
6844            let flat = i * (n - 1) - i * (i.saturating_sub(1)) / 2 + (j - i - 1);
6845            return self.matrix.get(flat).copied();
6846        }
6847        None
6848    }
6849
6850    /// Convert this schema config to a core `CopulaConfig` when the
6851    /// declared field pair `(field_a, field_b)` has a valid correlation
6852    /// entry. Returns `None` when disabled, fields missing, or matrix
6853    /// malformed.
6854    pub fn to_core_config_for_pair(
6855        &self,
6856        field_a: &str,
6857        field_b: &str,
6858    ) -> Option<datasynth_core::distributions::CopulaConfig> {
6859        if !self.enabled {
6860            return None;
6861        }
6862        let rho = self.correlation_between(field_a, field_b)?;
6863        use datasynth_core::distributions::{CopulaConfig, CopulaType};
6864        let copula_type = match self.copula_type {
6865            CopulaSchemaType::Gaussian => CopulaType::Gaussian,
6866            CopulaSchemaType::Clayton => CopulaType::Clayton,
6867            CopulaSchemaType::Gumbel => CopulaType::Gumbel,
6868            CopulaSchemaType::Frank => CopulaType::Frank,
6869            CopulaSchemaType::StudentT => CopulaType::StudentT,
6870        };
6871        // Gaussian / StudentT interpret theta as correlation; others
6872        // as a shape parameter. Minimal v3.5.4 only wires Gaussian in
6873        // the runtime, but the converter is general so follow-ups can
6874        // light up the other copulas.
6875        let theta = rho.clamp(-0.999, 0.999);
6876        Some(CopulaConfig {
6877            copula_type,
6878            theta,
6879            degrees_of_freedom: 4.0,
6880        })
6881    }
6882}
6883
/// Copula type for dependency modeling.
///
/// Variants (de)serialize in snake_case: `gaussian`, `clayton`, `gumbel`,
/// `frank`, `student_t`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CopulaSchemaType {
    /// Gaussian copula (symmetric, no tail dependence). This is the default
    /// variant.
    #[default]
    Gaussian,
    /// Clayton copula (lower tail dependence)
    Clayton,
    /// Gumbel copula (upper tail dependence)
    Gumbel,
    /// Frank copula (symmetric, no tail dependence)
    Frank,
    /// Student-t copula (both tail dependencies)
    StudentT,
}
6900
/// Configuration for a correlated field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelatedFieldConfig {
    /// Field name. Required; `CorrelationSchemaConfig::correlation_between`
    /// looks fields up by this name.
    pub name: String,

    /// Marginal distribution type. When omitted, falls back to
    /// `MarginalDistributionConfig::default()` (normal, mu 0.0, sigma 1.0).
    #[serde(default)]
    pub distribution: MarginalDistributionConfig,
}
6911
/// Marginal distribution configuration.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the snake_case variant name (`normal`, `log_normal`, `uniform`,
/// `discrete_uniform`), with the variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MarginalDistributionConfig {
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum value
        min: f64,
        /// Maximum value
        max: f64,
    },
    /// Discrete uniform distribution.
    DiscreteUniform {
        /// Minimum integer value
        min: i32,
        /// Maximum integer value
        max: i32,
    },
}
6945
6946impl Default for MarginalDistributionConfig {
6947    fn default() -> Self {
6948        Self::Normal {
6949            mu: 0.0,
6950            sigma: 1.0,
6951        }
6952    }
6953}
6954
/// Expected correlation for validation.
///
/// `field1`, `field2`, and `expected_r` are required; only `tolerance` has a
/// serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedCorrelationConfig {
    /// First field name.
    pub field1: String,
    /// Second field name.
    pub field2: String,
    /// Expected correlation coefficient.
    pub expected_r: f64,
    /// Acceptable tolerance. Defaults to `0.10`
    /// (`default_correlation_tolerance`).
    #[serde(default = "default_correlation_tolerance")]
    pub tolerance: f64,
}
6968
/// Serde default for `ExpectedCorrelationConfig::tolerance`.
fn default_correlation_tolerance() -> f64 {
    const CORRELATION_TOLERANCE: f64 = 0.10;
    CORRELATION_TOLERANCE
}
6972
/// Conditional distribution configuration.
///
/// `output_field` and `input_field` are required; every other field has a
/// serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalDistributionSchemaConfig {
    /// Output field name to generate.
    pub output_field: String,

    /// Input field name that conditions the distribution.
    pub input_field: String,

    /// Breakpoints defining distribution changes. Empty when omitted.
    #[serde(default)]
    pub breakpoints: Vec<ConditionalBreakpointConfig>,

    /// Default distribution when below all breakpoints.
    ///
    /// When omitted this is `ConditionalDistributionParamsConfig::default()`,
    /// i.e. `Normal { mu: 0.0, sigma: 1.0 }`.
    #[serde(default)]
    pub default_distribution: ConditionalDistributionParamsConfig,

    /// Minimum output value constraint. `None` when omitted.
    #[serde(default)]
    pub min_value: Option<f64>,

    /// Maximum output value constraint. `None` when omitted.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for output rounding. Defaults to `2`
    /// (`default_decimal_places`).
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
7002
/// Breakpoint for conditional distribution.
///
/// Both fields are required; neither has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalBreakpointConfig {
    /// Input value threshold.
    pub threshold: f64,

    /// Distribution to use when input >= threshold.
    pub distribution: ConditionalDistributionParamsConfig,
}
7012
7013impl ConditionalDistributionSchemaConfig {
7014    /// Convert this schema config into a core
7015    /// `ConditionalDistributionConfig` suitable for
7016    /// `ConditionalSampler::new`. v3.5.3+.
7017    pub fn to_core_config(&self) -> datasynth_core::distributions::ConditionalDistributionConfig {
7018        use datasynth_core::distributions::{
7019            Breakpoint, ConditionalDistributionConfig, ConditionalDistributionParams,
7020        };
7021
7022        let default_distribution = convert_conditional_params(&self.default_distribution);
7023        let breakpoints: Vec<Breakpoint> = self
7024            .breakpoints
7025            .iter()
7026            .map(|bp| Breakpoint {
7027                threshold: bp.threshold,
7028                distribution: convert_conditional_params(&bp.distribution),
7029            })
7030            .collect();
7031
7032        // Use a sentinel default_distribution when the schema default is
7033        // its factory default (Fixed { value: 0.0 })  and we have
7034        // breakpoints — we don't want to clobber data for values below
7035        // the first breakpoint.
7036        let final_default = if breakpoints.is_empty() {
7037            default_distribution
7038        } else {
7039            match default_distribution {
7040                ConditionalDistributionParams::Fixed { value: 0.0 } => {
7041                    // Reuse the first breakpoint's distribution as the
7042                    // default to avoid surprising zeros.
7043                    breakpoints[0].distribution.clone()
7044                }
7045                other => other,
7046            }
7047        };
7048
7049        ConditionalDistributionConfig {
7050            output_field: self.output_field.clone(),
7051            input_field: self.input_field.clone(),
7052            breakpoints,
7053            default_distribution: final_default,
7054            min_value: self.min_value,
7055            max_value: self.max_value,
7056            decimal_places: self.decimal_places,
7057        }
7058    }
7059}
7060
7061fn convert_conditional_params(
7062    p: &ConditionalDistributionParamsConfig,
7063) -> datasynth_core::distributions::ConditionalDistributionParams {
7064    use datasynth_core::distributions::ConditionalDistributionParams as Core;
7065    match p {
7066        ConditionalDistributionParamsConfig::Fixed { value } => Core::Fixed { value: *value },
7067        ConditionalDistributionParamsConfig::Normal { mu, sigma } => Core::Normal {
7068            mu: *mu,
7069            sigma: *sigma,
7070        },
7071        ConditionalDistributionParamsConfig::LogNormal { mu, sigma } => Core::LogNormal {
7072            mu: *mu,
7073            sigma: *sigma,
7074        },
7075        ConditionalDistributionParamsConfig::Uniform { min, max } => Core::Uniform {
7076            min: *min,
7077            max: *max,
7078        },
7079        ConditionalDistributionParamsConfig::Beta {
7080            alpha,
7081            beta,
7082            min,
7083            max,
7084        } => Core::Beta {
7085            alpha: *alpha,
7086            beta: *beta,
7087            min: *min,
7088            max: *max,
7089        },
7090        ConditionalDistributionParamsConfig::Discrete { values, weights } => Core::Discrete {
7091            values: values.clone(),
7092            weights: weights.clone(),
7093        },
7094    }
7095}
7096
/// Distribution parameters for conditional distributions.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the snake_case variant name (`fixed`, `normal`, `log_normal`, `uniform`,
/// `beta`, `discrete`), with the variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ConditionalDistributionParamsConfig {
    /// Fixed value.
    Fixed {
        /// The fixed value
        value: f64,
    },
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum
        min: f64,
        /// Maximum
        max: f64,
    },
    /// Beta distribution (scaled).
    Beta {
        /// Alpha parameter
        alpha: f64,
        /// Beta parameter
        beta: f64,
        /// Minimum output value
        min: f64,
        /// Maximum output value
        max: f64,
    },
    /// Discrete values with weights.
    Discrete {
        /// Possible values
        values: Vec<f64>,
        /// Weights (should sum to 1.0)
        weights: Vec<f64>,
    },
}
7146
7147impl Default for ConditionalDistributionParamsConfig {
7148    fn default() -> Self {
7149        Self::Normal {
7150            mu: 0.0,
7151            sigma: 1.0,
7152        }
7153    }
7154}
7155
/// Regime change configuration.
///
/// Every field may be omitted from the input; omitted fields take their
/// type's default (disabled, empty lists, no economic cycle).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RegimeChangeSchemaConfig {
    /// Enable regime change modeling.
    #[serde(default)]
    pub enabled: bool,

    /// List of regime changes.
    #[serde(default)]
    pub changes: Vec<RegimeChangeEventConfig>,

    /// Economic cycle configuration.
    #[serde(default)]
    pub economic_cycle: Option<EconomicCycleSchemaConfig>,

    /// Parameter drift configurations.
    #[serde(default)]
    pub parameter_drifts: Vec<ParameterDriftSchemaConfig>,
}
7175
/// A single regime change event.
///
/// `date` and `change_type` are required; `description` and `effects`
/// may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeChangeEventConfig {
    /// Date when the change occurs (ISO 8601 format).
    ///
    /// Kept as a raw string here; `RegimeChangeSchemaConfig::apply_to`
    /// parses it with the `%Y-%m-%d` pattern.
    pub date: String,

    /// Type of regime change.
    pub change_type: RegimeChangeTypeConfig,

    /// Description of the change.
    #[serde(default)]
    pub description: Option<String>,

    /// Effects of this regime change.
    #[serde(default)]
    pub effects: Vec<RegimeEffectConfig>,
}
7193
/// Type of regime change.
///
/// Variants are (de)serialized by their snake_case names, e.g.
/// `price_increase` or `product_discontinuation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RegimeChangeTypeConfig {
    /// Acquisition - sudden volume and amount increase
    Acquisition,
    /// Divestiture - sudden volume and amount decrease
    Divestiture,
    /// Price increase - amounts increase
    PriceIncrease,
    /// Price decrease - amounts decrease
    PriceDecrease,
    /// New product launch - volume ramp-up
    ProductLaunch,
    /// Product discontinuation - volume ramp-down
    ProductDiscontinuation,
    /// Policy change - affects patterns
    PolicyChange,
    /// Competitor entry - market disruption
    CompetitorEntry,
    /// Custom effect
    Custom,
}
7217
/// Effect of a regime change on a specific field.
///
/// Both fields are required when an effect is listed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeEffectConfig {
    /// Field being affected.
    pub field: String,

    /// Multiplier to apply (1.0 = no change, 1.5 = 50% increase).
    pub multiplier: f64,
}
7227
/// Economic cycle configuration.
///
/// All fields may be omitted; the serde defaults mirror the hand-written
/// `Default` impl (disabled, 48-month period, 0.15 amplitude, no phase
/// offset, no recessions).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EconomicCycleSchemaConfig {
    /// Enable economic cycle modeling.
    #[serde(default)]
    pub enabled: bool,

    /// Cycle period in months (e.g., 48 for 4-year business cycle).
    /// Defaults to 48.
    #[serde(default = "default_cycle_period")]
    pub period_months: u32,

    /// Amplitude of cycle effect (0.0-1.0). Defaults to 0.15.
    #[serde(default = "default_cycle_amplitude")]
    pub amplitude: f64,

    /// Phase offset in months.
    #[serde(default)]
    pub phase_offset: u32,

    /// Recession periods (start_month, duration_months).
    #[serde(default)]
    pub recessions: Vec<RecessionPeriodConfig>,
}
7251
/// Serde default for `EconomicCycleSchemaConfig::period_months`: 48 months.
fn default_cycle_period() -> u32 {
    const CYCLE_PERIOD_MONTHS: u32 = 48;
    CYCLE_PERIOD_MONTHS
}
7255
/// Serde default for `EconomicCycleSchemaConfig::amplitude`: 0.15.
fn default_cycle_amplitude() -> f64 {
    const CYCLE_AMPLITUDE: f64 = 0.15;
    CYCLE_AMPLITUDE
}
7259
7260impl Default for EconomicCycleSchemaConfig {
7261    fn default() -> Self {
7262        Self {
7263            enabled: false,
7264            period_months: 48,
7265            amplitude: 0.15,
7266            phase_offset: 0,
7267            recessions: Vec::new(),
7268        }
7269    }
7270}
7271
/// Recession period configuration.
///
/// `start_month` and `duration_months` are required; `severity` falls back
/// to 0.20 when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodConfig {
    /// Start month (0-indexed from generation start).
    pub start_month: u32,

    /// Duration in months.
    pub duration_months: u32,

    /// Severity (0.0-1.0, affects volume reduction).
    #[serde(default = "default_recession_severity")]
    pub severity: f64,
}
7285
7286impl RegimeChangeSchemaConfig {
7287    /// Populate the regime-change, economic-cycle, and parameter-drift
7288    /// slots on a `DriftConfig` from this schema config. v3.5.2+.
7289    ///
7290    /// `generation_start` must match `config.global.start_date` so that
7291    /// absolute regime-change dates can be mapped to 0-indexed periods.
7292    /// Unparseable / out-of-range dates are silently skipped to keep
7293    /// runtime robust against user typos.
7294    pub fn apply_to(
7295        &self,
7296        drift: &mut datasynth_core::distributions::DriftConfig,
7297        generation_start: chrono::NaiveDate,
7298    ) {
7299        if !self.enabled {
7300            return;
7301        }
7302
7303        // Enable drift if any regime-change feature wants it.
7304        drift.enabled = true;
7305
7306        // Regime-change events (absolute dates → period offsets).
7307        for event in &self.changes {
7308            let period = match chrono::NaiveDate::parse_from_str(&event.date, "%Y-%m-%d") {
7309                Ok(d) => {
7310                    let days = (d - generation_start).num_days();
7311                    if days < 0 {
7312                        continue;
7313                    }
7314                    // Approximate month by dividing by 30.4 so we don't
7315                    // need chrono::Months arithmetic.
7316                    (days as f64 / 30.4).round() as u32
7317                }
7318                Err(_) => continue,
7319            };
7320            let change_type = convert_regime_change_type(event.change_type);
7321            let core_effects = event
7322                .effects
7323                .iter()
7324                .map(|e| datasynth_core::distributions::RegimeEffect {
7325                    field: e.field.clone(),
7326                    multiplier: e.multiplier,
7327                })
7328                .collect();
7329            drift
7330                .regime_changes
7331                .push(datasynth_core::distributions::RegimeChange {
7332                    period,
7333                    change_type,
7334                    description: event.description.clone(),
7335                    effects: core_effects,
7336                    transition_periods: 0,
7337                });
7338        }
7339
7340        // Economic cycle.
7341        if let Some(ec) = &self.economic_cycle {
7342            if ec.enabled {
7343                let recession_periods: Vec<u32> = ec
7344                    .recessions
7345                    .iter()
7346                    .flat_map(|r| r.start_month..r.start_month + r.duration_months)
7347                    .collect();
7348                // Use the most-severe recession as the severity driver;
7349                // fall back to default when none declared.
7350                let severity = ec
7351                    .recessions
7352                    .iter()
7353                    .map(|r| 1.0 - r.severity)
7354                    .fold(0.75f64, f64::min);
7355                drift.economic_cycle = datasynth_core::distributions::EconomicCycleConfig {
7356                    enabled: true,
7357                    cycle_length: ec.period_months,
7358                    amplitude: ec.amplitude,
7359                    phase_offset: ec.phase_offset,
7360                    recession_periods,
7361                    recession_severity: severity,
7362                };
7363                drift.drift_type = datasynth_core::distributions::DriftType::Mixed;
7364            }
7365        }
7366
7367        // Parameter drifts.
7368        for pd in &self.parameter_drifts {
7369            let drift_type = match pd.drift_type {
7370                ParameterDriftTypeConfig::Linear => {
7371                    datasynth_core::distributions::ParameterDriftType::Linear
7372                }
7373                ParameterDriftTypeConfig::Exponential => {
7374                    datasynth_core::distributions::ParameterDriftType::Exponential
7375                }
7376                ParameterDriftTypeConfig::Logistic => {
7377                    datasynth_core::distributions::ParameterDriftType::Logistic
7378                }
7379                ParameterDriftTypeConfig::Step => {
7380                    datasynth_core::distributions::ParameterDriftType::Step
7381                }
7382            };
7383            drift
7384                .parameter_drifts
7385                .push(datasynth_core::distributions::ParameterDrift {
7386                    parameter: pd.parameter.clone(),
7387                    drift_type,
7388                    initial_value: pd.start_value,
7389                    target_or_rate: pd.end_value,
7390                    start_period: pd.start_period,
7391                    end_period: pd.end_period,
7392                    steepness: 1.0,
7393                });
7394        }
7395    }
7396}
7397
7398fn convert_regime_change_type(
7399    t: RegimeChangeTypeConfig,
7400) -> datasynth_core::distributions::RegimeChangeType {
7401    use datasynth_core::distributions::RegimeChangeType as Core;
7402    match t {
7403        RegimeChangeTypeConfig::Acquisition => Core::Acquisition,
7404        RegimeChangeTypeConfig::Divestiture => Core::Divestiture,
7405        RegimeChangeTypeConfig::PriceIncrease => Core::PriceIncrease,
7406        RegimeChangeTypeConfig::PriceDecrease => Core::PriceDecrease,
7407        RegimeChangeTypeConfig::ProductLaunch => Core::ProductLaunch,
7408        RegimeChangeTypeConfig::ProductDiscontinuation => Core::ProductDiscontinuation,
7409        RegimeChangeTypeConfig::PolicyChange => Core::PolicyChange,
7410        RegimeChangeTypeConfig::CompetitorEntry => Core::CompetitorEntry,
7411        RegimeChangeTypeConfig::Custom => Core::Custom,
7412    }
7413}
7414
/// Serde default for `RecessionPeriodConfig::severity`: 0.20.
fn default_recession_severity() -> f64 {
    const RECESSION_SEVERITY: f64 = 0.20;
    RECESSION_SEVERITY
}
7418
/// Parameter drift configuration.
///
/// `parameter`, `drift_type`, `start_value` and `end_value` are required;
/// the period bounds may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterDriftSchemaConfig {
    /// Parameter being drifted.
    pub parameter: String,

    /// Drift type.
    pub drift_type: ParameterDriftTypeConfig,

    /// Start value.
    pub start_value: f64,

    /// End value.
    pub end_value: f64,

    /// Start period (month, 0-indexed). Defaults to 0 when omitted.
    #[serde(default)]
    pub start_period: u32,

    /// End period (month, optional - defaults to end of generation).
    #[serde(default)]
    pub end_period: Option<u32>,
}
7442
/// Parameter drift type.
///
/// Variants are (de)serialized by their snake_case names (`linear`,
/// `exponential`, `logistic`, `step`); `Linear` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ParameterDriftTypeConfig {
    /// Linear interpolation
    #[default]
    Linear,
    /// Exponential growth/decay
    Exponential,
    /// S-curve (logistic)
    Logistic,
    /// Step function
    Step,
}
7457
/// Statistical validation configuration.
///
/// Every field may be omitted; the defaults are disabled, no tests, and the
/// default `ValidationReportingConfig`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StatisticalValidationSchemaConfig {
    /// Enable statistical validation.
    #[serde(default)]
    pub enabled: bool,

    /// Statistical tests to run.
    #[serde(default)]
    pub tests: Vec<StatisticalTestConfig>,

    /// Validation reporting configuration.
    #[serde(default)]
    pub reporting: ValidationReportingConfig,
}
7473
/// Statistical test configuration.
///
/// Serialized as an internally tagged enum: the `type` field selects the
/// test by its snake_case name (`benford_first_digit`, `distribution_fit`,
/// `correlation_check`, `chi_squared`, `anderson_darling`) and the remaining
/// fields configure it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StatisticalTestConfig {
    /// Benford's Law first digit test.
    BenfordFirstDigit {
        /// Threshold MAD for failure. Defaults to 0.015.
        #[serde(default = "default_benford_threshold")]
        threshold_mad: f64,
        /// Warning MAD threshold. Defaults to 0.010.
        #[serde(default = "default_benford_warning")]
        warning_mad: f64,
    },
    /// Distribution fit test.
    DistributionFit {
        /// Target distribution to test.
        target: TargetDistributionConfig,
        /// K-S test significance level. Defaults to 0.05.
        #[serde(default = "default_ks_significance")]
        ks_significance: f64,
        /// Test method (`kolmogorov_smirnov`, `anderson_darling`,
        /// `chi_squared`). Defaults to `DistributionFitMethod::default()`.
        #[serde(default)]
        method: DistributionFitMethod,
    },
    /// Correlation check.
    CorrelationCheck {
        /// Expected correlations to validate.
        expected_correlations: Vec<ExpectedCorrelationConfig>,
    },
    /// Chi-squared test.
    ChiSquared {
        /// Number of bins. Defaults to 10.
        #[serde(default = "default_chi_squared_bins")]
        bins: usize,
        /// Significance level. Defaults to 0.05.
        #[serde(default = "default_chi_squared_significance")]
        significance: f64,
    },
    /// Anderson-Darling test.
    AndersonDarling {
        /// Target distribution.
        target: TargetDistributionConfig,
        /// Significance level. Defaults to 0.05.
        #[serde(default = "default_ad_significance")]
        significance: f64,
    },
}
7521
/// Serde default for the Benford test's failure MAD threshold: 0.015.
fn default_benford_threshold() -> f64 {
    const BENFORD_FAILURE_MAD: f64 = 0.015;
    BENFORD_FAILURE_MAD
}
7525
/// Serde default for the Benford test's warning MAD threshold: 0.010.
fn default_benford_warning() -> f64 {
    const BENFORD_WARNING_MAD: f64 = 0.010;
    BENFORD_WARNING_MAD
}
7529
/// Serde default for the distribution-fit K-S significance level: 0.05.
fn default_ks_significance() -> f64 {
    const KS_SIGNIFICANCE: f64 = 0.05;
    KS_SIGNIFICANCE
}
7533
/// Serde default for the chi-squared test's bin count: 10.
fn default_chi_squared_bins() -> usize {
    const CHI_SQUARED_BINS: usize = 10;
    CHI_SQUARED_BINS
}
7537
/// Serde default for the chi-squared test's significance level: 0.05.
fn default_chi_squared_significance() -> f64 {
    const CHI_SQUARED_SIGNIFICANCE: f64 = 0.05;
    CHI_SQUARED_SIGNIFICANCE
}
7541
/// Serde default for the Anderson-Darling test's significance level: 0.05.
fn default_ad_significance() -> f64 {
    const AD_SIGNIFICANCE: f64 = 0.05;
    AD_SIGNIFICANCE
}
7545
/// Target distribution for fit tests.
///
/// Variants are (de)serialized by their snake_case names (`normal`,
/// `log_normal`, `exponential`, `uniform`); `LogNormal` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TargetDistributionConfig {
    /// Normal distribution
    Normal,
    /// Log-normal distribution
    #[default]
    LogNormal,
    /// Exponential distribution
    Exponential,
    /// Uniform distribution
    Uniform,
}
7560
7561/// Distribution fit test method.
7562#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7563#[serde(rename_all = "snake_case")]
7564pub enum DistributionFitMethod {
7565    /// Kolmogorov-Smirnov test
7566    #[default]
7567    KolmogorovSmirnov,
7568    /// Anderson-Darling test
7569    AndersonDarling,
7570    /// Chi-squared test
7571    ChiSquared,
7572}
7573
/// Validation reporting configuration.
///
/// Every field may be omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationReportingConfig {
    /// Output validation report to file.
    #[serde(default)]
    pub output_report: bool,

    /// Report format.
    #[serde(default)]
    pub format: ValidationReportFormat,

    /// Fail generation if validation fails.
    #[serde(default)]
    pub fail_on_error: bool,

    /// Include detailed statistics in report.
    // The serde default comes from `default_true`, which is defined outside
    // this section; the hand-written `Default` impl uses `true`.
    #[serde(default = "default_true")]
    pub include_details: bool,
}
7593
7594impl Default for ValidationReportingConfig {
7595    fn default() -> Self {
7596        Self {
7597            output_report: false,
7598            format: ValidationReportFormat::Json,
7599            fail_on_error: false,
7600            include_details: true,
7601        }
7602    }
7603}
7604
/// Validation report format.
///
/// Variants are (de)serialized by their snake_case names (`json`, `yaml`,
/// `html`); `Json` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ValidationReportFormat {
    /// JSON format
    #[default]
    Json,
    /// YAML format
    Yaml,
    /// HTML report
    Html,
}
7617
7618// =============================================================================
7619// Temporal Patterns Configuration
7620// =============================================================================
7621
/// Temporal patterns configuration for business days, period-end dynamics, and processing lags.
///
/// This section enables sophisticated temporal modeling including:
/// - Business day calculations and settlement dates
/// - Regional holiday calendars
/// - Period-end decay curves (non-flat volume spikes)
/// - Processing lag modeling (event-to-posting delays)
///
/// Every sub-section may be omitted from the input, in which case that
/// sub-section's own `Default` value is used.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TemporalPatternsConfig {
    /// Enable temporal patterns features.
    #[serde(default)]
    pub enabled: bool,

    /// Business day calculation configuration.
    #[serde(default)]
    pub business_days: BusinessDaySchemaConfig,

    /// Regional calendar configuration.
    #[serde(default)]
    pub calendars: CalendarSchemaConfig,

    /// Period-end dynamics configuration.
    #[serde(default)]
    pub period_end: PeriodEndSchemaConfig,

    /// Processing lag configuration.
    #[serde(default)]
    pub processing_lags: ProcessingLagSchemaConfig,

    /// Fiscal calendar configuration (custom year start, 4-4-5, 13-period).
    #[serde(default)]
    pub fiscal_calendar: FiscalCalendarSchemaConfig,

    /// Intra-day patterns configuration (morning spike, lunch dip, EOD rush).
    #[serde(default)]
    pub intraday: IntraDaySchemaConfig,

    /// Timezone handling configuration.
    #[serde(default)]
    pub timezones: TimezoneSchemaConfig,
}
7663
/// Business day calculation configuration.
///
/// Every field may be omitted from the input. Policy and convention values
/// are carried as plain strings; this type does not validate them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusinessDaySchemaConfig {
    /// Enable business day calculations.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Half-day policy: "full_day", "half_day", "non_business_day".
    /// Defaults to "half_day".
    #[serde(default = "default_half_day_policy")]
    pub half_day_policy: String,

    /// Settlement rules configuration.
    #[serde(default)]
    pub settlement_rules: SettlementRulesSchemaConfig,

    /// Month-end convention: "modified_following", "preceding", "following", "end_of_month".
    /// Defaults to "modified_following".
    #[serde(default = "default_month_end_convention")]
    pub month_end_convention: String,

    /// Weekend days (e.g., ["saturday", "sunday"] or ["friday", "saturday"] for Middle East).
    #[serde(default)]
    pub weekend_days: Option<Vec<String>>,
}
7687
/// Serde default for `BusinessDaySchemaConfig::half_day_policy`.
fn default_half_day_policy() -> String {
    String::from("half_day")
}
7691
/// Serde default for `BusinessDaySchemaConfig::month_end_convention`.
fn default_month_end_convention() -> String {
    String::from("modified_following")
}
7695
7696impl Default for BusinessDaySchemaConfig {
7697    fn default() -> Self {
7698        Self {
7699            enabled: true,
7700            half_day_policy: "half_day".to_string(),
7701            settlement_rules: SettlementRulesSchemaConfig::default(),
7702            month_end_convention: "modified_following".to_string(),
7703            weekend_days: None,
7704        }
7705    }
7706}
7707
/// Settlement rules configuration.
///
/// Every field may be omitted; each then takes the default noted on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SettlementRulesSchemaConfig {
    /// Equity settlement days (T+N). Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub equity_days: i32,

    /// Government bonds settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub government_bonds_days: i32,

    /// FX spot settlement days. Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub fx_spot_days: i32,

    /// Corporate bonds settlement days. Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub corporate_bonds_days: i32,

    /// Wire transfer cutoff time (HH:MM format). Defaults to "14:00".
    #[serde(default = "default_wire_cutoff")]
    pub wire_cutoff_time: String,

    /// International wire settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub wire_international_days: i32,

    /// ACH settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub ach_days: i32,
}
7739
/// Serde default for the settlement fields that settle one day out
/// (government bonds, international wires, ACH).
fn default_settlement_1() -> i32 {
    const T_PLUS_ONE: i32 = 1;
    T_PLUS_ONE
}
7743
/// Serde default for the settlement fields that settle two days out
/// (equities, FX spot, corporate bonds).
fn default_settlement_2() -> i32 {
    const T_PLUS_TWO: i32 = 2;
    T_PLUS_TWO
}
7747
/// Serde default for `SettlementRulesSchemaConfig::wire_cutoff_time`.
fn default_wire_cutoff() -> String {
    String::from("14:00")
}
7751
7752impl Default for SettlementRulesSchemaConfig {
7753    fn default() -> Self {
7754        Self {
7755            equity_days: 2,
7756            government_bonds_days: 1,
7757            fx_spot_days: 2,
7758            corporate_bonds_days: 2,
7759            wire_cutoff_time: "14:00".to_string(),
7760            wire_international_days: 1,
7761            ach_days: 1,
7762        }
7763    }
7764}
7765
/// Regional calendar configuration.
///
/// Both lists default to empty when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CalendarSchemaConfig {
    /// List of regions to include (e.g., ["US", "DE", "BR", "SG", "KR"]).
    #[serde(default)]
    pub regions: Vec<String>,

    /// Custom holidays (in addition to regional calendars).
    #[serde(default)]
    pub custom_holidays: Vec<CustomHolidaySchemaConfig>,
}
7777
/// Custom holiday configuration.
///
/// `name`, `month` and `day` are required. This type does not check that
/// `month`/`day` form a valid calendar date.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomHolidaySchemaConfig {
    /// Holiday name.
    pub name: String,
    /// Month (1-12).
    pub month: u8,
    /// Day of month.
    pub day: u8,
    /// Activity multiplier (0.0-1.0, default 0.05).
    #[serde(default = "default_holiday_multiplier")]
    pub activity_multiplier: f64,
}
7791
/// Serde default for `CustomHolidaySchemaConfig::activity_multiplier`: 0.05.
fn default_holiday_multiplier() -> f64 {
    const HOLIDAY_ACTIVITY_MULTIPLIER: f64 = 0.05;
    HOLIDAY_ACTIVITY_MULTIPLIER
}
7795
/// Period-end dynamics configuration.
///
/// Every field is optional and defaults to `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PeriodEndSchemaConfig {
    /// Model type: "flat", "exponential", "extended_crunch", "daily_profile".
    #[serde(default)]
    pub model: Option<String>,

    /// Month-end configuration.
    #[serde(default)]
    pub month_end: Option<PeriodEndModelSchemaConfig>,

    /// Quarter-end configuration.
    #[serde(default)]
    pub quarter_end: Option<PeriodEndModelSchemaConfig>,

    /// Year-end configuration.
    #[serde(default)]
    pub year_end: Option<PeriodEndModelSchemaConfig>,
}
7815
/// Period-end model configuration.
///
/// Every field is optional and defaults to `None`; how unset values are
/// resolved is decided by the consumer of this config.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PeriodEndModelSchemaConfig {
    /// Inherit configuration from another period (e.g., "month_end").
    #[serde(default)]
    pub inherit_from: Option<String>,

    /// Additional multiplier on top of inherited/base model.
    #[serde(default)]
    pub additional_multiplier: Option<f64>,

    /// Days before period end to start acceleration (negative, e.g., -10).
    #[serde(default)]
    pub start_day: Option<i32>,

    /// Base multiplier at start of acceleration.
    #[serde(default)]
    pub base_multiplier: Option<f64>,

    /// Peak multiplier on last day.
    #[serde(default)]
    pub peak_multiplier: Option<f64>,

    /// Decay rate for exponential model (0.1-0.5 typical).
    #[serde(default)]
    pub decay_rate: Option<f64>,

    /// Sustained high days for crunch model.
    #[serde(default)]
    pub sustained_high_days: Option<i32>,
}
7847
/// Processing lag configuration.
///
/// Each per-document lag is optional; an omitted entry deserializes to
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessingLagSchemaConfig {
    /// Enable processing lag calculations.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Sales order lag configuration (log-normal mu, sigma).
    #[serde(default)]
    pub sales_order_lag: Option<LagDistributionSchemaConfig>,

    /// Purchase order lag configuration.
    #[serde(default)]
    pub purchase_order_lag: Option<LagDistributionSchemaConfig>,

    /// Goods receipt lag configuration.
    #[serde(default)]
    pub goods_receipt_lag: Option<LagDistributionSchemaConfig>,

    /// Invoice receipt lag configuration.
    #[serde(default)]
    pub invoice_receipt_lag: Option<LagDistributionSchemaConfig>,

    /// Invoice issue lag configuration.
    #[serde(default)]
    pub invoice_issue_lag: Option<LagDistributionSchemaConfig>,

    /// Payment lag configuration.
    #[serde(default)]
    pub payment_lag: Option<LagDistributionSchemaConfig>,

    /// Journal entry lag configuration.
    #[serde(default)]
    pub journal_entry_lag: Option<LagDistributionSchemaConfig>,

    /// Cross-day posting configuration.
    #[serde(default)]
    pub cross_day_posting: Option<CrossDayPostingSchemaConfig>,
}
7887
impl Default for ProcessingLagSchemaConfig {
    /// Lag processing enabled, with no per-document lag overrides and no
    /// cross-day posting configuration.
    fn default() -> Self {
        Self {
            enabled: true,
            // Every override starts unset.
            sales_order_lag: None,
            purchase_order_lag: None,
            goods_receipt_lag: None,
            invoice_receipt_lag: None,
            invoice_issue_lag: None,
            payment_lag: None,
            journal_entry_lag: None,
            cross_day_posting: None,
        }
    }
}
7903
/// Lag distribution configuration (log-normal parameters).
///
/// `mu` and `sigma` are required; the hour bounds are optional and default
/// to `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LagDistributionSchemaConfig {
    /// Log-scale mean (mu for log-normal).
    pub mu: f64,
    /// Log-scale standard deviation (sigma for log-normal).
    pub sigma: f64,
    /// Minimum lag in hours.
    #[serde(default)]
    pub min_hours: Option<f64>,
    /// Maximum lag in hours.
    #[serde(default)]
    pub max_hours: Option<f64>,
}
7918
7919/// Cross-day posting configuration.
7920#[derive(Debug, Clone, Serialize, Deserialize)]
7921pub struct CrossDayPostingSchemaConfig {
7922    /// Enable cross-day posting logic.
7923    #[serde(default = "default_true")]
7924    pub enabled: bool,
7925
7926    /// Probability of next-day posting by hour (map of hour -> probability).
7927    /// E.g., { 17: 0.7, 19: 0.9, 21: 0.99 }
7928    #[serde(default)]
7929    pub probability_by_hour: std::collections::HashMap<u8, f64>,
7930}
7931
7932impl Default for CrossDayPostingSchemaConfig {
7933    fn default() -> Self {
7934        let mut probability_by_hour = std::collections::HashMap::new();
7935        probability_by_hour.insert(17, 0.3);
7936        probability_by_hour.insert(18, 0.6);
7937        probability_by_hour.insert(19, 0.8);
7938        probability_by_hour.insert(20, 0.9);
7939        probability_by_hour.insert(21, 0.95);
7940        probability_by_hour.insert(22, 0.99);
7941
7942        Self {
7943            enabled: true,
7944            probability_by_hour,
7945        }
7946    }
7947}
7948
7949// =============================================================================
7950// Fiscal Calendar Configuration (P2)
7951// =============================================================================
7952
7953/// Fiscal calendar configuration.
7954///
7955/// Supports calendar year, custom year start, 4-4-5 retail calendar,
7956/// and 13-period calendars.
7957#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7958pub struct FiscalCalendarSchemaConfig {
7959    /// Enable non-standard fiscal calendar.
7960    #[serde(default)]
7961    pub enabled: bool,
7962
7963    /// Fiscal calendar type: "calendar_year", "custom", "four_four_five", "thirteen_period".
7964    #[serde(default = "default_fiscal_calendar_type")]
7965    pub calendar_type: String,
7966
7967    /// Month the fiscal year starts (1-12). Used for custom year start.
7968    #[serde(default)]
7969    pub year_start_month: Option<u8>,
7970
7971    /// Day the fiscal year starts (1-31). Used for custom year start.
7972    #[serde(default)]
7973    pub year_start_day: Option<u8>,
7974
7975    /// 4-4-5 calendar configuration (if calendar_type is "four_four_five").
7976    #[serde(default)]
7977    pub four_four_five: Option<FourFourFiveSchemaConfig>,
7978}
7979
/// Serde default for `FiscalCalendarSchemaConfig::calendar_type`.
fn default_fiscal_calendar_type() -> String {
    String::from("calendar_year")
}
7983
/// 4-4-5 retail calendar configuration.
///
/// Every field may be omitted; each then takes the default noted on it.
/// Values are carried as plain strings and are not validated by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourFourFiveSchemaConfig {
    /// Week pattern: "four_four_five", "four_five_four", "five_four_four".
    /// Defaults to "four_four_five".
    #[serde(default = "default_week_pattern")]
    pub pattern: String,

    /// Anchor type: "first_sunday", "last_saturday", "nearest_saturday".
    /// Defaults to "last_saturday".
    #[serde(default = "default_anchor_type")]
    pub anchor_type: String,

    /// Anchor month (1-12). Defaults to 1 (January).
    #[serde(default = "default_anchor_month")]
    pub anchor_month: u8,

    /// Where to place leap week: "q4_period3" or "q1_period1".
    /// Defaults to "q4_period3".
    #[serde(default = "default_leap_week_placement")]
    pub leap_week_placement: String,
}
8003
/// Serde default for `FourFourFiveSchemaConfig::pattern`.
fn default_week_pattern() -> String {
    String::from("four_four_five")
}
8007
/// Serde default for `FourFourFiveSchemaConfig::anchor_type`.
fn default_anchor_type() -> String {
    String::from("last_saturday")
}
8011
/// Serde default for `FourFourFiveSchemaConfig::anchor_month`: January.
fn default_anchor_month() -> u8 {
    const JANUARY: u8 = 1;
    JANUARY
}
8015
/// Serde default for `FourFourFiveSchemaConfig::leap_week_placement`.
fn default_leap_week_placement() -> String {
    String::from("q4_period3")
}
8019
8020impl Default for FourFourFiveSchemaConfig {
8021    fn default() -> Self {
8022        Self {
8023            pattern: "four_four_five".to_string(),
8024            anchor_type: "last_saturday".to_string(),
8025            anchor_month: 1,
8026            leap_week_placement: "q4_period3".to_string(),
8027        }
8028    }
8029}
8030
8031// =============================================================================
8032// Intra-Day Patterns Configuration (P2)
8033// =============================================================================
8034
/// Intra-day patterns configuration.
///
/// Defines time-of-day segments with different activity multipliers
/// for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
///
/// Both fields may be omitted; the defaults are disabled with no segments.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct IntraDaySchemaConfig {
    /// Enable intra-day patterns.
    #[serde(default)]
    pub enabled: bool,

    /// Custom intra-day segments.
    #[serde(default)]
    pub segments: Vec<IntraDaySegmentSchemaConfig>,
}
8049
/// Intra-day segment configuration.
///
/// `name`, `start` and `end` are required. Times are carried as plain
/// strings and are not parsed or validated by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntraDaySegmentSchemaConfig {
    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
    pub name: String,

    /// Start time (HH:MM format).
    pub start: String,

    /// End time (HH:MM format).
    pub end: String,

    /// Activity multiplier (1.0 = normal). Defaults to 1.0.
    #[serde(default = "default_multiplier")]
    pub multiplier: f64,

    /// Posting type: "human", "system", "both". Defaults to "both".
    #[serde(default = "default_posting_type")]
    pub posting_type: String,
}
8070
/// Serde default for `IntraDaySegmentSchemaConfig::multiplier`: `1.0`
/// (normal activity).
fn default_multiplier() -> f64 {
    1.0_f64
}
8074
/// Serde default for `IntraDaySegmentSchemaConfig::posting_type`: `"both"`.
fn default_posting_type() -> String {
    String::from("both")
}
8078
8079// =============================================================================
8080// Timezone Configuration
8081// =============================================================================
8082
8083/// Timezone handling configuration for multi-region entities.
8084#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8085pub struct TimezoneSchemaConfig {
8086    /// Enable timezone handling.
8087    #[serde(default)]
8088    pub enabled: bool,
8089
8090    /// Default timezone (IANA format, e.g., "America/New_York").
8091    #[serde(default = "default_timezone")]
8092    pub default_timezone: String,
8093
8094    /// Consolidation timezone for group reporting (IANA format).
8095    #[serde(default = "default_consolidation_timezone")]
8096    pub consolidation_timezone: String,
8097
8098    /// Entity-to-timezone mappings.
8099    /// Supports patterns like "EU_*" -> "Europe/London".
8100    #[serde(default)]
8101    pub entity_mappings: Vec<EntityTimezoneMapping>,
8102}
8103
/// Serde default for `TimezoneSchemaConfig::default_timezone`:
/// `"America/New_York"`.
fn default_timezone() -> String {
    String::from("America/New_York")
}
8107
/// Serde default for `TimezoneSchemaConfig::consolidation_timezone`: `"UTC"`.
fn default_consolidation_timezone() -> String {
    String::from("UTC")
}
8111
8112/// Mapping from entity pattern to timezone.
8113#[derive(Debug, Clone, Serialize, Deserialize)]
8114pub struct EntityTimezoneMapping {
8115    /// Entity code pattern (e.g., "EU_*", "*_APAC", "1000").
8116    pub pattern: String,
8117
8118    /// Timezone (IANA format, e.g., "Europe/London").
8119    pub timezone: String,
8120}
8121
8122// =============================================================================
8123// Vendor Network Configuration
8124// =============================================================================
8125
8126/// Configuration for multi-tier vendor network generation.
8127#[derive(Debug, Clone, Serialize, Deserialize)]
8128pub struct VendorNetworkSchemaConfig {
8129    /// Enable vendor network generation.
8130    #[serde(default)]
8131    pub enabled: bool,
8132
8133    /// Maximum depth of supply chain tiers (1-3).
8134    #[serde(default = "default_vendor_tier_depth")]
8135    pub depth: u8,
8136
8137    /// Tier 1 vendor count configuration.
8138    #[serde(default)]
8139    pub tier1: TierCountSchemaConfig,
8140
8141    /// Tier 2 vendors per Tier 1 parent.
8142    #[serde(default)]
8143    pub tier2_per_parent: TierCountSchemaConfig,
8144
8145    /// Tier 3 vendors per Tier 2 parent.
8146    #[serde(default)]
8147    pub tier3_per_parent: TierCountSchemaConfig,
8148
8149    /// Vendor cluster distribution.
8150    #[serde(default)]
8151    pub clusters: VendorClusterSchemaConfig,
8152
8153    /// Concentration limits.
8154    #[serde(default)]
8155    pub dependencies: DependencySchemaConfig,
8156}
8157
/// Serde default for `VendorNetworkSchemaConfig::depth`: `3` tiers.
fn default_vendor_tier_depth() -> u8 {
    3_u8
}
8161
8162impl Default for VendorNetworkSchemaConfig {
8163    fn default() -> Self {
8164        Self {
8165            enabled: false,
8166            depth: 3,
8167            tier1: TierCountSchemaConfig { min: 50, max: 100 },
8168            tier2_per_parent: TierCountSchemaConfig { min: 4, max: 10 },
8169            tier3_per_parent: TierCountSchemaConfig { min: 2, max: 5 },
8170            clusters: VendorClusterSchemaConfig::default(),
8171            dependencies: DependencySchemaConfig::default(),
8172        }
8173    }
8174}
8175
8176/// Tier count configuration.
8177#[derive(Debug, Clone, Serialize, Deserialize)]
8178pub struct TierCountSchemaConfig {
8179    /// Minimum count.
8180    #[serde(default = "default_tier_min")]
8181    pub min: usize,
8182
8183    /// Maximum count.
8184    #[serde(default = "default_tier_max")]
8185    pub max: usize,
8186}
8187
/// Serde default for `TierCountSchemaConfig::min`: `5`.
fn default_tier_min() -> usize {
    5_usize
}
8191
/// Serde default for `TierCountSchemaConfig::max`: `20`.
fn default_tier_max() -> usize {
    20_usize
}
8195
8196impl Default for TierCountSchemaConfig {
8197    fn default() -> Self {
8198        Self {
8199            min: default_tier_min(),
8200            max: default_tier_max(),
8201        }
8202    }
8203}
8204
8205/// Vendor cluster distribution configuration.
8206#[derive(Debug, Clone, Serialize, Deserialize)]
8207pub struct VendorClusterSchemaConfig {
8208    /// Reliable strategic vendors percentage (default: 0.20).
8209    #[serde(default = "default_reliable_strategic")]
8210    pub reliable_strategic: f64,
8211
8212    /// Standard operational vendors percentage (default: 0.50).
8213    #[serde(default = "default_standard_operational")]
8214    pub standard_operational: f64,
8215
8216    /// Transactional vendors percentage (default: 0.25).
8217    #[serde(default = "default_transactional")]
8218    pub transactional: f64,
8219
8220    /// Problematic vendors percentage (default: 0.05).
8221    #[serde(default = "default_problematic")]
8222    pub problematic: f64,
8223}
8224
/// Serde default for `VendorClusterSchemaConfig::reliable_strategic`: `0.20`.
fn default_reliable_strategic() -> f64 {
    0.20_f64
}
8228
/// Serde default for `VendorClusterSchemaConfig::standard_operational`: `0.50`.
fn default_standard_operational() -> f64 {
    0.50_f64
}
8232
/// Serde default for `VendorClusterSchemaConfig::transactional`: `0.25`.
fn default_transactional() -> f64 {
    0.25_f64
}
8236
/// Serde default for `VendorClusterSchemaConfig::problematic`: `0.05`.
fn default_problematic() -> f64 {
    0.05_f64
}
8240
8241impl Default for VendorClusterSchemaConfig {
8242    fn default() -> Self {
8243        Self {
8244            reliable_strategic: 0.20,
8245            standard_operational: 0.50,
8246            transactional: 0.25,
8247            problematic: 0.05,
8248        }
8249    }
8250}
8251
8252/// Dependency and concentration limits configuration.
8253#[derive(Debug, Clone, Serialize, Deserialize)]
8254pub struct DependencySchemaConfig {
8255    /// Maximum concentration for a single vendor (default: 0.15).
8256    #[serde(default = "default_max_single_vendor")]
8257    pub max_single_vendor_concentration: f64,
8258
8259    /// Maximum concentration for top 5 vendors (default: 0.45).
8260    #[serde(default = "default_max_top5")]
8261    pub top_5_concentration: f64,
8262
8263    /// Percentage of single-source vendors (default: 0.05).
8264    #[serde(default = "default_single_source_percent")]
8265    pub single_source_percent: f64,
8266}
8267
/// Serde default for `DependencySchemaConfig::max_single_vendor_concentration`:
/// `0.15`.
fn default_max_single_vendor() -> f64 {
    0.15_f64
}
8271
/// Serde default for `DependencySchemaConfig::top_5_concentration`: `0.45`.
fn default_max_top5() -> f64 {
    0.45_f64
}
8275
/// Serde default for `DependencySchemaConfig::single_source_percent`: `0.05`.
fn default_single_source_percent() -> f64 {
    0.05_f64
}
8279
8280impl Default for DependencySchemaConfig {
8281    fn default() -> Self {
8282        Self {
8283            max_single_vendor_concentration: 0.15,
8284            top_5_concentration: 0.45,
8285            single_source_percent: 0.05,
8286        }
8287    }
8288}
8289
8290// =============================================================================
8291// Customer Segmentation Configuration
8292// =============================================================================
8293
8294/// Configuration for customer segmentation generation.
8295#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8296pub struct CustomerSegmentationSchemaConfig {
8297    /// Enable customer segmentation generation.
8298    #[serde(default)]
8299    pub enabled: bool,
8300
8301    /// Value segment distribution.
8302    #[serde(default)]
8303    pub value_segments: ValueSegmentsSchemaConfig,
8304
8305    /// Lifecycle stage configuration.
8306    #[serde(default)]
8307    pub lifecycle: LifecycleSchemaConfig,
8308
8309    /// Network (referrals, hierarchies) configuration.
8310    #[serde(default)]
8311    pub networks: CustomerNetworksSchemaConfig,
8312}
8313
8314/// Customer value segments distribution configuration.
8315#[derive(Debug, Clone, Serialize, Deserialize)]
8316pub struct ValueSegmentsSchemaConfig {
8317    /// Enterprise segment configuration.
8318    #[serde(default)]
8319    pub enterprise: SegmentDetailSchemaConfig,
8320
8321    /// Mid-market segment configuration.
8322    #[serde(default)]
8323    pub mid_market: SegmentDetailSchemaConfig,
8324
8325    /// SMB segment configuration.
8326    #[serde(default)]
8327    pub smb: SegmentDetailSchemaConfig,
8328
8329    /// Consumer segment configuration.
8330    #[serde(default)]
8331    pub consumer: SegmentDetailSchemaConfig,
8332}
8333
8334impl Default for ValueSegmentsSchemaConfig {
8335    fn default() -> Self {
8336        Self {
8337            enterprise: SegmentDetailSchemaConfig {
8338                revenue_share: 0.40,
8339                customer_share: 0.05,
8340                avg_order_value_range: "50000+".to_string(),
8341            },
8342            mid_market: SegmentDetailSchemaConfig {
8343                revenue_share: 0.35,
8344                customer_share: 0.20,
8345                avg_order_value_range: "5000-50000".to_string(),
8346            },
8347            smb: SegmentDetailSchemaConfig {
8348                revenue_share: 0.20,
8349                customer_share: 0.50,
8350                avg_order_value_range: "500-5000".to_string(),
8351            },
8352            consumer: SegmentDetailSchemaConfig {
8353                revenue_share: 0.05,
8354                customer_share: 0.25,
8355                avg_order_value_range: "50-500".to_string(),
8356            },
8357        }
8358    }
8359}
8360
8361/// Individual segment detail configuration.
8362#[derive(Debug, Clone, Serialize, Deserialize)]
8363pub struct SegmentDetailSchemaConfig {
8364    /// Revenue share for this segment.
8365    #[serde(default)]
8366    pub revenue_share: f64,
8367
8368    /// Customer share for this segment.
8369    #[serde(default)]
8370    pub customer_share: f64,
8371
8372    /// Average order value range (e.g., "5000-50000" or "50000+").
8373    #[serde(default)]
8374    pub avg_order_value_range: String,
8375}
8376
8377impl Default for SegmentDetailSchemaConfig {
8378    fn default() -> Self {
8379        Self {
8380            revenue_share: 0.25,
8381            customer_share: 0.25,
8382            avg_order_value_range: "1000-10000".to_string(),
8383        }
8384    }
8385}
8386
8387/// Customer lifecycle stage configuration.
8388#[derive(Debug, Clone, Serialize, Deserialize)]
8389pub struct LifecycleSchemaConfig {
8390    /// Prospect stage rate.
8391    #[serde(default)]
8392    pub prospect_rate: f64,
8393
8394    /// New customer stage rate.
8395    #[serde(default = "default_new_rate")]
8396    pub new_rate: f64,
8397
8398    /// Growth stage rate.
8399    #[serde(default = "default_growth_rate")]
8400    pub growth_rate: f64,
8401
8402    /// Mature stage rate.
8403    #[serde(default = "default_mature_rate")]
8404    pub mature_rate: f64,
8405
8406    /// At-risk stage rate.
8407    #[serde(default = "default_at_risk_rate")]
8408    pub at_risk_rate: f64,
8409
8410    /// Churned stage rate.
8411    #[serde(default = "default_churned_rate")]
8412    pub churned_rate: f64,
8413
8414    /// Won-back stage rate (churned customers reacquired).
8415    #[serde(default)]
8416    pub won_back_rate: f64,
8417}
8418
/// Serde default for `LifecycleSchemaConfig::new_rate`: `0.10`.
fn default_new_rate() -> f64 {
    0.10_f64
}
8422
/// Serde default for `LifecycleSchemaConfig::growth_rate`: `0.15`.
fn default_growth_rate() -> f64 {
    0.15_f64
}
8426
/// Serde default for `LifecycleSchemaConfig::mature_rate`: `0.60`.
fn default_mature_rate() -> f64 {
    0.60_f64
}
8430
/// Serde default for `LifecycleSchemaConfig::at_risk_rate`: `0.10`.
fn default_at_risk_rate() -> f64 {
    0.10_f64
}
8434
/// Serde default for `LifecycleSchemaConfig::churned_rate`: `0.05`.
fn default_churned_rate() -> f64 {
    0.05_f64
}
8438
8439impl Default for LifecycleSchemaConfig {
8440    fn default() -> Self {
8441        Self {
8442            prospect_rate: 0.0,
8443            new_rate: 0.10,
8444            growth_rate: 0.15,
8445            mature_rate: 0.60,
8446            at_risk_rate: 0.10,
8447            churned_rate: 0.05,
8448            won_back_rate: 0.0,
8449        }
8450    }
8451}
8452
8453/// Customer networks configuration (referrals, hierarchies).
8454#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8455pub struct CustomerNetworksSchemaConfig {
8456    /// Referral network configuration.
8457    #[serde(default)]
8458    pub referrals: ReferralSchemaConfig,
8459
8460    /// Corporate hierarchy configuration.
8461    #[serde(default)]
8462    pub corporate_hierarchies: HierarchySchemaConfig,
8463}
8464
8465/// Referral network configuration.
8466#[derive(Debug, Clone, Serialize, Deserialize)]
8467pub struct ReferralSchemaConfig {
8468    /// Enable referral generation.
8469    #[serde(default = "default_true")]
8470    pub enabled: bool,
8471
8472    /// Rate of customers acquired via referral.
8473    #[serde(default = "default_referral_rate")]
8474    pub referral_rate: f64,
8475}
8476
/// Serde default for `ReferralSchemaConfig::referral_rate`: `0.15`.
fn default_referral_rate() -> f64 {
    0.15_f64
}
8480
8481impl Default for ReferralSchemaConfig {
8482    fn default() -> Self {
8483        Self {
8484            enabled: true,
8485            referral_rate: 0.15,
8486        }
8487    }
8488}
8489
8490/// Corporate hierarchy configuration.
8491#[derive(Debug, Clone, Serialize, Deserialize)]
8492pub struct HierarchySchemaConfig {
8493    /// Enable corporate hierarchy generation.
8494    #[serde(default = "default_true")]
8495    pub enabled: bool,
8496
8497    /// Rate of customers in hierarchies.
8498    #[serde(default = "default_hierarchy_rate")]
8499    pub probability: f64,
8500}
8501
/// Serde default for `HierarchySchemaConfig::probability`: `0.30`.
fn default_hierarchy_rate() -> f64 {
    0.30_f64
}
8505
8506impl Default for HierarchySchemaConfig {
8507    fn default() -> Self {
8508        Self {
8509            enabled: true,
8510            probability: 0.30,
8511        }
8512    }
8513}
8514
8515// =============================================================================
8516// Relationship Strength Configuration
8517// =============================================================================
8518
8519/// Configuration for relationship strength calculation.
8520#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8521pub struct RelationshipStrengthSchemaConfig {
8522    /// Enable relationship strength calculation.
8523    #[serde(default)]
8524    pub enabled: bool,
8525
8526    /// Calculation weights.
8527    #[serde(default)]
8528    pub calculation: StrengthCalculationSchemaConfig,
8529
8530    /// Strength thresholds for classification.
8531    #[serde(default)]
8532    pub thresholds: StrengthThresholdsSchemaConfig,
8533}
8534
8535/// Strength calculation weights configuration.
8536#[derive(Debug, Clone, Serialize, Deserialize)]
8537pub struct StrengthCalculationSchemaConfig {
8538    /// Weight for transaction volume (default: 0.30).
8539    #[serde(default = "default_volume_weight")]
8540    pub transaction_volume_weight: f64,
8541
8542    /// Weight for transaction count (default: 0.25).
8543    #[serde(default = "default_count_weight")]
8544    pub transaction_count_weight: f64,
8545
8546    /// Weight for relationship duration (default: 0.20).
8547    #[serde(default = "default_duration_weight")]
8548    pub relationship_duration_weight: f64,
8549
8550    /// Weight for recency (default: 0.15).
8551    #[serde(default = "default_recency_weight")]
8552    pub recency_weight: f64,
8553
8554    /// Weight for mutual connections (default: 0.10).
8555    #[serde(default = "default_mutual_weight")]
8556    pub mutual_connections_weight: f64,
8557
8558    /// Recency half-life in days (default: 90).
8559    #[serde(default = "default_recency_half_life")]
8560    pub recency_half_life_days: u32,
8561}
8562
/// Serde default for
/// `StrengthCalculationSchemaConfig::transaction_volume_weight`: `0.30`.
fn default_volume_weight() -> f64 {
    0.30_f64
}
8566
/// Serde default for
/// `StrengthCalculationSchemaConfig::transaction_count_weight`: `0.25`.
fn default_count_weight() -> f64 {
    0.25_f64
}
8570
/// Serde default for
/// `StrengthCalculationSchemaConfig::relationship_duration_weight`: `0.20`.
fn default_duration_weight() -> f64 {
    0.20_f64
}
8574
/// Serde default for `StrengthCalculationSchemaConfig::recency_weight`: `0.15`.
fn default_recency_weight() -> f64 {
    0.15_f64
}
8578
/// Serde default for
/// `StrengthCalculationSchemaConfig::mutual_connections_weight`: `0.10`.
fn default_mutual_weight() -> f64 {
    0.10_f64
}
8582
/// Serde default for
/// `StrengthCalculationSchemaConfig::recency_half_life_days`: `90` days.
fn default_recency_half_life() -> u32 {
    90_u32
}
8586
8587impl Default for StrengthCalculationSchemaConfig {
8588    fn default() -> Self {
8589        Self {
8590            transaction_volume_weight: 0.30,
8591            transaction_count_weight: 0.25,
8592            relationship_duration_weight: 0.20,
8593            recency_weight: 0.15,
8594            mutual_connections_weight: 0.10,
8595            recency_half_life_days: 90,
8596        }
8597    }
8598}
8599
8600/// Strength thresholds for relationship classification.
8601#[derive(Debug, Clone, Serialize, Deserialize)]
8602pub struct StrengthThresholdsSchemaConfig {
8603    /// Threshold for strong relationships (default: 0.7).
8604    #[serde(default = "default_strong_threshold")]
8605    pub strong: f64,
8606
8607    /// Threshold for moderate relationships (default: 0.4).
8608    #[serde(default = "default_moderate_threshold")]
8609    pub moderate: f64,
8610
8611    /// Threshold for weak relationships (default: 0.1).
8612    #[serde(default = "default_weak_threshold")]
8613    pub weak: f64,
8614}
8615
/// Serde default for `StrengthThresholdsSchemaConfig::strong`: `0.7`.
fn default_strong_threshold() -> f64 {
    0.7_f64
}
8619
/// Serde default for `StrengthThresholdsSchemaConfig::moderate`: `0.4`.
fn default_moderate_threshold() -> f64 {
    0.4_f64
}
8623
/// Serde default for `StrengthThresholdsSchemaConfig::weak`: `0.1`.
fn default_weak_threshold() -> f64 {
    0.1_f64
}
8627
8628impl Default for StrengthThresholdsSchemaConfig {
8629    fn default() -> Self {
8630        Self {
8631            strong: 0.7,
8632            moderate: 0.4,
8633            weak: 0.1,
8634        }
8635    }
8636}
8637
8638// =============================================================================
8639// Cross-Process Links Configuration
8640// =============================================================================
8641
8642/// Configuration for cross-process linkages.
8643#[derive(Debug, Clone, Serialize, Deserialize)]
8644pub struct CrossProcessLinksSchemaConfig {
8645    /// Enable cross-process link generation.
8646    #[serde(default)]
8647    pub enabled: bool,
8648
8649    /// Enable inventory links between P2P and O2C.
8650    #[serde(default = "default_true")]
8651    pub inventory_p2p_o2c: bool,
8652
8653    /// Enable payment to bank reconciliation links.
8654    #[serde(default = "default_true")]
8655    pub payment_bank_reconciliation: bool,
8656
8657    /// Enable intercompany bilateral matching.
8658    #[serde(default = "default_true")]
8659    pub intercompany_bilateral: bool,
8660
8661    /// Percentage of GR/Deliveries to link via inventory (0.0 - 1.0).
8662    #[serde(default = "default_inventory_link_rate")]
8663    pub inventory_link_rate: f64,
8664}
8665
/// Serde default for `CrossProcessLinksSchemaConfig::inventory_link_rate`:
/// `0.30`.
fn default_inventory_link_rate() -> f64 {
    0.30_f64
}
8669
8670impl Default for CrossProcessLinksSchemaConfig {
8671    fn default() -> Self {
8672        Self {
8673            enabled: false,
8674            inventory_p2p_o2c: true,
8675            payment_bank_reconciliation: true,
8676            intercompany_bilateral: true,
8677            inventory_link_rate: 0.30,
8678        }
8679    }
8680}
8681
8682// =============================================================================
8683// Organizational Events Configuration
8684// =============================================================================
8685
8686/// Configuration for organizational events (acquisitions, divestitures, etc.).
8687#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8688pub struct OrganizationalEventsSchemaConfig {
8689    /// Enable organizational events.
8690    #[serde(default)]
8691    pub enabled: bool,
8692
8693    /// Effect blending mode (multiplicative, additive, maximum, minimum).
8694    #[serde(default)]
8695    pub effect_blending: EffectBlendingModeConfig,
8696
8697    /// Organizational events (acquisitions, divestitures, reorganizations, etc.).
8698    #[serde(default)]
8699    pub events: Vec<OrganizationalEventSchemaConfig>,
8700
8701    /// Process evolution events.
8702    #[serde(default)]
8703    pub process_evolution: Vec<ProcessEvolutionSchemaConfig>,
8704
8705    /// Technology transition events.
8706    #[serde(default)]
8707    pub technology_transitions: Vec<TechnologyTransitionSchemaConfig>,
8708}
8709
8710/// Effect blending mode for combining multiple event effects.
8711#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
8712#[serde(rename_all = "snake_case")]
8713pub enum EffectBlendingModeConfig {
8714    /// Multiply effects together.
8715    #[default]
8716    Multiplicative,
8717    /// Add effects together.
8718    Additive,
8719    /// Take the maximum effect.
8720    Maximum,
8721    /// Take the minimum effect.
8722    Minimum,
8723}
8724
8725/// Configuration for a single organizational event.
8726#[derive(Debug, Clone, Serialize, Deserialize)]
8727pub struct OrganizationalEventSchemaConfig {
8728    /// Event ID.
8729    pub id: String,
8730
8731    /// Event type and configuration.
8732    pub event_type: OrganizationalEventTypeSchemaConfig,
8733
8734    /// Effective date.
8735    pub effective_date: String,
8736
8737    /// Transition duration in months.
8738    #[serde(default = "default_org_transition_months")]
8739    pub transition_months: u32,
8740
8741    /// Description.
8742    #[serde(default)]
8743    pub description: Option<String>,
8744}
8745
/// Serde default for `OrganizationalEventSchemaConfig::transition_months`:
/// `6` months.
fn default_org_transition_months() -> u32 {
    6_u32
}
8749
8750/// Organizational event type configuration.
8751#[derive(Debug, Clone, Serialize, Deserialize)]
8752#[serde(tag = "type", rename_all = "snake_case")]
8753pub enum OrganizationalEventTypeSchemaConfig {
8754    /// Acquisition event.
8755    Acquisition {
8756        /// Acquired entity code.
8757        acquired_entity: String,
8758        /// Volume increase multiplier.
8759        #[serde(default = "default_acquisition_volume")]
8760        volume_increase: f64,
8761        /// Integration error rate.
8762        #[serde(default = "default_acquisition_error")]
8763        integration_error_rate: f64,
8764        /// Parallel posting days.
8765        #[serde(default = "default_parallel_days")]
8766        parallel_posting_days: u32,
8767    },
8768    /// Divestiture event.
8769    Divestiture {
8770        /// Divested entity code.
8771        divested_entity: String,
8772        /// Volume reduction factor.
8773        #[serde(default = "default_divestiture_volume")]
8774        volume_reduction: f64,
8775        /// Remove entity from generation.
8776        #[serde(default = "default_true_val")]
8777        remove_entity: bool,
8778    },
8779    /// Reorganization event.
8780    Reorganization {
8781        /// Cost center remapping.
8782        #[serde(default)]
8783        cost_center_remapping: std::collections::HashMap<String, String>,
8784        /// Transition error rate.
8785        #[serde(default = "default_reorg_error")]
8786        transition_error_rate: f64,
8787    },
8788    /// Leadership change event.
8789    LeadershipChange {
8790        /// Role that changed.
8791        role: String,
8792        /// Policy changes.
8793        #[serde(default)]
8794        policy_changes: Vec<String>,
8795    },
8796    /// Workforce reduction event.
8797    WorkforceReduction {
8798        /// Reduction percentage.
8799        #[serde(default = "default_workforce_reduction")]
8800        reduction_percent: f64,
8801        /// Error rate increase.
8802        #[serde(default = "default_workforce_error")]
8803        error_rate_increase: f64,
8804    },
8805    /// Merger event.
8806    Merger {
8807        /// Merged entity code.
8808        merged_entity: String,
8809        /// Volume increase multiplier.
8810        #[serde(default = "default_merger_volume")]
8811        volume_increase: f64,
8812    },
8813}
8814
/// Serde default for `Acquisition::volume_increase`: `1.35`.
fn default_acquisition_volume() -> f64 {
    1.35_f64
}
8818
/// Serde default for `Acquisition::integration_error_rate`: `0.05`.
fn default_acquisition_error() -> f64 {
    0.05_f64
}
8822
/// Serde default for `Acquisition::parallel_posting_days`: `30` days.
fn default_parallel_days() -> u32 {
    30_u32
}
8826
/// Serde default for `Divestiture::volume_reduction`: `0.70`.
fn default_divestiture_volume() -> f64 {
    0.70_f64
}
8830
/// Serde default for `Divestiture::remove_entity`: always `true`.
fn default_true_val() -> bool {
    const REMOVE_BY_DEFAULT: bool = true;
    REMOVE_BY_DEFAULT
}
8834
/// Serde default for `Reorganization::transition_error_rate`: `0.04`.
fn default_reorg_error() -> f64 {
    0.04_f64
}
8838
/// Serde default for `WorkforceReduction::reduction_percent`: `0.10`.
fn default_workforce_reduction() -> f64 {
    0.10_f64
}
8842
/// Serde default for `WorkforceReduction::error_rate_increase`: `0.05`.
fn default_workforce_error() -> f64 {
    0.05_f64
}
8846
/// Serde default for `Merger::volume_increase`: `1.80`.
fn default_merger_volume() -> f64 {
    1.80_f64
}
8850
8851/// Configuration for a process evolution event.
8852#[derive(Debug, Clone, Serialize, Deserialize)]
8853pub struct ProcessEvolutionSchemaConfig {
8854    /// Event ID.
8855    pub id: String,
8856
8857    /// Event type.
8858    pub event_type: ProcessEvolutionTypeSchemaConfig,
8859
8860    /// Effective date.
8861    pub effective_date: String,
8862
8863    /// Description.
8864    #[serde(default)]
8865    pub description: Option<String>,
8866}
8867
8868/// Process evolution type configuration.
8869#[derive(Debug, Clone, Serialize, Deserialize)]
8870#[serde(tag = "type", rename_all = "snake_case")]
8871pub enum ProcessEvolutionTypeSchemaConfig {
8872    /// Process automation.
8873    ProcessAutomation {
8874        /// Process name.
8875        process_name: String,
8876        /// Manual rate before.
8877        #[serde(default = "default_manual_before")]
8878        manual_rate_before: f64,
8879        /// Manual rate after.
8880        #[serde(default = "default_manual_after")]
8881        manual_rate_after: f64,
8882    },
8883    /// Approval workflow change.
8884    ApprovalWorkflowChange {
8885        /// Description.
8886        description: String,
8887    },
8888    /// Control enhancement.
8889    ControlEnhancement {
8890        /// Control ID.
8891        control_id: String,
8892        /// Error reduction.
8893        #[serde(default = "default_error_reduction")]
8894        error_reduction: f64,
8895    },
8896}
8897
/// Serde default for `ProcessAutomation::manual_rate_before`: `0.80`.
fn default_manual_before() -> f64 {
    0.80_f64
}
8901
/// Serde default for `ProcessAutomation::manual_rate_after`: `0.15`.
fn default_manual_after() -> f64 {
    0.15_f64
}
8905
/// Serde default for `ControlEnhancement::error_reduction`: `0.02`.
fn default_error_reduction() -> f64 {
    0.02_f64
}
8909
8910/// Configuration for a technology transition event.
8911#[derive(Debug, Clone, Serialize, Deserialize)]
8912pub struct TechnologyTransitionSchemaConfig {
8913    /// Event ID.
8914    pub id: String,
8915
8916    /// Event type.
8917    pub event_type: TechnologyTransitionTypeSchemaConfig,
8918
8919    /// Description.
8920    #[serde(default)]
8921    pub description: Option<String>,
8922}
8923
8924/// Technology transition type configuration.
8925#[derive(Debug, Clone, Serialize, Deserialize)]
8926#[serde(tag = "type", rename_all = "snake_case")]
8927pub enum TechnologyTransitionTypeSchemaConfig {
8928    /// ERP migration.
8929    ErpMigration {
8930        /// Source system.
8931        source_system: String,
8932        /// Target system.
8933        target_system: String,
8934        /// Cutover date.
8935        cutover_date: String,
8936        /// Stabilization end date.
8937        stabilization_end: String,
8938        /// Duplicate rate during migration.
8939        #[serde(default = "default_erp_duplicate_rate")]
8940        duplicate_rate: f64,
8941        /// Format mismatch rate.
8942        #[serde(default = "default_format_mismatch")]
8943        format_mismatch_rate: f64,
8944    },
8945    /// Module implementation.
8946    ModuleImplementation {
8947        /// Module name.
8948        module_name: String,
8949        /// Go-live date.
8950        go_live_date: String,
8951    },
8952}
8953
/// Serde default for `ErpMigration::duplicate_rate`: `0.02`.
fn default_erp_duplicate_rate() -> f64 {
    0.02_f64
}
8957
/// Serde default for `ErpMigration::format_mismatch_rate`: `0.03`.
fn default_format_mismatch() -> f64 {
    0.03_f64
}
8961
8962// =============================================================================
8963// Behavioral Drift Configuration
8964// =============================================================================
8965
8966/// Configuration for behavioral drift (vendor, customer, employee behavior).
8967///
8968/// **Deprecated (v4.1.2):** this schema section is currently
8969/// validated-but-inert — no runtime code consumes its fields. Users
8970/// who want behavioral drift-style effects should reach for
8971/// `distributions.regime_changes` (v3.5.2+), which drives the
8972/// `DriftController` via the parameter-drift path. The schema type
8973/// remains for backward-compatible YAML loading; it will be removed
8974/// in a future major version once `regime_changes` gains per-entity
8975/// (vendor / customer / employee) targeting.
8976#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8977pub struct BehavioralDriftSchemaConfig {
8978    /// Enable behavioral drift.
8979    #[serde(default)]
8980    pub enabled: bool,
8981
8982    /// Vendor behavior drift.
8983    #[serde(default)]
8984    pub vendor_behavior: VendorBehaviorSchemaConfig,
8985
8986    /// Customer behavior drift.
8987    #[serde(default)]
8988    pub customer_behavior: CustomerBehaviorSchemaConfig,
8989
8990    /// Employee behavior drift.
8991    #[serde(default)]
8992    pub employee_behavior: EmployeeBehaviorSchemaConfig,
8993
8994    /// Collective behavior drift.
8995    #[serde(default)]
8996    pub collective: CollectiveBehaviorSchemaConfig,
8997}
8998
8999/// Vendor behavior drift configuration.
9000#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9001pub struct VendorBehaviorSchemaConfig {
9002    /// Payment terms drift.
9003    #[serde(default)]
9004    pub payment_terms_drift: PaymentTermsDriftSchemaConfig,
9005
9006    /// Quality drift.
9007    #[serde(default)]
9008    pub quality_drift: QualityDriftSchemaConfig,
9009}
9010
9011/// Payment terms drift configuration.
9012#[derive(Debug, Clone, Serialize, Deserialize)]
9013pub struct PaymentTermsDriftSchemaConfig {
9014    /// Extension rate per year (days).
9015    #[serde(default = "default_extension_rate")]
9016    pub extension_rate_per_year: f64,
9017
9018    /// Economic sensitivity.
9019    #[serde(default = "default_economic_sensitivity")]
9020    pub economic_sensitivity: f64,
9021}
9022
/// Serde default for `PaymentTermsDriftSchemaConfig::extension_rate_per_year`:
/// `2.5`.
fn default_extension_rate() -> f64 {
    2.5_f64
}
9026
/// Serde default for `PaymentTermsDriftSchemaConfig::economic_sensitivity`:
/// `1.0`.
fn default_economic_sensitivity() -> f64 {
    1.0_f64
}
9030
9031impl Default for PaymentTermsDriftSchemaConfig {
9032    fn default() -> Self {
9033        Self {
9034            extension_rate_per_year: 2.5,
9035            economic_sensitivity: 1.0,
9036        }
9037    }
9038}
9039
9040/// Quality drift configuration.
9041#[derive(Debug, Clone, Serialize, Deserialize)]
9042pub struct QualityDriftSchemaConfig {
9043    /// New vendor improvement rate (per year).
9044    #[serde(default = "default_improvement_rate")]
9045    pub new_vendor_improvement_rate: f64,
9046
9047    /// Complacency decline rate (per year after first year).
9048    #[serde(default = "default_decline_rate")]
9049    pub complacency_decline_rate: f64,
9050}
9051
/// Serde default for `QualityDriftSchemaConfig::new_vendor_improvement_rate`:
/// `0.02`.
fn default_improvement_rate() -> f64 {
    0.02_f64
}
9055
/// Serde default for `QualityDriftSchemaConfig::complacency_decline_rate`:
/// `0.01`.
fn default_decline_rate() -> f64 {
    0.01_f64
}
9059
9060impl Default for QualityDriftSchemaConfig {
9061    fn default() -> Self {
9062        Self {
9063            new_vendor_improvement_rate: 0.02,
9064            complacency_decline_rate: 0.01,
9065        }
9066    }
9067}
9068
9069/// Customer behavior drift configuration.
9070#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9071pub struct CustomerBehaviorSchemaConfig {
9072    /// Payment drift.
9073    #[serde(default)]
9074    pub payment_drift: CustomerPaymentDriftSchemaConfig,
9075
9076    /// Order drift.
9077    #[serde(default)]
9078    pub order_drift: OrderDriftSchemaConfig,
9079}
9080
9081/// Customer payment drift configuration.
9082#[derive(Debug, Clone, Serialize, Deserialize)]
9083pub struct CustomerPaymentDriftSchemaConfig {
9084    /// Days extension during downturn (min, max).
9085    #[serde(default = "default_downturn_extension")]
9086    pub downturn_days_extension: (u32, u32),
9087
9088    /// Bad debt increase during downturn.
9089    #[serde(default = "default_bad_debt_increase")]
9090    pub downturn_bad_debt_increase: f64,
9091}
9092
/// Serde default for
/// `CustomerPaymentDriftSchemaConfig::downturn_days_extension`: `(5, 15)`,
/// given as (min, max).
fn default_downturn_extension() -> (u32, u32) {
    let (min_days, max_days) = (5_u32, 15_u32);
    (min_days, max_days)
}
9096
/// Serde default for
/// `CustomerPaymentDriftSchemaConfig::downturn_bad_debt_increase`: `0.02`.
fn default_bad_debt_increase() -> f64 {
    0.02_f64
}
9100
9101impl Default for CustomerPaymentDriftSchemaConfig {
9102    fn default() -> Self {
9103        Self {
9104            downturn_days_extension: (5, 15),
9105            downturn_bad_debt_increase: 0.02,
9106        }
9107    }
9108}
9109
9110/// Order drift configuration.
9111#[derive(Debug, Clone, Serialize, Deserialize)]
9112pub struct OrderDriftSchemaConfig {
9113    /// Digital shift rate (per year).
9114    #[serde(default = "default_digital_shift")]
9115    pub digital_shift_rate: f64,
9116}
9117
/// Serde fallback for `OrderDriftSchemaConfig::digital_shift_rate`.
fn default_digital_shift() -> f64 {
    const DIGITAL_SHIFT_RATE: f64 = 0.05;
    DIGITAL_SHIFT_RATE
}
9121
9122impl Default for OrderDriftSchemaConfig {
9123    fn default() -> Self {
9124        Self {
9125            digital_shift_rate: 0.05,
9126        }
9127    }
9128}
9129
9130/// Employee behavior drift configuration.
9131#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9132pub struct EmployeeBehaviorSchemaConfig {
9133    /// Approval drift.
9134    #[serde(default)]
9135    pub approval_drift: ApprovalDriftSchemaConfig,
9136
9137    /// Error drift.
9138    #[serde(default)]
9139    pub error_drift: ErrorDriftSchemaConfig,
9140}
9141
9142/// Approval drift configuration.
9143#[derive(Debug, Clone, Serialize, Deserialize)]
9144pub struct ApprovalDriftSchemaConfig {
9145    /// EOM intensity increase per year.
9146    #[serde(default = "default_eom_intensity")]
9147    pub eom_intensity_increase_per_year: f64,
9148
9149    /// Rubber stamp volume threshold.
9150    #[serde(default = "default_rubber_stamp")]
9151    pub rubber_stamp_volume_threshold: u32,
9152}
9153
/// Serde fallback for `eom_intensity_increase_per_year`.
fn default_eom_intensity() -> f64 {
    const EOM_INTENSITY_INCREASE: f64 = 0.05;
    EOM_INTENSITY_INCREASE
}

/// Serde fallback for `rubber_stamp_volume_threshold`.
fn default_rubber_stamp() -> u32 {
    const RUBBER_STAMP_THRESHOLD: u32 = 50;
    RUBBER_STAMP_THRESHOLD
}
9161
9162impl Default for ApprovalDriftSchemaConfig {
9163    fn default() -> Self {
9164        Self {
9165            eom_intensity_increase_per_year: 0.05,
9166            rubber_stamp_volume_threshold: 50,
9167        }
9168    }
9169}
9170
9171/// Error drift configuration.
9172#[derive(Debug, Clone, Serialize, Deserialize)]
9173pub struct ErrorDriftSchemaConfig {
9174    /// New employee error rate.
9175    #[serde(default = "default_new_error")]
9176    pub new_employee_error_rate: f64,
9177
9178    /// Learning curve months.
9179    #[serde(default = "default_learning_months")]
9180    pub learning_curve_months: u32,
9181}
9182
/// Serde fallback for `ErrorDriftSchemaConfig::new_employee_error_rate`.
fn default_new_error() -> f64 {
    const NEW_EMPLOYEE_ERROR_RATE: f64 = 0.08;
    NEW_EMPLOYEE_ERROR_RATE
}

/// Serde fallback for `ErrorDriftSchemaConfig::learning_curve_months`.
fn default_learning_months() -> u32 {
    const LEARNING_CURVE_MONTHS: u32 = 6;
    LEARNING_CURVE_MONTHS
}
9190
9191impl Default for ErrorDriftSchemaConfig {
9192    fn default() -> Self {
9193        Self {
9194            new_employee_error_rate: 0.08,
9195            learning_curve_months: 6,
9196        }
9197    }
9198}
9199
9200/// Collective behavior drift configuration.
9201#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9202pub struct CollectiveBehaviorSchemaConfig {
9203    /// Automation adoption configuration.
9204    #[serde(default)]
9205    pub automation_adoption: AutomationAdoptionSchemaConfig,
9206}
9207
9208/// Automation adoption configuration.
9209#[derive(Debug, Clone, Serialize, Deserialize)]
9210pub struct AutomationAdoptionSchemaConfig {
9211    /// Enable S-curve adoption model.
9212    #[serde(default)]
9213    pub s_curve_enabled: bool,
9214
9215    /// Adoption midpoint in months.
9216    #[serde(default = "default_midpoint")]
9217    pub adoption_midpoint_months: u32,
9218
9219    /// Steepness of adoption curve.
9220    #[serde(default = "default_steepness")]
9221    pub steepness: f64,
9222}
9223
/// Serde fallback for `adoption_midpoint_months`.
fn default_midpoint() -> u32 {
    const MIDPOINT_MONTHS: u32 = 24;
    MIDPOINT_MONTHS
}

/// Serde fallback for `AutomationAdoptionSchemaConfig::steepness`.
fn default_steepness() -> f64 {
    const STEEPNESS: f64 = 0.15;
    STEEPNESS
}
9231
9232impl Default for AutomationAdoptionSchemaConfig {
9233    fn default() -> Self {
9234        Self {
9235            s_curve_enabled: false,
9236            adoption_midpoint_months: 24,
9237            steepness: 0.15,
9238        }
9239    }
9240}
9241
9242// =============================================================================
9243// Market Drift Configuration
9244// =============================================================================
9245
9246/// Configuration for market drift (economic cycles, commodities, price shocks).
9247///
9248/// **Deprecated (v4.1.2):** validated-but-inert. Use
9249/// `distributions.regime_changes.economic_cycle` +
9250/// `distributions.regime_changes.parameter_drifts` for the
9251/// equivalent runtime behaviour (shipped in v3.5.2). The schema
9252/// type remains for backward-compatible YAML loading; will be
9253/// removed in v5.0.
9254#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9255pub struct MarketDriftSchemaConfig {
9256    /// Enable market drift.
9257    #[serde(default)]
9258    pub enabled: bool,
9259
9260    /// Economic cycle configuration.
9261    #[serde(default)]
9262    pub economic_cycle: MarketEconomicCycleSchemaConfig,
9263
9264    /// Industry-specific cycles.
9265    #[serde(default)]
9266    pub industry_cycles: std::collections::HashMap<String, IndustryCycleSchemaConfig>,
9267
9268    /// Commodity drift configuration.
9269    #[serde(default)]
9270    pub commodities: CommoditiesSchemaConfig,
9271}
9272
9273/// Market economic cycle configuration.
9274#[derive(Debug, Clone, Serialize, Deserialize)]
9275pub struct MarketEconomicCycleSchemaConfig {
9276    /// Enable economic cycle.
9277    #[serde(default)]
9278    pub enabled: bool,
9279
9280    /// Cycle type.
9281    #[serde(default)]
9282    pub cycle_type: CycleTypeSchemaConfig,
9283
9284    /// Cycle period in months.
9285    #[serde(default = "default_market_cycle_period")]
9286    pub period_months: u32,
9287
9288    /// Amplitude.
9289    #[serde(default = "default_market_amplitude")]
9290    pub amplitude: f64,
9291
9292    /// Recession configuration.
9293    #[serde(default)]
9294    pub recession: RecessionSchemaConfig,
9295}
9296
/// Serde fallback for `MarketEconomicCycleSchemaConfig::period_months`.
fn default_market_cycle_period() -> u32 {
    const CYCLE_PERIOD_MONTHS: u32 = 48;
    CYCLE_PERIOD_MONTHS
}

/// Serde fallback for `MarketEconomicCycleSchemaConfig::amplitude`.
fn default_market_amplitude() -> f64 {
    const CYCLE_AMPLITUDE: f64 = 0.15;
    CYCLE_AMPLITUDE
}
9304
9305impl Default for MarketEconomicCycleSchemaConfig {
9306    fn default() -> Self {
9307        Self {
9308            enabled: false,
9309            cycle_type: CycleTypeSchemaConfig::Sinusoidal,
9310            period_months: 48,
9311            amplitude: 0.15,
9312            recession: RecessionSchemaConfig::default(),
9313        }
9314    }
9315}
9316
9317/// Cycle type configuration.
9318#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
9319#[serde(rename_all = "snake_case")]
9320pub enum CycleTypeSchemaConfig {
9321    /// Sinusoidal cycle.
9322    #[default]
9323    Sinusoidal,
9324    /// Asymmetric cycle.
9325    Asymmetric,
9326    /// Mean-reverting cycle.
9327    MeanReverting,
9328}
9329
9330/// Recession configuration.
9331#[derive(Debug, Clone, Serialize, Deserialize)]
9332pub struct RecessionSchemaConfig {
9333    /// Enable recession simulation.
9334    #[serde(default)]
9335    pub enabled: bool,
9336
9337    /// Probability per year.
9338    #[serde(default = "default_recession_prob")]
9339    pub probability_per_year: f64,
9340
9341    /// Severity.
9342    #[serde(default)]
9343    pub severity: RecessionSeveritySchemaConfig,
9344
9345    /// Specific recession periods.
9346    #[serde(default)]
9347    pub recession_periods: Vec<RecessionPeriodSchemaConfig>,
9348}
9349
/// Serde fallback for `RecessionSchemaConfig::probability_per_year`.
fn default_recession_prob() -> f64 {
    const RECESSION_PROBABILITY: f64 = 0.10;
    RECESSION_PROBABILITY
}
9353
9354impl Default for RecessionSchemaConfig {
9355    fn default() -> Self {
9356        Self {
9357            enabled: false,
9358            probability_per_year: 0.10,
9359            severity: RecessionSeveritySchemaConfig::Moderate,
9360            recession_periods: Vec::new(),
9361        }
9362    }
9363}
9364
9365/// Recession severity configuration.
9366#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
9367#[serde(rename_all = "snake_case")]
9368pub enum RecessionSeveritySchemaConfig {
9369    /// Mild recession.
9370    Mild,
9371    /// Moderate recession.
9372    #[default]
9373    Moderate,
9374    /// Severe recession.
9375    Severe,
9376}
9377
9378/// Recession period configuration.
9379#[derive(Debug, Clone, Serialize, Deserialize)]
9380pub struct RecessionPeriodSchemaConfig {
9381    /// Start month.
9382    pub start_month: u32,
9383    /// Duration in months.
9384    pub duration_months: u32,
9385}
9386
9387/// Industry cycle configuration.
9388#[derive(Debug, Clone, Serialize, Deserialize)]
9389pub struct IndustryCycleSchemaConfig {
9390    /// Period in months.
9391    #[serde(default = "default_industry_period")]
9392    pub period_months: u32,
9393
9394    /// Amplitude.
9395    #[serde(default = "default_industry_amp")]
9396    pub amplitude: f64,
9397}
9398
/// Serde fallback for `IndustryCycleSchemaConfig::period_months`.
fn default_industry_period() -> u32 {
    const INDUSTRY_PERIOD_MONTHS: u32 = 36;
    INDUSTRY_PERIOD_MONTHS
}

/// Serde fallback for `IndustryCycleSchemaConfig::amplitude`.
fn default_industry_amp() -> f64 {
    const INDUSTRY_AMPLITUDE: f64 = 0.20;
    INDUSTRY_AMPLITUDE
}
9406
9407/// Commodities drift configuration.
9408#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9409pub struct CommoditiesSchemaConfig {
9410    /// Enable commodity drift.
9411    #[serde(default)]
9412    pub enabled: bool,
9413
9414    /// Commodity items.
9415    #[serde(default)]
9416    pub items: Vec<CommodityItemSchemaConfig>,
9417}
9418
9419/// Commodity item configuration.
9420#[derive(Debug, Clone, Serialize, Deserialize)]
9421pub struct CommodityItemSchemaConfig {
9422    /// Commodity name.
9423    pub name: String,
9424
9425    /// Volatility.
9426    #[serde(default = "default_volatility")]
9427    pub volatility: f64,
9428
9429    /// COGS pass-through.
9430    #[serde(default)]
9431    pub cogs_pass_through: f64,
9432
9433    /// Overhead pass-through.
9434    #[serde(default)]
9435    pub overhead_pass_through: f64,
9436}
9437
/// Serde fallback for `CommodityItemSchemaConfig::volatility`.
fn default_volatility() -> f64 {
    const COMMODITY_VOLATILITY: f64 = 0.20;
    COMMODITY_VOLATILITY
}
9441
9442// =============================================================================
9443// Drift Labeling Configuration
9444// =============================================================================
9445
9446/// Configuration for drift ground truth labeling.
9447///
9448/// **Deprecated (v4.1.2):** validated-but-inert. The v3.3.0
9449/// analytics-metadata phase (`DriftEventGenerator` +
9450/// `AnalyticsMetadataSnapshot.drift_events`) produces drift labels
9451/// at runtime — configure it via `analytics_metadata.drift_events`
9452/// instead. The schema type remains for backward-compatible YAML
9453/// loading; will be removed in v5.0.
9454#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9455pub struct DriftLabelingSchemaConfig {
9456    /// Enable drift labeling.
9457    #[serde(default)]
9458    pub enabled: bool,
9459
9460    /// Statistical drift labeling.
9461    #[serde(default)]
9462    pub statistical: StatisticalDriftLabelingSchemaConfig,
9463
9464    /// Categorical drift labeling.
9465    #[serde(default)]
9466    pub categorical: CategoricalDriftLabelingSchemaConfig,
9467
9468    /// Temporal drift labeling.
9469    #[serde(default)]
9470    pub temporal: TemporalDriftLabelingSchemaConfig,
9471
9472    /// Regulatory calendar preset.
9473    #[serde(default)]
9474    pub regulatory_calendar_preset: Option<String>,
9475}
9476
9477/// Statistical drift labeling configuration.
9478#[derive(Debug, Clone, Serialize, Deserialize)]
9479pub struct StatisticalDriftLabelingSchemaConfig {
9480    /// Enable statistical drift labeling.
9481    #[serde(default = "default_true_val")]
9482    pub enabled: bool,
9483
9484    /// Minimum magnitude threshold.
9485    #[serde(default = "default_min_magnitude")]
9486    pub min_magnitude_threshold: f64,
9487}
9488
/// Serde fallback for `min_magnitude_threshold`.
fn default_min_magnitude() -> f64 {
    const MIN_MAGNITUDE: f64 = 0.05;
    MIN_MAGNITUDE
}
9492
9493impl Default for StatisticalDriftLabelingSchemaConfig {
9494    fn default() -> Self {
9495        Self {
9496            enabled: true,
9497            min_magnitude_threshold: 0.05,
9498        }
9499    }
9500}
9501
9502/// Categorical drift labeling configuration.
9503#[derive(Debug, Clone, Serialize, Deserialize)]
9504pub struct CategoricalDriftLabelingSchemaConfig {
9505    /// Enable categorical drift labeling.
9506    #[serde(default = "default_true_val")]
9507    pub enabled: bool,
9508}
9509
9510impl Default for CategoricalDriftLabelingSchemaConfig {
9511    fn default() -> Self {
9512        Self { enabled: true }
9513    }
9514}
9515
9516/// Temporal drift labeling configuration.
9517#[derive(Debug, Clone, Serialize, Deserialize)]
9518pub struct TemporalDriftLabelingSchemaConfig {
9519    /// Enable temporal drift labeling.
9520    #[serde(default = "default_true_val")]
9521    pub enabled: bool,
9522}
9523
9524impl Default for TemporalDriftLabelingSchemaConfig {
9525    fn default() -> Self {
9526        Self { enabled: true }
9527    }
9528}
9529
9530// =============================================================================
9531// Enhanced Anomaly Injection Configuration
9532// =============================================================================
9533
9534/// Enhanced anomaly injection configuration.
9535///
9536/// Provides comprehensive anomaly injection capabilities including:
9537/// - Multi-stage fraud schemes (embezzlement, revenue manipulation, kickbacks)
9538/// - Correlated anomaly injection (co-occurrence patterns, error cascades)
9539/// - Near-miss generation for false positive reduction
9540/// - Detection difficulty classification
9541/// - Context-aware injection based on entity behavior
9542#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9543pub struct EnhancedAnomalyConfig {
9544    /// Enable enhanced anomaly injection.
9545    #[serde(default)]
9546    pub enabled: bool,
9547
9548    /// Base anomaly rates.
9549    #[serde(default)]
9550    pub rates: AnomalyRateConfig,
9551
9552    /// Multi-stage fraud scheme configuration.
9553    #[serde(default)]
9554    pub multi_stage_schemes: MultiStageSchemeConfig,
9555
9556    /// Correlated anomaly injection configuration.
9557    #[serde(default)]
9558    pub correlated_injection: CorrelatedInjectionConfig,
9559
9560    /// Near-miss generation configuration.
9561    #[serde(default)]
9562    pub near_miss: NearMissConfig,
9563
9564    /// Detection difficulty classification configuration.
9565    #[serde(default)]
9566    pub difficulty_classification: DifficultyClassificationConfig,
9567
9568    /// Context-aware injection configuration.
9569    #[serde(default)]
9570    pub context_aware: ContextAwareConfig,
9571
9572    /// Enhanced labeling configuration.
9573    #[serde(default)]
9574    pub labeling: EnhancedLabelingConfig,
9575
9576    /// SOTA-12 (#140, FINDINGS §13): post-process tagger that tags the top
9577    /// `rate × n_jes` JEs whose `(source, gl_account)` is rare under the
9578    /// per-source empirical PMF as `RelationalAnomalyType::SourceConditional-
9579    /// Rarity`. `None` = disabled (default); typical value `0.01` matches the
9580    /// audit-packet hot-list size. Runs AFTER per-entry strategies — additive,
9581    /// doesn't replace them.
9582    ///
9583    /// **Phase 1 deprecation note:** this key remains the source of truth for
9584    /// back-compat. If `concentration.source_conditional_rarity.rate` is also
9585    /// set, that value wins (it's an opt-in to the unified DSL).
9586    #[serde(default)]
9587    pub source_conditional_rarity_rate: Option<f64>,
9588}
9589
9590// ---------------------------------------------------------------------------
9591// ConcentrationConfig — central post-process pass pipeline (#143, Phase 1).
9592//
9593// Design reference:
9594//   docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md
9595//
9596// Phase 1 fields: SourceConditionalRarityPass (wrapping shipped SOTA-12) +
9597// TradingPartnerPoolPass (closes SOTA-11.1 / #142).
9598// Phase 2 will add: account_pair_substitution (closes SOTA-8.1 / #141).
9599// ---------------------------------------------------------------------------
9600
9601/// Top-level configuration for the post-generation concentration pipeline.
9602///
9603/// Each sub-field is `Option<_>`; presence enables the corresponding pass.
9604/// `enabled = false` (default) disables the pipeline regardless of sub-fields,
9605/// matching the parent proposal's "opt-in" guidance.
9606#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9607pub struct ConcentrationConfig {
9608    /// Master switch. `false` (default) → pipeline is no-op.
9609    #[serde(default)]
9610    pub enabled: bool,
9611
9612    /// Phase 1: source-conditional rarity tagger (wraps shipped SOTA-12).
9613    /// If also `anomaly_injection.source_conditional_rarity_rate` is set, this
9614    /// field wins.
9615    #[serde(default)]
9616    pub source_conditional_rarity: Option<SourceConditionalRarityPassConfig>,
9617
9618    /// Phase 1: trading-partner pool resizing (closes SOTA-11.1 / #142).
9619    #[serde(default)]
9620    pub trading_partner_pool: Option<TradingPartnerPoolPassConfig>,
9621
9622    /// Phase 2: account-pair substitution against a corpus-derived PMF
9623    /// (closes SOTA-8.1 / #141). Defers to Phase 2 when wired.
9624    #[serde(default)]
9625    pub account_pair_substitution: Option<AccountPairSubstitutionPassConfig>,
9626
9627    /// Phase 1.5: blank-source post-process (closes SOTA-7 / #132). Nulls
9628    /// `sap_source_code` on a configurable fraction of JEs to match the
9629    /// corpus's ~21% blank-source rate. Runs LAST in the pipeline so
9630    /// earlier passes (`SourceConditionalRarityPass`,
9631    /// `AccountPairSubstitutionPass`) see full source coverage.
9632    #[serde(default)]
9633    pub source_blanking: Option<SourceBlankingPassConfig>,
9634}
9635
9636/// Per-pass config for SourceConditionalRarityPass.
9637#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9638pub struct SourceConditionalRarityPassConfig {
9639    /// Fraction of input JEs to tag (typically `0.01`).
9640    pub rate: f64,
9641    /// Optional min surprise floor (Σ -log P(account|source)). Default `5.0`.
9642    #[serde(default)]
9643    pub min_surprise: Option<f64>,
9644    /// Per-source line-count floor (sources below have unreliable PMFs).
9645    /// Default `5`.
9646    #[serde(default)]
9647    pub min_per_source_lines: Option<u32>,
9648}
9649
9650/// Per-pass config for TradingPartnerPoolPass.
9651#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9652pub struct TradingPartnerPoolPassConfig {
9653    /// Target distinct trading-partner pool size. `0` is clamped to `1` at
9654    /// runtime. Typical corpus value `~12`; synthetic default `~40`.
9655    pub target_size: usize,
9656}
9657
9658/// Per-pass config for SourceBlankingPass (Phase 1.5 / SOTA-7).
9659#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9660pub struct SourceBlankingPassConfig {
9661    /// Fraction of JEs whose `sap_source_code` should be nulled. Typical
9662    /// corpus-matching value `0.21`. Clamped to `[0.0, 1.0]` at runtime.
9663    pub rate: f64,
9664}
9665
9666/// Per-pass config for AccountPairSubstitutionPass (Phase 2).
9667#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9668pub struct AccountPairSubstitutionPassConfig {
9669    /// Path to a per-source pair-PMF JSON (produced by
9670    /// `corpus_vs_synth_gap.py --emit-pair-pmf`). Aggregate-only; never
9671    /// contains row content or client identifiers.
9672    pub pmf_path: String,
9673    /// JEs whose dominant (debit, credit) pair has corpus probability ≥ this
9674    /// threshold are left alone (they're already plausible). Default `0.005`.
9675    #[serde(default)]
9676    pub rarity_threshold: Option<f64>,
9677    /// When substituting, draw from the top-K corpus pairs (weighted by
9678    /// probability). Default `10`.
9679    #[serde(default)]
9680    pub top_k: Option<usize>,
9681}
9682
9683/// Base anomaly rate configuration.
9684#[derive(Debug, Clone, Serialize, Deserialize)]
9685pub struct AnomalyRateConfig {
9686    /// Total anomaly rate (0.0 to 1.0).
9687    #[serde(default = "default_total_anomaly_rate")]
9688    pub total_rate: f64,
9689
9690    /// Fraud anomaly rate.
9691    #[serde(default = "default_fraud_anomaly_rate")]
9692    pub fraud_rate: f64,
9693
9694    /// Error anomaly rate.
9695    #[serde(default = "default_error_anomaly_rate")]
9696    pub error_rate: f64,
9697
9698    /// Process issue rate.
9699    #[serde(default = "default_process_anomaly_rate")]
9700    pub process_rate: f64,
9701}
9702
/// Serde fallback for `AnomalyRateConfig::total_rate`.
fn default_total_anomaly_rate() -> f64 {
    const TOTAL_RATE: f64 = 0.03;
    TOTAL_RATE
}

/// Serde fallback for `AnomalyRateConfig::fraud_rate`.
fn default_fraud_anomaly_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.01;
    FRAUD_RATE
}

/// Serde fallback for `AnomalyRateConfig::error_rate`.
fn default_error_anomaly_rate() -> f64 {
    const ERROR_RATE: f64 = 0.015;
    ERROR_RATE
}

/// Serde fallback for `AnomalyRateConfig::process_rate`.
fn default_process_anomaly_rate() -> f64 {
    const PROCESS_RATE: f64 = 0.005;
    PROCESS_RATE
}
9715
9716impl Default for AnomalyRateConfig {
9717    fn default() -> Self {
9718        Self {
9719            total_rate: default_total_anomaly_rate(),
9720            fraud_rate: default_fraud_anomaly_rate(),
9721            error_rate: default_error_anomaly_rate(),
9722            process_rate: default_process_anomaly_rate(),
9723        }
9724    }
9725}
9726
9727/// Multi-stage fraud scheme configuration.
9728#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9729pub struct MultiStageSchemeConfig {
9730    /// Enable multi-stage fraud schemes.
9731    #[serde(default)]
9732    pub enabled: bool,
9733
9734    /// Embezzlement scheme configuration.
9735    #[serde(default)]
9736    pub embezzlement: EmbezzlementSchemeConfig,
9737
9738    /// Revenue manipulation scheme configuration.
9739    #[serde(default)]
9740    pub revenue_manipulation: RevenueManipulationSchemeConfig,
9741
9742    /// Vendor kickback scheme configuration.
9743    #[serde(default)]
9744    pub kickback: KickbackSchemeConfig,
9745}
9746
9747/// Embezzlement scheme configuration.
9748#[derive(Debug, Clone, Serialize, Deserialize)]
9749pub struct EmbezzlementSchemeConfig {
9750    /// Probability of starting an embezzlement scheme per perpetrator per year.
9751    #[serde(default = "default_embezzlement_probability")]
9752    pub probability: f64,
9753
9754    /// Testing stage configuration.
9755    #[serde(default)]
9756    pub testing_stage: SchemeStageConfig,
9757
9758    /// Escalation stage configuration.
9759    #[serde(default)]
9760    pub escalation_stage: SchemeStageConfig,
9761
9762    /// Acceleration stage configuration.
9763    #[serde(default)]
9764    pub acceleration_stage: SchemeStageConfig,
9765
9766    /// Desperation stage configuration.
9767    #[serde(default)]
9768    pub desperation_stage: SchemeStageConfig,
9769}
9770
/// Serde fallback for `EmbezzlementSchemeConfig::probability`.
fn default_embezzlement_probability() -> f64 {
    const EMBEZZLEMENT_PROBABILITY: f64 = 0.02;
    EMBEZZLEMENT_PROBABILITY
}
9774
9775impl Default for EmbezzlementSchemeConfig {
9776    fn default() -> Self {
9777        Self {
9778            probability: default_embezzlement_probability(),
9779            testing_stage: SchemeStageConfig {
9780                duration_months: 2,
9781                amount_min: 100.0,
9782                amount_max: 500.0,
9783                transaction_count_min: 2,
9784                transaction_count_max: 5,
9785                difficulty: "hard".to_string(),
9786            },
9787            escalation_stage: SchemeStageConfig {
9788                duration_months: 6,
9789                amount_min: 500.0,
9790                amount_max: 2000.0,
9791                transaction_count_min: 3,
9792                transaction_count_max: 8,
9793                difficulty: "moderate".to_string(),
9794            },
9795            acceleration_stage: SchemeStageConfig {
9796                duration_months: 3,
9797                amount_min: 2000.0,
9798                amount_max: 10000.0,
9799                transaction_count_min: 5,
9800                transaction_count_max: 12,
9801                difficulty: "easy".to_string(),
9802            },
9803            desperation_stage: SchemeStageConfig {
9804                duration_months: 1,
9805                amount_min: 10000.0,
9806                amount_max: 50000.0,
9807                transaction_count_min: 3,
9808                transaction_count_max: 6,
9809                difficulty: "trivial".to_string(),
9810            },
9811        }
9812    }
9813}
9814
9815/// Revenue manipulation scheme configuration.
9816#[derive(Debug, Clone, Serialize, Deserialize)]
9817pub struct RevenueManipulationSchemeConfig {
9818    /// Probability of starting a revenue manipulation scheme per period.
9819    #[serde(default = "default_revenue_manipulation_probability")]
9820    pub probability: f64,
9821
9822    /// Early revenue recognition inflation target (Q4).
9823    #[serde(default = "default_early_recognition_target")]
9824    pub early_recognition_target: f64,
9825
9826    /// Expense deferral inflation target (Q1).
9827    #[serde(default = "default_expense_deferral_target")]
9828    pub expense_deferral_target: f64,
9829
9830    /// Reserve release inflation target (Q2).
9831    #[serde(default = "default_reserve_release_target")]
9832    pub reserve_release_target: f64,
9833
9834    /// Channel stuffing inflation target (Q4).
9835    #[serde(default = "default_channel_stuffing_target")]
9836    pub channel_stuffing_target: f64,
9837}
9838
/// Serde fallback for `RevenueManipulationSchemeConfig::probability`.
fn default_revenue_manipulation_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde fallback for `early_recognition_target`.
fn default_early_recognition_target() -> f64 {
    const EARLY_RECOGNITION: f64 = 0.02;
    EARLY_RECOGNITION
}

/// Serde fallback for `expense_deferral_target`.
fn default_expense_deferral_target() -> f64 {
    const EXPENSE_DEFERRAL: f64 = 0.03;
    EXPENSE_DEFERRAL
}

/// Serde fallback for `reserve_release_target`.
fn default_reserve_release_target() -> f64 {
    const RESERVE_RELEASE: f64 = 0.02;
    RESERVE_RELEASE
}

/// Serde fallback for `channel_stuffing_target`.
fn default_channel_stuffing_target() -> f64 {
    const CHANNEL_STUFFING: f64 = 0.05;
    CHANNEL_STUFFING
}
9854
9855impl Default for RevenueManipulationSchemeConfig {
9856    fn default() -> Self {
9857        Self {
9858            probability: default_revenue_manipulation_probability(),
9859            early_recognition_target: default_early_recognition_target(),
9860            expense_deferral_target: default_expense_deferral_target(),
9861            reserve_release_target: default_reserve_release_target(),
9862            channel_stuffing_target: default_channel_stuffing_target(),
9863        }
9864    }
9865}
9866
9867/// Vendor kickback scheme configuration.
9868#[derive(Debug, Clone, Serialize, Deserialize)]
9869pub struct KickbackSchemeConfig {
9870    /// Probability of starting a kickback scheme.
9871    #[serde(default = "default_kickback_probability")]
9872    pub probability: f64,
9873
9874    /// Minimum price inflation percentage.
9875    #[serde(default = "default_kickback_inflation_min")]
9876    pub inflation_min: f64,
9877
9878    /// Maximum price inflation percentage.
9879    #[serde(default = "default_kickback_inflation_max")]
9880    pub inflation_max: f64,
9881
9882    /// Kickback percentage (of inflation).
9883    #[serde(default = "default_kickback_percent")]
9884    pub kickback_percent: f64,
9885
9886    /// Setup duration in months.
9887    #[serde(default = "default_kickback_setup_months")]
9888    pub setup_months: u32,
9889
9890    /// Main operation duration in months.
9891    #[serde(default = "default_kickback_operation_months")]
9892    pub operation_months: u32,
9893}
9894
/// Serde fallback for `KickbackSchemeConfig::probability`.
fn default_kickback_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde fallback for `KickbackSchemeConfig::inflation_min`.
fn default_kickback_inflation_min() -> f64 {
    const INFLATION_MIN: f64 = 0.10;
    INFLATION_MIN
}

/// Serde fallback for `KickbackSchemeConfig::inflation_max`.
fn default_kickback_inflation_max() -> f64 {
    const INFLATION_MAX: f64 = 0.25;
    INFLATION_MAX
}

/// Serde fallback for `KickbackSchemeConfig::kickback_percent`.
fn default_kickback_percent() -> f64 {
    const KICKBACK_SHARE: f64 = 0.50;
    KICKBACK_SHARE
}

/// Serde fallback for `KickbackSchemeConfig::setup_months`.
fn default_kickback_setup_months() -> u32 {
    const SETUP_MONTHS: u32 = 3;
    SETUP_MONTHS
}

/// Serde fallback for `KickbackSchemeConfig::operation_months`.
fn default_kickback_operation_months() -> u32 {
    const OPERATION_MONTHS: u32 = 12;
    OPERATION_MONTHS
}
9913
9914impl Default for KickbackSchemeConfig {
9915    fn default() -> Self {
9916        Self {
9917            probability: default_kickback_probability(),
9918            inflation_min: default_kickback_inflation_min(),
9919            inflation_max: default_kickback_inflation_max(),
9920            kickback_percent: default_kickback_percent(),
9921            setup_months: default_kickback_setup_months(),
9922            operation_months: default_kickback_operation_months(),
9923        }
9924    }
9925}
9926
9927/// Individual scheme stage configuration.
9928#[derive(Debug, Clone, Serialize, Deserialize)]
9929pub struct SchemeStageConfig {
9930    /// Duration in months.
9931    pub duration_months: u32,
9932
9933    /// Minimum transaction amount.
9934    pub amount_min: f64,
9935
9936    /// Maximum transaction amount.
9937    pub amount_max: f64,
9938
9939    /// Minimum number of transactions.
9940    pub transaction_count_min: u32,
9941
9942    /// Maximum number of transactions.
9943    pub transaction_count_max: u32,
9944
9945    /// Detection difficulty level (trivial, easy, moderate, hard, expert).
9946    pub difficulty: String,
9947}
9948
9949impl Default for SchemeStageConfig {
9950    fn default() -> Self {
9951        Self {
9952            duration_months: 3,
9953            amount_min: 100.0,
9954            amount_max: 1000.0,
9955            transaction_count_min: 2,
9956            transaction_count_max: 10,
9957            difficulty: "moderate".to_string(),
9958        }
9959    }
9960}
9961
9962/// Correlated anomaly injection configuration.
9963#[derive(Debug, Clone, Serialize, Deserialize)]
9964pub struct CorrelatedInjectionConfig {
9965    /// Enable correlated anomaly injection.
9966    #[serde(default)]
9967    pub enabled: bool,
9968
9969    /// Enable fraud concealment co-occurrence patterns.
9970    #[serde(default = "default_true_val")]
9971    pub fraud_concealment: bool,
9972
9973    /// Enable error cascade patterns.
9974    #[serde(default = "default_true_val")]
9975    pub error_cascade: bool,
9976
9977    /// Enable temporal clustering (period-end spikes).
9978    #[serde(default = "default_true_val")]
9979    pub temporal_clustering: bool,
9980
9981    /// Temporal clustering configuration.
9982    #[serde(default)]
9983    pub temporal_clustering_config: TemporalClusteringConfig,
9984
9985    /// Co-occurrence patterns.
9986    #[serde(default)]
9987    pub co_occurrence_patterns: Vec<CoOccurrencePatternConfig>,
9988}
9989
9990impl Default for CorrelatedInjectionConfig {
9991    fn default() -> Self {
9992        Self {
9993            enabled: false,
9994            fraud_concealment: true,
9995            error_cascade: true,
9996            temporal_clustering: true,
9997            temporal_clustering_config: TemporalClusteringConfig::default(),
9998            co_occurrence_patterns: Vec::new(),
9999        }
10000    }
10001}
10002
10003/// Temporal clustering configuration.
10004#[derive(Debug, Clone, Serialize, Deserialize)]
10005pub struct TemporalClusteringConfig {
10006    /// Period-end error multiplier.
10007    #[serde(default = "default_period_end_multiplier")]
10008    pub period_end_multiplier: f64,
10009
10010    /// Number of business days before period end to apply multiplier.
10011    #[serde(default = "default_period_end_days")]
10012    pub period_end_days: u32,
10013
10014    /// Quarter-end additional multiplier.
10015    #[serde(default = "default_quarter_end_multiplier")]
10016    pub quarter_end_multiplier: f64,
10017
10018    /// Year-end additional multiplier.
10019    #[serde(default = "default_year_end_multiplier")]
10020    pub year_end_multiplier: f64,
10021}
10022
/// Serde fallback for `TemporalClusteringConfig::period_end_multiplier`.
fn default_period_end_multiplier() -> f64 {
    const PERIOD_END_MULTIPLIER: f64 = 2.5;
    PERIOD_END_MULTIPLIER
}

/// Serde fallback for `TemporalClusteringConfig::period_end_days`.
fn default_period_end_days() -> u32 {
    const PERIOD_END_DAYS: u32 = 5;
    PERIOD_END_DAYS
}

/// Serde fallback for `TemporalClusteringConfig::quarter_end_multiplier`.
fn default_quarter_end_multiplier() -> f64 {
    const QUARTER_END_MULTIPLIER: f64 = 1.5;
    QUARTER_END_MULTIPLIER
}

/// Serde fallback for `TemporalClusteringConfig::year_end_multiplier`.
fn default_year_end_multiplier() -> f64 {
    const YEAR_END_MULTIPLIER: f64 = 2.0;
    YEAR_END_MULTIPLIER
}
10035
10036impl Default for TemporalClusteringConfig {
10037    fn default() -> Self {
10038        Self {
10039            period_end_multiplier: default_period_end_multiplier(),
10040            period_end_days: default_period_end_days(),
10041            quarter_end_multiplier: default_quarter_end_multiplier(),
10042            year_end_multiplier: default_year_end_multiplier(),
10043        }
10044    }
10045}
10046
10047/// Co-occurrence pattern configuration.
10048#[derive(Debug, Clone, Serialize, Deserialize)]
10049pub struct CoOccurrencePatternConfig {
10050    /// Pattern name.
10051    pub name: String,
10052
10053    /// Primary anomaly type that triggers the pattern.
10054    pub primary_type: String,
10055
10056    /// Correlated anomalies.
10057    pub correlated: Vec<CorrelatedAnomalyConfig>,
10058}
10059
10060/// Correlated anomaly configuration.
10061#[derive(Debug, Clone, Serialize, Deserialize)]
10062pub struct CorrelatedAnomalyConfig {
10063    /// Anomaly type.
10064    pub anomaly_type: String,
10065
10066    /// Probability of occurrence (0.0 to 1.0).
10067    pub probability: f64,
10068
10069    /// Minimum lag in days.
10070    pub lag_days_min: i32,
10071
10072    /// Maximum lag in days.
10073    pub lag_days_max: i32,
10074}
10075
10076/// Near-miss generation configuration.
10077#[derive(Debug, Clone, Serialize, Deserialize)]
10078pub struct NearMissConfig {
10079    /// Enable near-miss generation.
10080    #[serde(default)]
10081    pub enabled: bool,
10082
10083    /// Proportion of "anomalies" that are actually near-misses (0.0 to 1.0).
10084    #[serde(default = "default_near_miss_proportion")]
10085    pub proportion: f64,
10086
10087    /// Enable near-duplicate pattern.
10088    #[serde(default = "default_true_val")]
10089    pub near_duplicate: bool,
10090
10091    /// Near-duplicate date difference range in days.
10092    #[serde(default)]
10093    pub near_duplicate_days: NearDuplicateDaysConfig,
10094
10095    /// Enable threshold proximity pattern.
10096    #[serde(default = "default_true_val")]
10097    pub threshold_proximity: bool,
10098
10099    /// Threshold proximity range (e.g., 0.90-0.99 of threshold).
10100    #[serde(default)]
10101    pub threshold_proximity_range: ThresholdProximityRangeConfig,
10102
10103    /// Enable unusual but legitimate patterns.
10104    #[serde(default = "default_true_val")]
10105    pub unusual_legitimate: bool,
10106
10107    /// Types of unusual legitimate patterns to generate.
10108    #[serde(default = "default_unusual_legitimate_types")]
10109    pub unusual_legitimate_types: Vec<String>,
10110
10111    /// Enable corrected error patterns.
10112    #[serde(default = "default_true_val")]
10113    pub corrected_errors: bool,
10114
10115    /// Corrected error correction lag range in days.
10116    #[serde(default)]
10117    pub corrected_error_lag: CorrectedErrorLagConfig,
10118}
10119
/// Serde fallback for `NearMissConfig::proportion`.
fn default_near_miss_proportion() -> f64 {
    0.30_f64
}

/// Serde fallback for `NearMissConfig::unusual_legitimate_types`.
fn default_unusual_legitimate_types() -> Vec<String> {
    [
        "year_end_bonus",
        "contract_prepayment",
        "insurance_claim",
        "settlement_payment",
    ]
    .iter()
    .map(|kind| String::from(*kind))
    .collect()
}
10132
10133impl Default for NearMissConfig {
10134    fn default() -> Self {
10135        Self {
10136            enabled: false,
10137            proportion: default_near_miss_proportion(),
10138            near_duplicate: true,
10139            near_duplicate_days: NearDuplicateDaysConfig::default(),
10140            threshold_proximity: true,
10141            threshold_proximity_range: ThresholdProximityRangeConfig::default(),
10142            unusual_legitimate: true,
10143            unusual_legitimate_types: default_unusual_legitimate_types(),
10144            corrected_errors: true,
10145            corrected_error_lag: CorrectedErrorLagConfig::default(),
10146        }
10147    }
10148}
10149
10150/// Near-duplicate days configuration.
10151#[derive(Debug, Clone, Serialize, Deserialize)]
10152pub struct NearDuplicateDaysConfig {
10153    /// Minimum days apart.
10154    #[serde(default = "default_near_duplicate_min")]
10155    pub min: u32,
10156
10157    /// Maximum days apart.
10158    #[serde(default = "default_near_duplicate_max")]
10159    pub max: u32,
10160}
10161
/// Serde fallback for `NearDuplicateDaysConfig::min`.
fn default_near_duplicate_min() -> u32 {
    1_u32
}

/// Serde fallback for `NearDuplicateDaysConfig::max`.
fn default_near_duplicate_max() -> u32 {
    3_u32
}
10168
10169impl Default for NearDuplicateDaysConfig {
10170    fn default() -> Self {
10171        Self {
10172            min: default_near_duplicate_min(),
10173            max: default_near_duplicate_max(),
10174        }
10175    }
10176}
10177
10178/// Threshold proximity range configuration.
10179#[derive(Debug, Clone, Serialize, Deserialize)]
10180pub struct ThresholdProximityRangeConfig {
10181    /// Minimum proximity (e.g., 0.90 = 90% of threshold).
10182    #[serde(default = "default_threshold_proximity_min")]
10183    pub min: f64,
10184
10185    /// Maximum proximity (e.g., 0.99 = 99% of threshold).
10186    #[serde(default = "default_threshold_proximity_max")]
10187    pub max: f64,
10188}
10189
/// Serde fallback for `ThresholdProximityRangeConfig::min`.
fn default_threshold_proximity_min() -> f64 {
    0.90_f64
}

/// Serde fallback for `ThresholdProximityRangeConfig::max`.
fn default_threshold_proximity_max() -> f64 {
    0.99_f64
}
10196
10197impl Default for ThresholdProximityRangeConfig {
10198    fn default() -> Self {
10199        Self {
10200            min: default_threshold_proximity_min(),
10201            max: default_threshold_proximity_max(),
10202        }
10203    }
10204}
10205
10206/// Corrected error lag configuration.
10207#[derive(Debug, Clone, Serialize, Deserialize)]
10208pub struct CorrectedErrorLagConfig {
10209    /// Minimum correction lag in days.
10210    #[serde(default = "default_corrected_error_lag_min")]
10211    pub min: u32,
10212
10213    /// Maximum correction lag in days.
10214    #[serde(default = "default_corrected_error_lag_max")]
10215    pub max: u32,
10216}
10217
/// Serde fallback for `CorrectedErrorLagConfig::min`.
fn default_corrected_error_lag_min() -> u32 {
    1_u32
}

/// Serde fallback for `CorrectedErrorLagConfig::max`.
fn default_corrected_error_lag_max() -> u32 {
    5_u32
}
10224
10225impl Default for CorrectedErrorLagConfig {
10226    fn default() -> Self {
10227        Self {
10228            min: default_corrected_error_lag_min(),
10229            max: default_corrected_error_lag_max(),
10230        }
10231    }
10232}
10233
10234/// Detection difficulty classification configuration.
10235#[derive(Debug, Clone, Serialize, Deserialize)]
10236pub struct DifficultyClassificationConfig {
10237    /// Enable detection difficulty classification.
10238    #[serde(default)]
10239    pub enabled: bool,
10240
10241    /// Target distribution of difficulty levels.
10242    #[serde(default)]
10243    pub target_distribution: DifficultyDistributionConfig,
10244}
10245
10246impl Default for DifficultyClassificationConfig {
10247    fn default() -> Self {
10248        Self {
10249            enabled: true,
10250            target_distribution: DifficultyDistributionConfig::default(),
10251        }
10252    }
10253}
10254
10255/// Target distribution of detection difficulty levels.
10256#[derive(Debug, Clone, Serialize, Deserialize)]
10257pub struct DifficultyDistributionConfig {
10258    /// Proportion of trivial anomalies (expected 99% detection).
10259    #[serde(default = "default_difficulty_trivial")]
10260    pub trivial: f64,
10261
10262    /// Proportion of easy anomalies (expected 90% detection).
10263    #[serde(default = "default_difficulty_easy")]
10264    pub easy: f64,
10265
10266    /// Proportion of moderate anomalies (expected 70% detection).
10267    #[serde(default = "default_difficulty_moderate")]
10268    pub moderate: f64,
10269
10270    /// Proportion of hard anomalies (expected 40% detection).
10271    #[serde(default = "default_difficulty_hard")]
10272    pub hard: f64,
10273
10274    /// Proportion of expert anomalies (expected 15% detection).
10275    #[serde(default = "default_difficulty_expert")]
10276    pub expert: f64,
10277}
10278
/// Serde fallback for `DifficultyDistributionConfig::trivial`.
fn default_difficulty_trivial() -> f64 {
    0.15_f64
}

/// Serde fallback for `DifficultyDistributionConfig::easy`.
fn default_difficulty_easy() -> f64 {
    0.25_f64
}

/// Serde fallback for `DifficultyDistributionConfig::moderate`.
fn default_difficulty_moderate() -> f64 {
    0.30_f64
}

/// Serde fallback for `DifficultyDistributionConfig::hard`.
fn default_difficulty_hard() -> f64 {
    0.20_f64
}

/// Serde fallback for `DifficultyDistributionConfig::expert`.
fn default_difficulty_expert() -> f64 {
    0.10_f64
}
10294
10295impl Default for DifficultyDistributionConfig {
10296    fn default() -> Self {
10297        Self {
10298            trivial: default_difficulty_trivial(),
10299            easy: default_difficulty_easy(),
10300            moderate: default_difficulty_moderate(),
10301            hard: default_difficulty_hard(),
10302            expert: default_difficulty_expert(),
10303        }
10304    }
10305}
10306
10307/// Context-aware injection configuration.
10308#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10309pub struct ContextAwareConfig {
10310    /// Enable context-aware injection.
10311    #[serde(default)]
10312    pub enabled: bool,
10313
10314    /// Vendor-specific anomaly rules.
10315    #[serde(default)]
10316    pub vendor_rules: VendorAnomalyRulesConfig,
10317
10318    /// Employee-specific anomaly rules.
10319    #[serde(default)]
10320    pub employee_rules: EmployeeAnomalyRulesConfig,
10321
10322    /// Account-specific anomaly rules.
10323    #[serde(default)]
10324    pub account_rules: AccountAnomalyRulesConfig,
10325
10326    /// Behavioral baseline configuration.
10327    #[serde(default)]
10328    pub behavioral_baseline: BehavioralBaselineConfig,
10329}
10330
10331/// Vendor-specific anomaly rules configuration.
10332#[derive(Debug, Clone, Serialize, Deserialize)]
10333pub struct VendorAnomalyRulesConfig {
10334    /// Error rate multiplier for new vendors (< threshold days).
10335    #[serde(default = "default_new_vendor_multiplier")]
10336    pub new_vendor_error_multiplier: f64,
10337
10338    /// Days threshold for "new" vendor classification.
10339    #[serde(default = "default_new_vendor_threshold")]
10340    pub new_vendor_threshold_days: u32,
10341
10342    /// Error rate multiplier for international vendors.
10343    #[serde(default = "default_international_multiplier")]
10344    pub international_error_multiplier: f64,
10345
10346    /// Strategic vendor anomaly types (may differ from general vendors).
10347    #[serde(default = "default_strategic_vendor_types")]
10348    pub strategic_vendor_anomaly_types: Vec<String>,
10349}
10350
/// Serde fallback for `VendorAnomalyRulesConfig::new_vendor_error_multiplier`.
fn default_new_vendor_multiplier() -> f64 {
    2.5_f64
}

/// Serde fallback for `VendorAnomalyRulesConfig::new_vendor_threshold_days`.
fn default_new_vendor_threshold() -> u32 {
    90_u32
}

/// Serde fallback for `VendorAnomalyRulesConfig::international_error_multiplier`.
fn default_international_multiplier() -> f64 {
    1.5_f64
}

/// Serde fallback for `VendorAnomalyRulesConfig::strategic_vendor_anomaly_types`.
fn default_strategic_vendor_types() -> Vec<String> {
    ["pricing_dispute", "contract_violation"]
        .iter()
        .map(|kind| String::from(*kind))
        .collect()
}
10366
10367impl Default for VendorAnomalyRulesConfig {
10368    fn default() -> Self {
10369        Self {
10370            new_vendor_error_multiplier: default_new_vendor_multiplier(),
10371            new_vendor_threshold_days: default_new_vendor_threshold(),
10372            international_error_multiplier: default_international_multiplier(),
10373            strategic_vendor_anomaly_types: default_strategic_vendor_types(),
10374        }
10375    }
10376}
10377
10378/// Employee-specific anomaly rules configuration.
10379#[derive(Debug, Clone, Serialize, Deserialize)]
10380pub struct EmployeeAnomalyRulesConfig {
10381    /// Error rate for new employees (< threshold days).
10382    #[serde(default = "default_new_employee_rate")]
10383    pub new_employee_error_rate: f64,
10384
10385    /// Days threshold for "new" employee classification.
10386    #[serde(default = "default_new_employee_threshold")]
10387    pub new_employee_threshold_days: u32,
10388
10389    /// Transaction volume threshold for fatigue errors.
10390    #[serde(default = "default_volume_fatigue_threshold")]
10391    pub volume_fatigue_threshold: u32,
10392
10393    /// Error rate multiplier when primary approver is absent.
10394    #[serde(default = "default_coverage_multiplier")]
10395    pub coverage_error_multiplier: f64,
10396}
10397
/// Serde fallback for `EmployeeAnomalyRulesConfig::new_employee_error_rate`.
fn default_new_employee_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::new_employee_threshold_days`.
fn default_new_employee_threshold() -> u32 {
    180_u32
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::volume_fatigue_threshold`.
fn default_volume_fatigue_threshold() -> u32 {
    50_u32
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::coverage_error_multiplier`.
fn default_coverage_multiplier() -> f64 {
    1.8_f64
}
10410
10411impl Default for EmployeeAnomalyRulesConfig {
10412    fn default() -> Self {
10413        Self {
10414            new_employee_error_rate: default_new_employee_rate(),
10415            new_employee_threshold_days: default_new_employee_threshold(),
10416            volume_fatigue_threshold: default_volume_fatigue_threshold(),
10417            coverage_error_multiplier: default_coverage_multiplier(),
10418        }
10419    }
10420}
10421
10422/// Account-specific anomaly rules configuration.
10423#[derive(Debug, Clone, Serialize, Deserialize)]
10424pub struct AccountAnomalyRulesConfig {
10425    /// Error rate multiplier for high-risk accounts.
10426    #[serde(default = "default_high_risk_multiplier")]
10427    pub high_risk_account_multiplier: f64,
10428
10429    /// Account codes considered high-risk.
10430    #[serde(default = "default_high_risk_accounts")]
10431    pub high_risk_accounts: Vec<String>,
10432
10433    /// Error rate multiplier for suspense accounts.
10434    #[serde(default = "default_suspense_multiplier")]
10435    pub suspense_account_multiplier: f64,
10436
10437    /// Account codes considered suspense accounts.
10438    #[serde(default = "default_suspense_accounts")]
10439    pub suspense_accounts: Vec<String>,
10440
10441    /// Error rate multiplier for intercompany accounts.
10442    #[serde(default = "default_intercompany_multiplier")]
10443    pub intercompany_account_multiplier: f64,
10444}
10445
/// Serde fallback for `AccountAnomalyRulesConfig::high_risk_account_multiplier`.
fn default_high_risk_multiplier() -> f64 {
    2.0_f64
}

/// Serde fallback for `AccountAnomalyRulesConfig::high_risk_accounts`.
fn default_high_risk_accounts() -> Vec<String> {
    [
        "1100", // AR Control
        "2000", // AP Control
        "3000", // Cash
    ]
    .iter()
    .map(|code| String::from(*code))
    .collect()
}

/// Serde fallback for `AccountAnomalyRulesConfig::suspense_account_multiplier`.
fn default_suspense_multiplier() -> f64 {
    3.0_f64
}

/// Serde fallback for `AccountAnomalyRulesConfig::suspense_accounts`.
fn default_suspense_accounts() -> Vec<String> {
    ["9999", "9998"]
        .iter()
        .map(|code| String::from(*code))
        .collect()
}

/// Serde fallback for `AccountAnomalyRulesConfig::intercompany_account_multiplier`.
fn default_intercompany_multiplier() -> f64 {
    1.5_f64
}
10465
10466impl Default for AccountAnomalyRulesConfig {
10467    fn default() -> Self {
10468        Self {
10469            high_risk_account_multiplier: default_high_risk_multiplier(),
10470            high_risk_accounts: default_high_risk_accounts(),
10471            suspense_account_multiplier: default_suspense_multiplier(),
10472            suspense_accounts: default_suspense_accounts(),
10473            intercompany_account_multiplier: default_intercompany_multiplier(),
10474        }
10475    }
10476}
10477
10478/// Behavioral baseline configuration.
10479#[derive(Debug, Clone, Serialize, Deserialize)]
10480pub struct BehavioralBaselineConfig {
10481    /// Enable behavioral baseline tracking.
10482    #[serde(default)]
10483    pub enabled: bool,
10484
10485    /// Number of days to build baseline from.
10486    #[serde(default = "default_baseline_period")]
10487    pub baseline_period_days: u32,
10488
10489    /// Standard deviation threshold for amount anomalies.
10490    #[serde(default = "default_deviation_threshold")]
10491    pub deviation_threshold_std: f64,
10492
10493    /// Standard deviation threshold for frequency anomalies.
10494    #[serde(default = "default_frequency_deviation")]
10495    pub frequency_deviation_threshold: f64,
10496}
10497
/// Serde fallback for `BehavioralBaselineConfig::baseline_period_days`.
fn default_baseline_period() -> u32 {
    90_u32
}

/// Serde fallback for `BehavioralBaselineConfig::deviation_threshold_std`.
fn default_deviation_threshold() -> f64 {
    3.0_f64
}

/// Serde fallback for `BehavioralBaselineConfig::frequency_deviation_threshold`.
fn default_frequency_deviation() -> f64 {
    2.0_f64
}
10507
10508impl Default for BehavioralBaselineConfig {
10509    fn default() -> Self {
10510        Self {
10511            enabled: false,
10512            baseline_period_days: default_baseline_period(),
10513            deviation_threshold_std: default_deviation_threshold(),
10514            frequency_deviation_threshold: default_frequency_deviation(),
10515        }
10516    }
10517}
10518
10519/// Enhanced labeling configuration.
10520#[derive(Debug, Clone, Serialize, Deserialize)]
10521pub struct EnhancedLabelingConfig {
10522    /// Enable severity scoring.
10523    #[serde(default = "default_true_val")]
10524    pub severity_scoring: bool,
10525
10526    /// Enable difficulty classification.
10527    #[serde(default = "default_true_val")]
10528    pub difficulty_classification: bool,
10529
10530    /// Materiality thresholds for severity classification.
10531    #[serde(default)]
10532    pub materiality_thresholds: MaterialityThresholdsConfig,
10533}
10534
10535impl Default for EnhancedLabelingConfig {
10536    fn default() -> Self {
10537        Self {
10538            severity_scoring: true,
10539            difficulty_classification: true,
10540            materiality_thresholds: MaterialityThresholdsConfig::default(),
10541        }
10542    }
10543}
10544
10545/// Materiality thresholds configuration.
10546#[derive(Debug, Clone, Serialize, Deserialize)]
10547pub struct MaterialityThresholdsConfig {
10548    /// Threshold for trivial impact (as percentage of total).
10549    #[serde(default = "default_materiality_trivial")]
10550    pub trivial: f64,
10551
10552    /// Threshold for immaterial impact.
10553    #[serde(default = "default_materiality_immaterial")]
10554    pub immaterial: f64,
10555
10556    /// Threshold for material impact.
10557    #[serde(default = "default_materiality_material")]
10558    pub material: f64,
10559
10560    /// Threshold for highly material impact.
10561    #[serde(default = "default_materiality_highly_material")]
10562    pub highly_material: f64,
10563}
10564
/// Serde fallback for `MaterialityThresholdsConfig::trivial`.
fn default_materiality_trivial() -> f64 {
    0.001_f64
}

/// Serde fallback for `MaterialityThresholdsConfig::immaterial`.
fn default_materiality_immaterial() -> f64 {
    0.01_f64
}

/// Serde fallback for `MaterialityThresholdsConfig::material`.
fn default_materiality_material() -> f64 {
    0.05_f64
}

/// Serde fallback for `MaterialityThresholdsConfig::highly_material`.
fn default_materiality_highly_material() -> f64 {
    0.10_f64
}
10577
10578impl Default for MaterialityThresholdsConfig {
10579    fn default() -> Self {
10580        Self {
10581            trivial: default_materiality_trivial(),
10582            immaterial: default_materiality_immaterial(),
10583            material: default_materiality_material(),
10584            highly_material: default_materiality_highly_material(),
10585        }
10586    }
10587}
10588
10589// =============================================================================
10590// Industry-Specific Configuration
10591// =============================================================================
10592
10593/// Industry-specific transaction and anomaly generation configuration.
10594///
10595/// This configuration enables generation of industry-authentic:
10596/// - Transaction types with appropriate terminology
10597/// - Master data (BOM, routings, clinical codes, etc.)
10598/// - Industry-specific anomaly patterns
10599/// - Regulatory framework compliance
10600#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10601pub struct IndustrySpecificConfig {
10602    /// Enable industry-specific generation.
10603    #[serde(default)]
10604    pub enabled: bool,
10605
10606    /// Manufacturing industry settings.
10607    #[serde(default)]
10608    pub manufacturing: ManufacturingConfig,
10609
10610    /// Retail industry settings.
10611    #[serde(default)]
10612    pub retail: RetailConfig,
10613
10614    /// Healthcare industry settings.
10615    #[serde(default)]
10616    pub healthcare: HealthcareConfig,
10617
10618    /// Technology industry settings.
10619    #[serde(default)]
10620    pub technology: TechnologyConfig,
10621
10622    /// Financial services industry settings.
10623    #[serde(default)]
10624    pub financial_services: FinancialServicesConfig,
10625
10626    /// Professional services industry settings.
10627    #[serde(default)]
10628    pub professional_services: ProfessionalServicesConfig,
10629}
10630
10631/// Manufacturing industry configuration.
10632#[derive(Debug, Clone, Serialize, Deserialize)]
10633pub struct ManufacturingConfig {
10634    /// Enable manufacturing-specific generation.
10635    #[serde(default)]
10636    pub enabled: bool,
10637
10638    /// Bill of Materials depth (typical: 3-7).
10639    #[serde(default = "default_bom_depth")]
10640    pub bom_depth: u32,
10641
10642    /// Whether to use just-in-time inventory.
10643    #[serde(default)]
10644    pub just_in_time: bool,
10645
10646    /// Production order types to generate.
10647    #[serde(default = "default_production_order_types")]
10648    pub production_order_types: Vec<String>,
10649
10650    /// Quality framework (ISO_9001, Six_Sigma, etc.).
10651    #[serde(default)]
10652    pub quality_framework: Option<String>,
10653
10654    /// Number of supplier tiers to model (1-3).
10655    #[serde(default = "default_supplier_tiers")]
10656    pub supplier_tiers: u32,
10657
10658    /// Standard cost update frequency.
10659    #[serde(default = "default_cost_frequency")]
10660    pub standard_cost_frequency: String,
10661
10662    /// Target yield rate (0.95-0.99 typical).
10663    #[serde(default = "default_yield_rate")]
10664    pub target_yield_rate: f64,
10665
10666    /// Scrap percentage threshold for alerts.
10667    #[serde(default = "default_scrap_threshold")]
10668    pub scrap_alert_threshold: f64,
10669
10670    /// Manufacturing anomaly injection rates.
10671    #[serde(default)]
10672    pub anomaly_rates: ManufacturingAnomalyRates,
10673
10674    /// Cost accounting configuration (WIP → FG → COGS pipeline).
10675    #[serde(default)]
10676    pub cost_accounting: ManufacturingCostAccountingConfig,
10677}
10678
10679/// Configuration for manufacturing cost accounting JE generation.
10680#[derive(Debug, Clone, Serialize, Deserialize)]
10681pub struct ManufacturingCostAccountingConfig {
10682    /// Enable multi-stage cost flow (WIP → FG → COGS) instead of flat JEs.
10683    #[serde(default = "default_true")]
10684    pub enabled: bool,
10685
10686    /// Generate standard cost variance JEs.
10687    #[serde(default = "default_true")]
10688    pub variance_accounts_enabled: bool,
10689
10690    /// Generate warranty provisions from quality inspection failures.
10691    #[serde(default = "default_true")]
10692    pub warranty_provisions_enabled: bool,
10693
10694    /// Minimum defect rate (0.0-1.0) to trigger warranty provision generation.
10695    #[serde(default = "default_warranty_defect_threshold")]
10696    pub warranty_defect_threshold: f64,
10697}
10698
/// Serde fallback for `ManufacturingCostAccountingConfig::warranty_defect_threshold`.
fn default_warranty_defect_threshold() -> f64 {
    0.01_f64
}
10702
10703impl Default for ManufacturingCostAccountingConfig {
10704    fn default() -> Self {
10705        Self {
10706            enabled: true,
10707            variance_accounts_enabled: true,
10708            warranty_provisions_enabled: true,
10709            warranty_defect_threshold: 0.01,
10710        }
10711    }
10712}
10713
/// Serde fallback for `ManufacturingConfig::bom_depth`.
fn default_bom_depth() -> u32 {
    4_u32
}

/// Serde fallback for `ManufacturingConfig::production_order_types`.
fn default_production_order_types() -> Vec<String> {
    ["standard", "rework", "prototype"]
        .iter()
        .map(|kind| String::from(*kind))
        .collect()
}

/// Serde fallback for `ManufacturingConfig::supplier_tiers`.
fn default_supplier_tiers() -> u32 {
    2_u32
}

/// Serde fallback for `ManufacturingConfig::standard_cost_frequency`.
fn default_cost_frequency() -> String {
    String::from("quarterly")
}

/// Serde fallback for `ManufacturingConfig::target_yield_rate`.
fn default_yield_rate() -> f64 {
    0.97_f64
}

/// Serde fallback for `ManufacturingConfig::scrap_alert_threshold`.
fn default_scrap_threshold() -> f64 {
    0.03_f64
}
10741
10742impl Default for ManufacturingConfig {
10743    fn default() -> Self {
10744        Self {
10745            enabled: false,
10746            bom_depth: default_bom_depth(),
10747            just_in_time: false,
10748            production_order_types: default_production_order_types(),
10749            quality_framework: Some("ISO_9001".to_string()),
10750            supplier_tiers: default_supplier_tiers(),
10751            standard_cost_frequency: default_cost_frequency(),
10752            target_yield_rate: default_yield_rate(),
10753            scrap_alert_threshold: default_scrap_threshold(),
10754            anomaly_rates: ManufacturingAnomalyRates::default(),
10755            cost_accounting: ManufacturingCostAccountingConfig::default(),
10756        }
10757    }
10758}
10759
10760/// Manufacturing anomaly injection rates.
10761#[derive(Debug, Clone, Serialize, Deserialize)]
10762pub struct ManufacturingAnomalyRates {
10763    /// Yield manipulation rate.
10764    #[serde(default = "default_mfg_yield_rate")]
10765    pub yield_manipulation: f64,
10766
10767    /// Labor misallocation rate.
10768    #[serde(default = "default_mfg_labor_rate")]
10769    pub labor_misallocation: f64,
10770
10771    /// Phantom production rate.
10772    #[serde(default = "default_mfg_phantom_rate")]
10773    pub phantom_production: f64,
10774
10775    /// Standard cost manipulation rate.
10776    #[serde(default = "default_mfg_cost_rate")]
10777    pub standard_cost_manipulation: f64,
10778
10779    /// Inventory fraud rate.
10780    #[serde(default = "default_mfg_inventory_rate")]
10781    pub inventory_fraud: f64,
10782}
10783
/// Serde fallback for `ManufacturingAnomalyRates::yield_manipulation`.
fn default_mfg_yield_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::labor_misallocation`.
fn default_mfg_labor_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::phantom_production`.
fn default_mfg_phantom_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::standard_cost_manipulation`.
fn default_mfg_cost_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `ManufacturingAnomalyRates::inventory_fraud`.
fn default_mfg_inventory_rate() -> f64 {
    0.008_f64
}
10803
10804impl Default for ManufacturingAnomalyRates {
10805    fn default() -> Self {
10806        Self {
10807            yield_manipulation: default_mfg_yield_rate(),
10808            labor_misallocation: default_mfg_labor_rate(),
10809            phantom_production: default_mfg_phantom_rate(),
10810            standard_cost_manipulation: default_mfg_cost_rate(),
10811            inventory_fraud: default_mfg_inventory_rate(),
10812        }
10813    }
10814}
10815
10816/// Retail industry configuration.
10817#[derive(Debug, Clone, Serialize, Deserialize)]
10818pub struct RetailConfig {
10819    /// Enable retail-specific generation.
10820    #[serde(default)]
10821    pub enabled: bool,
10822
10823    /// Store type distribution.
10824    #[serde(default)]
10825    pub store_types: RetailStoreTypeConfig,
10826
10827    /// Average daily transactions per store.
10828    #[serde(default = "default_retail_daily_txns")]
10829    pub avg_daily_transactions: u32,
10830
10831    /// Enable loss prevention tracking.
10832    #[serde(default = "default_true")]
10833    pub loss_prevention: bool,
10834
10835    /// Shrinkage rate (0.01-0.03 typical).
10836    #[serde(default = "default_shrinkage_rate")]
10837    pub shrinkage_rate: f64,
10838
10839    /// Retail anomaly injection rates.
10840    #[serde(default)]
10841    pub anomaly_rates: RetailAnomalyRates,
10842}
10843
/// Serde fallback for `RetailConfig::avg_daily_transactions`.
fn default_retail_daily_txns() -> u32 {
    500_u32
}

/// Serde fallback for `RetailConfig::shrinkage_rate`.
fn default_shrinkage_rate() -> f64 {
    0.015_f64
}
10851
10852impl Default for RetailConfig {
10853    fn default() -> Self {
10854        Self {
10855            enabled: false,
10856            store_types: RetailStoreTypeConfig::default(),
10857            avg_daily_transactions: default_retail_daily_txns(),
10858            loss_prevention: true,
10859            shrinkage_rate: default_shrinkage_rate(),
10860            anomaly_rates: RetailAnomalyRates::default(),
10861        }
10862    }
10863}
10864
10865/// Retail store type distribution.
10866#[derive(Debug, Clone, Serialize, Deserialize)]
10867pub struct RetailStoreTypeConfig {
10868    /// Percentage of flagship stores.
10869    #[serde(default = "default_flagship_pct")]
10870    pub flagship: f64,
10871
10872    /// Percentage of regional stores.
10873    #[serde(default = "default_regional_pct")]
10874    pub regional: f64,
10875
10876    /// Percentage of outlet stores.
10877    #[serde(default = "default_outlet_pct")]
10878    pub outlet: f64,
10879
10880    /// Percentage of e-commerce.
10881    #[serde(default = "default_ecommerce_pct")]
10882    pub ecommerce: f64,
10883}
10884
/// Serde fallback for `RetailStoreTypeConfig::flagship`.
fn default_flagship_pct() -> f64 {
    0.10_f64
}

/// Serde fallback for `RetailStoreTypeConfig::regional`.
fn default_regional_pct() -> f64 {
    0.50_f64
}

/// Serde fallback for `RetailStoreTypeConfig::outlet`.
fn default_outlet_pct() -> f64 {
    0.25_f64
}

/// Serde fallback for `RetailStoreTypeConfig::ecommerce`.
fn default_ecommerce_pct() -> f64 {
    0.15_f64
}
10900
10901impl Default for RetailStoreTypeConfig {
10902    fn default() -> Self {
10903        Self {
10904            flagship: default_flagship_pct(),
10905            regional: default_regional_pct(),
10906            outlet: default_outlet_pct(),
10907            ecommerce: default_ecommerce_pct(),
10908        }
10909    }
10910}
10911
10912/// Retail anomaly injection rates.
10913#[derive(Debug, Clone, Serialize, Deserialize)]
10914pub struct RetailAnomalyRates {
10915    /// Sweethearting rate.
10916    #[serde(default = "default_sweethearting_rate")]
10917    pub sweethearting: f64,
10918
10919    /// Skimming rate.
10920    #[serde(default = "default_skimming_rate")]
10921    pub skimming: f64,
10922
10923    /// Refund fraud rate.
10924    #[serde(default = "default_refund_fraud_rate")]
10925    pub refund_fraud: f64,
10926
10927    /// Void abuse rate.
10928    #[serde(default = "default_void_abuse_rate")]
10929    pub void_abuse: f64,
10930
10931    /// Gift card fraud rate.
10932    #[serde(default = "default_gift_card_rate")]
10933    pub gift_card_fraud: f64,
10934
10935    /// Vendor kickback rate.
10936    #[serde(default = "default_retail_kickback_rate")]
10937    pub vendor_kickback: f64,
10938}
10939
/// Serde fallback for `RetailAnomalyRates::sweethearting`.
fn default_sweethearting_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `RetailAnomalyRates::skimming`.
fn default_skimming_rate() -> f64 {
    0.005_f64
}

/// Serde fallback for `RetailAnomalyRates::refund_fraud`.
fn default_refund_fraud_rate() -> f64 {
    0.015_f64
}

/// Serde fallback for `RetailAnomalyRates::void_abuse`.
fn default_void_abuse_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `RetailAnomalyRates::gift_card_fraud`.
fn default_gift_card_rate() -> f64 {
    0.008_f64
}

/// Serde fallback for `RetailAnomalyRates::vendor_kickback`.
fn default_retail_kickback_rate() -> f64 {
    0.003_f64
}
10963
10964impl Default for RetailAnomalyRates {
10965    fn default() -> Self {
10966        Self {
10967            sweethearting: default_sweethearting_rate(),
10968            skimming: default_skimming_rate(),
10969            refund_fraud: default_refund_fraud_rate(),
10970            void_abuse: default_void_abuse_rate(),
10971            gift_card_fraud: default_gift_card_rate(),
10972            vendor_kickback: default_retail_kickback_rate(),
10973        }
10974    }
10975}
10976
10977/// Healthcare industry configuration.
10978#[derive(Debug, Clone, Serialize, Deserialize)]
10979pub struct HealthcareConfig {
10980    /// Enable healthcare-specific generation.
10981    #[serde(default)]
10982    pub enabled: bool,
10983
10984    /// Healthcare facility type.
10985    #[serde(default = "default_facility_type")]
10986    pub facility_type: String,
10987
10988    /// Payer mix distribution.
10989    #[serde(default)]
10990    pub payer_mix: HealthcarePayerMix,
10991
10992    /// Coding systems enabled.
10993    #[serde(default)]
10994    pub coding_systems: HealthcareCodingSystems,
10995
10996    /// Healthcare compliance settings.
10997    #[serde(default)]
10998    pub compliance: HealthcareComplianceConfig,
10999
11000    /// Average daily encounters.
11001    #[serde(default = "default_daily_encounters")]
11002    pub avg_daily_encounters: u32,
11003
11004    /// Average charges per encounter.
11005    #[serde(default = "default_charges_per_encounter")]
11006    pub avg_charges_per_encounter: u32,
11007
11008    /// Denial rate (0.0-1.0).
11009    #[serde(default = "default_hc_denial_rate")]
11010    pub denial_rate: f64,
11011
11012    /// Bad debt rate (0.0-1.0).
11013    #[serde(default = "default_hc_bad_debt_rate")]
11014    pub bad_debt_rate: f64,
11015
11016    /// Charity care rate (0.0-1.0).
11017    #[serde(default = "default_hc_charity_care_rate")]
11018    pub charity_care_rate: f64,
11019
11020    /// Healthcare anomaly injection rates.
11021    #[serde(default)]
11022    pub anomaly_rates: HealthcareAnomalyRates,
11023}
11024
/// Serde fallback for `HealthcareConfig::facility_type`.
fn default_facility_type() -> String {
    String::from("hospital")
}

/// Serde fallback for `HealthcareConfig::avg_daily_encounters`.
fn default_daily_encounters() -> u32 {
    150_u32
}

/// Serde fallback for `HealthcareConfig::avg_charges_per_encounter`.
fn default_charges_per_encounter() -> u32 {
    8_u32
}

/// Serde fallback for `HealthcareConfig::denial_rate`.
fn default_hc_denial_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `HealthcareConfig::bad_debt_rate`.
fn default_hc_bad_debt_rate() -> f64 {
    0.03_f64
}

/// Serde fallback for `HealthcareConfig::charity_care_rate`.
fn default_hc_charity_care_rate() -> f64 {
    0.02_f64
}
11048
11049impl Default for HealthcareConfig {
11050    fn default() -> Self {
11051        Self {
11052            enabled: false,
11053            facility_type: default_facility_type(),
11054            payer_mix: HealthcarePayerMix::default(),
11055            coding_systems: HealthcareCodingSystems::default(),
11056            compliance: HealthcareComplianceConfig::default(),
11057            avg_daily_encounters: default_daily_encounters(),
11058            avg_charges_per_encounter: default_charges_per_encounter(),
11059            denial_rate: default_hc_denial_rate(),
11060            bad_debt_rate: default_hc_bad_debt_rate(),
11061            charity_care_rate: default_hc_charity_care_rate(),
11062            anomaly_rates: HealthcareAnomalyRates::default(),
11063        }
11064    }
11065}
11066
11067/// Healthcare payer mix distribution.
11068#[derive(Debug, Clone, Serialize, Deserialize)]
11069pub struct HealthcarePayerMix {
11070    /// Medicare percentage.
11071    #[serde(default = "default_medicare_pct")]
11072    pub medicare: f64,
11073
11074    /// Medicaid percentage.
11075    #[serde(default = "default_medicaid_pct")]
11076    pub medicaid: f64,
11077
11078    /// Commercial insurance percentage.
11079    #[serde(default = "default_commercial_pct")]
11080    pub commercial: f64,
11081
11082    /// Self-pay percentage.
11083    #[serde(default = "default_self_pay_pct")]
11084    pub self_pay: f64,
11085}
11086
/// Fallback Medicare share of the payer mix.
fn default_medicare_pct() -> f64 {
    0.40_f64
}

/// Fallback Medicaid share of the payer mix.
fn default_medicaid_pct() -> f64 {
    0.20_f64
}

/// Fallback commercial-insurance share of the payer mix.
fn default_commercial_pct() -> f64 {
    0.30_f64
}

/// Fallback self-pay share of the payer mix.
fn default_self_pay_pct() -> f64 {
    0.10_f64
}
11102
11103impl Default for HealthcarePayerMix {
11104    fn default() -> Self {
11105        Self {
11106            medicare: default_medicare_pct(),
11107            medicaid: default_medicaid_pct(),
11108            commercial: default_commercial_pct(),
11109            self_pay: default_self_pay_pct(),
11110        }
11111    }
11112}
11113
11114/// Healthcare coding systems configuration.
11115#[derive(Debug, Clone, Serialize, Deserialize)]
11116pub struct HealthcareCodingSystems {
11117    /// Enable ICD-10 diagnosis coding.
11118    #[serde(default = "default_true")]
11119    pub icd10: bool,
11120
11121    /// Enable CPT procedure coding.
11122    #[serde(default = "default_true")]
11123    pub cpt: bool,
11124
11125    /// Enable DRG grouping.
11126    #[serde(default = "default_true")]
11127    pub drg: bool,
11128
11129    /// Enable HCPCS Level II coding.
11130    #[serde(default = "default_true")]
11131    pub hcpcs: bool,
11132
11133    /// Enable revenue codes.
11134    #[serde(default = "default_true")]
11135    pub revenue_codes: bool,
11136}
11137
11138impl Default for HealthcareCodingSystems {
11139    fn default() -> Self {
11140        Self {
11141            icd10: true,
11142            cpt: true,
11143            drg: true,
11144            hcpcs: true,
11145            revenue_codes: true,
11146        }
11147    }
11148}
11149
11150/// Healthcare compliance configuration.
11151#[derive(Debug, Clone, Serialize, Deserialize)]
11152pub struct HealthcareComplianceConfig {
11153    /// Enable HIPAA compliance.
11154    #[serde(default = "default_true")]
11155    pub hipaa: bool,
11156
11157    /// Enable Stark Law compliance.
11158    #[serde(default = "default_true")]
11159    pub stark_law: bool,
11160
11161    /// Enable Anti-Kickback Statute compliance.
11162    #[serde(default = "default_true")]
11163    pub anti_kickback: bool,
11164
11165    /// Enable False Claims Act compliance.
11166    #[serde(default = "default_true")]
11167    pub false_claims_act: bool,
11168
11169    /// Enable EMTALA compliance (for hospitals).
11170    #[serde(default = "default_true")]
11171    pub emtala: bool,
11172}
11173
11174impl Default for HealthcareComplianceConfig {
11175    fn default() -> Self {
11176        Self {
11177            hipaa: true,
11178            stark_law: true,
11179            anti_kickback: true,
11180            false_claims_act: true,
11181            emtala: true,
11182        }
11183    }
11184}
11185
11186/// Healthcare anomaly injection rates.
11187#[derive(Debug, Clone, Serialize, Deserialize)]
11188pub struct HealthcareAnomalyRates {
11189    /// Upcoding rate.
11190    #[serde(default = "default_upcoding_rate")]
11191    pub upcoding: f64,
11192
11193    /// Unbundling rate.
11194    #[serde(default = "default_unbundling_rate")]
11195    pub unbundling: f64,
11196
11197    /// Phantom billing rate.
11198    #[serde(default = "default_phantom_billing_rate")]
11199    pub phantom_billing: f64,
11200
11201    /// Kickback rate.
11202    #[serde(default = "default_healthcare_kickback_rate")]
11203    pub kickbacks: f64,
11204
11205    /// Duplicate billing rate.
11206    #[serde(default = "default_duplicate_billing_rate")]
11207    pub duplicate_billing: f64,
11208
11209    /// Medical necessity abuse rate.
11210    #[serde(default = "default_med_necessity_rate")]
11211    pub medical_necessity_abuse: f64,
11212}
11213
/// Fallback for `HealthcareAnomalyRates::upcoding`.
fn default_upcoding_rate() -> f64 {
    0.02_f64
}

/// Fallback for `HealthcareAnomalyRates::unbundling`.
fn default_unbundling_rate() -> f64 {
    0.015_f64
}

/// Fallback for `HealthcareAnomalyRates::phantom_billing`.
fn default_phantom_billing_rate() -> f64 {
    0.005_f64
}

/// Fallback for `HealthcareAnomalyRates::kickbacks`.
fn default_healthcare_kickback_rate() -> f64 {
    0.003_f64
}

/// Fallback for `HealthcareAnomalyRates::duplicate_billing`.
fn default_duplicate_billing_rate() -> f64 {
    0.008_f64
}

/// Fallback for `HealthcareAnomalyRates::medical_necessity_abuse`.
fn default_med_necessity_rate() -> f64 {
    0.01_f64
}
11237
11238impl Default for HealthcareAnomalyRates {
11239    fn default() -> Self {
11240        Self {
11241            upcoding: default_upcoding_rate(),
11242            unbundling: default_unbundling_rate(),
11243            phantom_billing: default_phantom_billing_rate(),
11244            kickbacks: default_healthcare_kickback_rate(),
11245            duplicate_billing: default_duplicate_billing_rate(),
11246            medical_necessity_abuse: default_med_necessity_rate(),
11247        }
11248    }
11249}
11250
11251/// Technology industry configuration.
11252#[derive(Debug, Clone, Serialize, Deserialize)]
11253pub struct TechnologyConfig {
11254    /// Enable technology-specific generation.
11255    #[serde(default)]
11256    pub enabled: bool,
11257
11258    /// Revenue model type.
11259    #[serde(default = "default_revenue_model")]
11260    pub revenue_model: String,
11261
11262    /// Subscription revenue percentage (for SaaS).
11263    #[serde(default = "default_subscription_pct")]
11264    pub subscription_revenue_pct: f64,
11265
11266    /// License revenue percentage.
11267    #[serde(default = "default_license_pct")]
11268    pub license_revenue_pct: f64,
11269
11270    /// Services revenue percentage.
11271    #[serde(default = "default_services_pct")]
11272    pub services_revenue_pct: f64,
11273
11274    /// R&D capitalization settings.
11275    #[serde(default)]
11276    pub rd_capitalization: RdCapitalizationConfig,
11277
11278    /// Technology anomaly injection rates.
11279    #[serde(default)]
11280    pub anomaly_rates: TechnologyAnomalyRates,
11281}
11282
/// Fallback for `TechnologyConfig::revenue_model`.
fn default_revenue_model() -> String {
    String::from("saas")
}

/// Fallback for `TechnologyConfig::subscription_revenue_pct`.
fn default_subscription_pct() -> f64 {
    0.60_f64
}

/// Fallback for `TechnologyConfig::license_revenue_pct`.
fn default_license_pct() -> f64 {
    0.25_f64
}

/// Fallback for `TechnologyConfig::services_revenue_pct`.
fn default_services_pct() -> f64 {
    0.15_f64
}
11298
11299impl Default for TechnologyConfig {
11300    fn default() -> Self {
11301        Self {
11302            enabled: false,
11303            revenue_model: default_revenue_model(),
11304            subscription_revenue_pct: default_subscription_pct(),
11305            license_revenue_pct: default_license_pct(),
11306            services_revenue_pct: default_services_pct(),
11307            rd_capitalization: RdCapitalizationConfig::default(),
11308            anomaly_rates: TechnologyAnomalyRates::default(),
11309        }
11310    }
11311}
11312
11313/// R&D capitalization configuration.
11314#[derive(Debug, Clone, Serialize, Deserialize)]
11315pub struct RdCapitalizationConfig {
11316    /// Enable R&D capitalization.
11317    #[serde(default = "default_true")]
11318    pub enabled: bool,
11319
11320    /// Capitalization rate (0.0-1.0).
11321    #[serde(default = "default_cap_rate")]
11322    pub capitalization_rate: f64,
11323
11324    /// Useful life in years.
11325    #[serde(default = "default_useful_life")]
11326    pub useful_life_years: u32,
11327}
11328
/// Fallback for `RdCapitalizationConfig::capitalization_rate`.
fn default_cap_rate() -> f64 {
    0.30_f64
}

/// Fallback for `RdCapitalizationConfig::useful_life_years`.
fn default_useful_life() -> u32 {
    3_u32
}
11336
11337impl Default for RdCapitalizationConfig {
11338    fn default() -> Self {
11339        Self {
11340            enabled: true,
11341            capitalization_rate: default_cap_rate(),
11342            useful_life_years: default_useful_life(),
11343        }
11344    }
11345}
11346
11347/// Technology anomaly injection rates.
11348#[derive(Debug, Clone, Serialize, Deserialize)]
11349pub struct TechnologyAnomalyRates {
11350    /// Premature revenue recognition rate.
11351    #[serde(default = "default_premature_rev_rate")]
11352    pub premature_revenue: f64,
11353
11354    /// Side letter abuse rate.
11355    #[serde(default = "default_side_letter_rate")]
11356    pub side_letter_abuse: f64,
11357
11358    /// Channel stuffing rate.
11359    #[serde(default = "default_channel_stuffing_rate")]
11360    pub channel_stuffing: f64,
11361
11362    /// Improper capitalization rate.
11363    #[serde(default = "default_improper_cap_rate")]
11364    pub improper_capitalization: f64,
11365}
11366
/// Fallback for `TechnologyAnomalyRates::premature_revenue`.
fn default_premature_rev_rate() -> f64 {
    0.015_f64
}

/// Fallback for `TechnologyAnomalyRates::side_letter_abuse`.
fn default_side_letter_rate() -> f64 {
    0.008_f64
}

/// Fallback for `TechnologyAnomalyRates::channel_stuffing`.
fn default_channel_stuffing_rate() -> f64 {
    0.01_f64
}

/// Fallback for `TechnologyAnomalyRates::improper_capitalization`.
fn default_improper_cap_rate() -> f64 {
    0.012_f64
}
11382
11383impl Default for TechnologyAnomalyRates {
11384    fn default() -> Self {
11385        Self {
11386            premature_revenue: default_premature_rev_rate(),
11387            side_letter_abuse: default_side_letter_rate(),
11388            channel_stuffing: default_channel_stuffing_rate(),
11389            improper_capitalization: default_improper_cap_rate(),
11390        }
11391    }
11392}
11393
11394/// Financial services industry configuration.
11395#[derive(Debug, Clone, Serialize, Deserialize)]
11396pub struct FinancialServicesConfig {
11397    /// Enable financial services-specific generation.
11398    #[serde(default)]
11399    pub enabled: bool,
11400
11401    /// Financial institution type.
11402    #[serde(default = "default_fi_type")]
11403    pub institution_type: String,
11404
11405    /// Regulatory framework.
11406    #[serde(default = "default_fi_regulatory")]
11407    pub regulatory_framework: String,
11408
11409    /// Financial services anomaly injection rates.
11410    #[serde(default)]
11411    pub anomaly_rates: FinancialServicesAnomalyRates,
11412}
11413
/// Fallback for `FinancialServicesConfig::institution_type`.
fn default_fi_type() -> String {
    String::from("commercial_bank")
}

/// Fallback for `FinancialServicesConfig::regulatory_framework`.
fn default_fi_regulatory() -> String {
    String::from("us_banking")
}
11421
11422impl Default for FinancialServicesConfig {
11423    fn default() -> Self {
11424        Self {
11425            enabled: false,
11426            institution_type: default_fi_type(),
11427            regulatory_framework: default_fi_regulatory(),
11428            anomaly_rates: FinancialServicesAnomalyRates::default(),
11429        }
11430    }
11431}
11432
11433/// Financial services anomaly injection rates.
11434#[derive(Debug, Clone, Serialize, Deserialize)]
11435pub struct FinancialServicesAnomalyRates {
11436    /// Loan fraud rate.
11437    #[serde(default = "default_loan_fraud_rate")]
11438    pub loan_fraud: f64,
11439
11440    /// Trading fraud rate.
11441    #[serde(default = "default_trading_fraud_rate")]
11442    pub trading_fraud: f64,
11443
11444    /// Insurance fraud rate.
11445    #[serde(default = "default_insurance_fraud_rate")]
11446    pub insurance_fraud: f64,
11447
11448    /// Account manipulation rate.
11449    #[serde(default = "default_account_manip_rate")]
11450    pub account_manipulation: f64,
11451}
11452
/// Fallback for `FinancialServicesAnomalyRates::loan_fraud`.
fn default_loan_fraud_rate() -> f64 {
    0.01_f64
}

/// Fallback for `FinancialServicesAnomalyRates::trading_fraud`.
fn default_trading_fraud_rate() -> f64 {
    0.008_f64
}

/// Fallback for `FinancialServicesAnomalyRates::insurance_fraud`.
fn default_insurance_fraud_rate() -> f64 {
    0.012_f64
}

/// Fallback for `FinancialServicesAnomalyRates::account_manipulation`.
fn default_account_manip_rate() -> f64 {
    0.005_f64
}
11468
11469impl Default for FinancialServicesAnomalyRates {
11470    fn default() -> Self {
11471        Self {
11472            loan_fraud: default_loan_fraud_rate(),
11473            trading_fraud: default_trading_fraud_rate(),
11474            insurance_fraud: default_insurance_fraud_rate(),
11475            account_manipulation: default_account_manip_rate(),
11476        }
11477    }
11478}
11479
11480/// Professional services industry configuration.
11481#[derive(Debug, Clone, Serialize, Deserialize)]
11482pub struct ProfessionalServicesConfig {
11483    /// Enable professional services-specific generation.
11484    #[serde(default)]
11485    pub enabled: bool,
11486
11487    /// Firm type.
11488    #[serde(default = "default_firm_type")]
11489    pub firm_type: String,
11490
11491    /// Billing model.
11492    #[serde(default = "default_billing_model")]
11493    pub billing_model: String,
11494
11495    /// Average hourly rate.
11496    #[serde(default = "default_hourly_rate")]
11497    pub avg_hourly_rate: f64,
11498
11499    /// Trust account settings (for law firms).
11500    #[serde(default)]
11501    pub trust_accounting: TrustAccountingConfig,
11502
11503    /// Professional services anomaly injection rates.
11504    #[serde(default)]
11505    pub anomaly_rates: ProfessionalServicesAnomalyRates,
11506}
11507
/// Fallback for `ProfessionalServicesConfig::firm_type`.
fn default_firm_type() -> String {
    String::from("consulting")
}

/// Fallback for `ProfessionalServicesConfig::billing_model`.
fn default_billing_model() -> String {
    String::from("time_and_materials")
}

/// Fallback for `ProfessionalServicesConfig::avg_hourly_rate`.
fn default_hourly_rate() -> f64 {
    250.0_f64
}
11519
11520impl Default for ProfessionalServicesConfig {
11521    fn default() -> Self {
11522        Self {
11523            enabled: false,
11524            firm_type: default_firm_type(),
11525            billing_model: default_billing_model(),
11526            avg_hourly_rate: default_hourly_rate(),
11527            trust_accounting: TrustAccountingConfig::default(),
11528            anomaly_rates: ProfessionalServicesAnomalyRates::default(),
11529        }
11530    }
11531}
11532
11533/// Trust accounting configuration for law firms.
11534#[derive(Debug, Clone, Serialize, Deserialize)]
11535pub struct TrustAccountingConfig {
11536    /// Enable trust accounting.
11537    #[serde(default)]
11538    pub enabled: bool,
11539
11540    /// Require three-way reconciliation.
11541    #[serde(default = "default_true")]
11542    pub require_three_way_reconciliation: bool,
11543}
11544
11545impl Default for TrustAccountingConfig {
11546    fn default() -> Self {
11547        Self {
11548            enabled: false,
11549            require_three_way_reconciliation: true,
11550        }
11551    }
11552}
11553
11554/// Professional services anomaly injection rates.
11555#[derive(Debug, Clone, Serialize, Deserialize)]
11556pub struct ProfessionalServicesAnomalyRates {
11557    /// Time billing fraud rate.
11558    #[serde(default = "default_time_fraud_rate")]
11559    pub time_billing_fraud: f64,
11560
11561    /// Expense report fraud rate.
11562    #[serde(default = "default_expense_fraud_rate")]
11563    pub expense_fraud: f64,
11564
11565    /// Trust misappropriation rate.
11566    #[serde(default = "default_trust_misappropriation_rate")]
11567    pub trust_misappropriation: f64,
11568}
11569
/// Fallback for `ProfessionalServicesAnomalyRates::time_billing_fraud`.
fn default_time_fraud_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::expense_fraud`.
fn default_expense_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::trust_misappropriation`.
fn default_trust_misappropriation_rate() -> f64 {
    0.003_f64
}
11581
11582impl Default for ProfessionalServicesAnomalyRates {
11583    fn default() -> Self {
11584        Self {
11585            time_billing_fraud: default_time_fraud_rate(),
11586            expense_fraud: default_expense_fraud_rate(),
11587            trust_misappropriation: default_trust_misappropriation_rate(),
11588        }
11589    }
11590}
11591
11592/// Fingerprint privacy configuration for extraction and synthesis.
11593///
11594/// Controls the privacy parameters used when extracting fingerprints
11595/// from sensitive data. Supports predefined levels or custom (epsilon, delta) tuples.
11596///
11597/// ```yaml
11598/// fingerprint_privacy:
11599///   level: custom
11600///   epsilon: 0.5
11601///   delta: 1.0e-5
11602///   k_anonymity: 10
11603///   composition_method: renyi_dp
11604/// ```
11605#[derive(Debug, Clone, Serialize, Deserialize)]
11606pub struct FingerprintPrivacyConfig {
11607    /// Privacy level preset. Use "custom" for user-specified epsilon/delta.
11608    #[serde(default)]
11609    pub level: String,
11610    /// Custom epsilon value (only used when level = "custom").
11611    #[serde(default = "default_epsilon")]
11612    pub epsilon: f64,
11613    /// Custom delta value for (epsilon, delta)-DP (only used with RDP/zCDP).
11614    #[serde(default = "default_delta")]
11615    pub delta: f64,
11616    /// K-anonymity threshold.
11617    #[serde(default = "default_k_anonymity")]
11618    pub k_anonymity: u32,
11619    /// Composition method: "naive", "advanced", "renyi_dp", "zcdp".
11620    #[serde(default)]
11621    pub composition_method: String,
11622}
11623
11624fn default_epsilon() -> f64 {
11625    1.0
11626}
11627
11628fn default_delta() -> f64 {
11629    1e-5
11630}
11631
11632fn default_k_anonymity() -> u32 {
11633    5
11634}
11635
11636impl Default for FingerprintPrivacyConfig {
11637    fn default() -> Self {
11638        Self {
11639            level: "standard".to_string(),
11640            epsilon: default_epsilon(),
11641            delta: default_delta(),
11642            k_anonymity: default_k_anonymity(),
11643            composition_method: "naive".to_string(),
11644        }
11645    }
11646}
11647
11648/// Quality gates configuration for pass/fail thresholds on generation runs.
11649///
11650/// ```yaml
11651/// quality_gates:
11652///   enabled: true
11653///   profile: strict  # strict, default, lenient, custom
11654///   fail_on_violation: true
11655///   custom_gates:
11656///     - name: benford_compliance
11657///       metric: benford_mad
11658///       threshold: 0.015
11659///       comparison: lte
11660/// ```
11661#[derive(Debug, Clone, Serialize, Deserialize)]
11662pub struct QualityGatesSchemaConfig {
11663    /// Enable quality gate evaluation.
11664    #[serde(default)]
11665    pub enabled: bool,
11666    /// Gate profile: "strict", "default", "lenient", or "custom".
11667    #[serde(default = "default_gate_profile_name")]
11668    pub profile: String,
11669    /// Whether to fail the generation on gate violations.
11670    #[serde(default)]
11671    pub fail_on_violation: bool,
11672    /// Custom gate definitions (used when profile = "custom").
11673    #[serde(default)]
11674    pub custom_gates: Vec<QualityGateEntry>,
11675}
11676
/// Fallback for `QualityGatesSchemaConfig::profile`.
fn default_gate_profile_name() -> String {
    String::from("default")
}
11680
11681impl Default for QualityGatesSchemaConfig {
11682    fn default() -> Self {
11683        Self {
11684            enabled: false,
11685            profile: default_gate_profile_name(),
11686            fail_on_violation: false,
11687            custom_gates: Vec::new(),
11688        }
11689    }
11690}
11691
11692/// A single quality gate entry in configuration.
11693#[derive(Debug, Clone, Serialize, Deserialize)]
11694pub struct QualityGateEntry {
11695    /// Gate name.
11696    pub name: String,
11697    /// Metric to check: benford_mad, balance_coherence, document_chain_integrity,
11698    /// correlation_preservation, temporal_consistency, privacy_mia_auc,
11699    /// completion_rate, duplicate_rate, referential_integrity, ic_match_rate.
11700    pub metric: String,
11701    /// Threshold value.
11702    pub threshold: f64,
11703    /// Upper threshold for "between" comparison.
11704    #[serde(default)]
11705    pub upper_threshold: Option<f64>,
11706    /// Comparison operator: "gte", "lte", "eq", "between".
11707    #[serde(default = "default_gate_comparison")]
11708    pub comparison: String,
11709}
11710
/// Fallback for `QualityGateEntry::comparison`.
fn default_gate_comparison() -> String {
    String::from("gte")
}
11714
11715/// Compliance configuration for regulatory requirements.
11716///
11717/// ```yaml
11718/// compliance:
11719///   content_marking:
11720///     enabled: true
11721///     format: embedded  # embedded, sidecar, both
11722///   article10_report: true
11723/// ```
11724#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11725pub struct ComplianceSchemaConfig {
11726    /// Synthetic content marking configuration (EU AI Act Article 50).
11727    #[serde(default)]
11728    pub content_marking: ContentMarkingSchemaConfig,
11729    /// Generate Article 10 data governance report.
11730    #[serde(default)]
11731    pub article10_report: bool,
11732    /// Certificate configuration for proving DP guarantees.
11733    #[serde(default)]
11734    pub certificates: CertificateSchemaConfig,
11735}
11736
11737/// Configuration for synthetic data certificates.
11738#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11739pub struct CertificateSchemaConfig {
11740    /// Whether certificate generation is enabled.
11741    #[serde(default)]
11742    pub enabled: bool,
11743    /// Environment variable name for the signing key.
11744    #[serde(default)]
11745    pub signing_key_env: Option<String>,
11746    /// Whether to include quality metrics in the certificate.
11747    #[serde(default)]
11748    pub include_quality_metrics: bool,
11749}
11750
11751/// Content marking configuration for synthetic data output.
11752#[derive(Debug, Clone, Serialize, Deserialize)]
11753pub struct ContentMarkingSchemaConfig {
11754    /// Whether content marking is enabled.
11755    #[serde(default = "default_true")]
11756    pub enabled: bool,
11757    /// Marking format: "embedded", "sidecar", or "both".
11758    #[serde(default = "default_marking_format")]
11759    pub format: String,
11760}
11761
/// Fallback for `ContentMarkingSchemaConfig::format`.
fn default_marking_format() -> String {
    String::from("embedded")
}
11765
11766impl Default for ContentMarkingSchemaConfig {
11767    fn default() -> Self {
11768        Self {
11769            enabled: true,
11770            format: default_marking_format(),
11771        }
11772    }
11773}
11774
11775/// Webhook notification configuration.
11776#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11777pub struct WebhookSchemaConfig {
11778    /// Whether webhooks are enabled.
11779    #[serde(default)]
11780    pub enabled: bool,
11781    /// Webhook endpoint configurations.
11782    #[serde(default)]
11783    pub endpoints: Vec<WebhookEndpointConfig>,
11784}
11785
11786/// Configuration for a single webhook endpoint.
11787#[derive(Debug, Clone, Serialize, Deserialize)]
11788pub struct WebhookEndpointConfig {
11789    /// Target URL for the webhook.
11790    pub url: String,
11791    /// Event types this endpoint subscribes to.
11792    #[serde(default)]
11793    pub events: Vec<String>,
11794    /// Optional secret for HMAC-SHA256 signature.
11795    #[serde(default)]
11796    pub secret: Option<String>,
11797    /// Maximum retry attempts (default: 3).
11798    #[serde(default = "default_webhook_retries")]
11799    pub max_retries: u32,
11800    /// Timeout in seconds (default: 10).
11801    #[serde(default = "default_webhook_timeout")]
11802    pub timeout_secs: u64,
11803}
11804
/// Fallback for `WebhookEndpointConfig::max_retries`.
fn default_webhook_retries() -> u32 {
    3_u32
}

/// Fallback for `WebhookEndpointConfig::timeout_secs`.
fn default_webhook_timeout() -> u64 {
    10_u64
}
11811
11812// ===== Enterprise Process Chain Config Structs =====
11813
11814// ----- Source-to-Pay (S2C/S2P) -----
11815
11816/// Source-to-Pay configuration covering the entire sourcing lifecycle.
11817#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11818pub struct SourceToPayConfig {
11819    /// Enable source-to-pay generation
11820    #[serde(default)]
11821    pub enabled: bool,
11822    /// Spend analysis configuration
11823    #[serde(default)]
11824    pub spend_analysis: SpendAnalysisConfig,
11825    /// Sourcing project configuration
11826    #[serde(default)]
11827    pub sourcing: SourcingConfig,
11828    /// Supplier qualification configuration
11829    #[serde(default)]
11830    pub qualification: QualificationConfig,
11831    /// RFx event configuration
11832    #[serde(default)]
11833    pub rfx: RfxConfig,
11834    /// Contract configuration
11835    #[serde(default)]
11836    pub contracts: ContractConfig,
11837    /// Catalog configuration
11838    #[serde(default)]
11839    pub catalog: CatalogConfig,
11840    /// Scorecard configuration
11841    #[serde(default)]
11842    pub scorecards: ScorecardConfig,
11843    /// P2P integration settings
11844    #[serde(default)]
11845    pub p2p_integration: P2PIntegrationConfig,
11846}
11847
11848/// Spend analysis configuration.
11849#[derive(Debug, Clone, Serialize, Deserialize)]
11850pub struct SpendAnalysisConfig {
11851    /// HHI threshold for triggering sourcing project
11852    #[serde(default = "default_hhi_threshold")]
11853    pub hhi_threshold: f64,
11854    /// Target spend coverage under contracts
11855    #[serde(default = "default_contract_coverage_target")]
11856    pub contract_coverage_target: f64,
11857}
11858
11859impl Default for SpendAnalysisConfig {
11860    fn default() -> Self {
11861        Self {
11862            hhi_threshold: default_hhi_threshold(),
11863            contract_coverage_target: default_contract_coverage_target(),
11864        }
11865    }
11866}
11867
/// Fallback for `SpendAnalysisConfig::hhi_threshold`.
fn default_hhi_threshold() -> f64 {
    2500.0_f64
}

/// Fallback for `SpendAnalysisConfig::contract_coverage_target`.
fn default_contract_coverage_target() -> f64 {
    0.80_f64
}
11874
11875/// Sourcing project configuration.
11876#[derive(Debug, Clone, Serialize, Deserialize)]
11877pub struct SourcingConfig {
11878    /// Number of sourcing projects per year
11879    #[serde(default = "default_sourcing_projects_per_year")]
11880    pub projects_per_year: u32,
11881    /// Months before expiry to trigger renewal project
11882    #[serde(default = "default_renewal_horizon_months")]
11883    pub renewal_horizon_months: u32,
11884    /// Average project duration in months
11885    #[serde(default = "default_project_duration_months")]
11886    pub project_duration_months: u32,
11887}
11888
11889impl Default for SourcingConfig {
11890    fn default() -> Self {
11891        Self {
11892            projects_per_year: default_sourcing_projects_per_year(),
11893            renewal_horizon_months: default_renewal_horizon_months(),
11894            project_duration_months: default_project_duration_months(),
11895        }
11896    }
11897}
11898
/// Fallback for `SourcingConfig::projects_per_year`.
fn default_sourcing_projects_per_year() -> u32 {
    10_u32
}

/// Fallback for `SourcingConfig::renewal_horizon_months`.
fn default_renewal_horizon_months() -> u32 {
    3_u32
}

/// Fallback for `SourcingConfig::project_duration_months`.
fn default_project_duration_months() -> u32 {
    4_u32
}
11908
11909/// Supplier qualification configuration.
11910#[derive(Debug, Clone, Serialize, Deserialize)]
11911pub struct QualificationConfig {
11912    /// Pass rate for qualification
11913    #[serde(default = "default_qualification_pass_rate")]
11914    pub pass_rate: f64,
11915    /// Qualification validity in days
11916    #[serde(default = "default_qualification_validity_days")]
11917    pub validity_days: u32,
11918    /// Financial stability weight
11919    #[serde(default = "default_financial_weight")]
11920    pub financial_weight: f64,
11921    /// Quality management weight
11922    #[serde(default = "default_quality_weight")]
11923    pub quality_weight: f64,
11924    /// Delivery performance weight
11925    #[serde(default = "default_delivery_weight")]
11926    pub delivery_weight: f64,
11927    /// Compliance weight
11928    #[serde(default = "default_compliance_weight")]
11929    pub compliance_weight: f64,
11930}
11931
11932impl Default for QualificationConfig {
11933    fn default() -> Self {
11934        Self {
11935            pass_rate: default_qualification_pass_rate(),
11936            validity_days: default_qualification_validity_days(),
11937            financial_weight: default_financial_weight(),
11938            quality_weight: default_quality_weight(),
11939            delivery_weight: default_delivery_weight(),
11940            compliance_weight: default_compliance_weight(),
11941        }
11942    }
11943}
11944
/// Fallback for `QualificationConfig::pass_rate`.
fn default_qualification_pass_rate() -> f64 {
    0.75_f64
}

/// Fallback for `QualificationConfig::validity_days`.
fn default_qualification_validity_days() -> u32 {
    365_u32
}

/// Fallback for `QualificationConfig::financial_weight`.
fn default_financial_weight() -> f64 {
    0.25_f64
}

/// Fallback for `QualificationConfig::quality_weight`.
fn default_quality_weight() -> f64 {
    0.30_f64
}

/// Fallback for `QualificationConfig::delivery_weight`.
fn default_delivery_weight() -> f64 {
    0.25_f64
}

/// Fallback for `QualificationConfig::compliance_weight`.
fn default_compliance_weight() -> f64 {
    0.20_f64
}
11963
11964/// RFx event configuration.
11965#[derive(Debug, Clone, Serialize, Deserialize)]
11966pub struct RfxConfig {
11967    /// Spend threshold above which RFI is required before RFP
11968    #[serde(default = "default_rfi_threshold")]
11969    pub rfi_threshold: f64,
11970    /// Minimum vendors invited per RFx
11971    #[serde(default = "default_min_invited_vendors")]
11972    pub min_invited_vendors: u32,
11973    /// Maximum vendors invited per RFx
11974    #[serde(default = "default_max_invited_vendors")]
11975    pub max_invited_vendors: u32,
11976    /// Response rate (% of invited vendors that submit bids)
11977    #[serde(default = "default_response_rate")]
11978    pub response_rate: f64,
11979    /// Default price weight in evaluation
11980    #[serde(default = "default_price_weight")]
11981    pub default_price_weight: f64,
11982    /// Default quality weight in evaluation
11983    #[serde(default = "default_rfx_quality_weight")]
11984    pub default_quality_weight: f64,
11985    /// Default delivery weight in evaluation
11986    #[serde(default = "default_rfx_delivery_weight")]
11987    pub default_delivery_weight: f64,
11988}
11989
11990impl Default for RfxConfig {
11991    fn default() -> Self {
11992        Self {
11993            rfi_threshold: default_rfi_threshold(),
11994            min_invited_vendors: default_min_invited_vendors(),
11995            max_invited_vendors: default_max_invited_vendors(),
11996            response_rate: default_response_rate(),
11997            default_price_weight: default_price_weight(),
11998            default_quality_weight: default_rfx_quality_weight(),
11999            default_delivery_weight: default_rfx_delivery_weight(),
12000        }
12001    }
12002}
12003
/// Fallback for `RfxConfig::rfi_threshold`.
fn default_rfi_threshold() -> f64 {
    100_000.0_f64
}

/// Fallback for `RfxConfig::min_invited_vendors`.
fn default_min_invited_vendors() -> u32 {
    3_u32
}

/// Fallback for `RfxConfig::max_invited_vendors`.
fn default_max_invited_vendors() -> u32 {
    8_u32
}

/// Fallback for `RfxConfig::response_rate`.
fn default_response_rate() -> f64 {
    0.70_f64
}

/// Fallback for `RfxConfig::default_price_weight`.
fn default_price_weight() -> f64 {
    0.40_f64
}

/// Fallback for `RfxConfig::default_quality_weight`.
fn default_rfx_quality_weight() -> f64 {
    0.35_f64
}

/// Fallback for `RfxConfig::default_delivery_weight`.
fn default_rfx_delivery_weight() -> f64 {
    0.25_f64
}
12025
12026/// Contract configuration.
12027#[derive(Debug, Clone, Serialize, Deserialize)]
12028pub struct ContractConfig {
12029    /// Minimum contract duration in months
12030    #[serde(default = "default_min_contract_months")]
12031    pub min_duration_months: u32,
12032    /// Maximum contract duration in months
12033    #[serde(default = "default_max_contract_months")]
12034    pub max_duration_months: u32,
12035    /// Auto-renewal rate
12036    #[serde(default = "default_auto_renewal_rate")]
12037    pub auto_renewal_rate: f64,
12038    /// Amendment rate (% of contracts with at least one amendment)
12039    #[serde(default = "default_amendment_rate")]
12040    pub amendment_rate: f64,
12041    /// Distribution of contract types
12042    #[serde(default)]
12043    pub type_distribution: ContractTypeDistribution,
12044}
12045
12046impl Default for ContractConfig {
12047    fn default() -> Self {
12048        Self {
12049            min_duration_months: default_min_contract_months(),
12050            max_duration_months: default_max_contract_months(),
12051            auto_renewal_rate: default_auto_renewal_rate(),
12052            amendment_rate: default_amendment_rate(),
12053            type_distribution: ContractTypeDistribution::default(),
12054        }
12055    }
12056}
12057
/// Default minimum contract duration, in months.
fn default_min_contract_months() -> u32 {
    12_u32
}
/// Default maximum contract duration, in months.
fn default_max_contract_months() -> u32 {
    36_u32
}
/// Default auto-renewal rate.
fn default_auto_renewal_rate() -> f64 {
    0.40_f64
}
/// Default share of contracts with at least one amendment.
fn default_amendment_rate() -> f64 {
    0.20_f64
}
12070
12071/// Distribution of contract types.
12072#[derive(Debug, Clone, Serialize, Deserialize)]
12073pub struct ContractTypeDistribution {
12074    /// Fixed price percentage
12075    #[serde(default = "default_fixed_price_pct")]
12076    pub fixed_price: f64,
12077    /// Blanket/framework percentage
12078    #[serde(default = "default_blanket_pct")]
12079    pub blanket: f64,
12080    /// Time and materials percentage
12081    #[serde(default = "default_time_materials_pct")]
12082    pub time_and_materials: f64,
12083    /// Service agreement percentage
12084    #[serde(default = "default_service_agreement_pct")]
12085    pub service_agreement: f64,
12086}
12087
12088impl Default for ContractTypeDistribution {
12089    fn default() -> Self {
12090        Self {
12091            fixed_price: default_fixed_price_pct(),
12092            blanket: default_blanket_pct(),
12093            time_and_materials: default_time_materials_pct(),
12094            service_agreement: default_service_agreement_pct(),
12095        }
12096    }
12097}
12098
/// Default share of fixed-price contracts.
fn default_fixed_price_pct() -> f64 {
    0.40_f64
}
/// Default share of blanket/framework contracts.
fn default_blanket_pct() -> f64 {
    0.30_f64
}
/// Default share of time-and-materials contracts.
fn default_time_materials_pct() -> f64 {
    0.15_f64
}
/// Default share of service agreements.
fn default_service_agreement_pct() -> f64 {
    0.15_f64
}
12111
12112/// Catalog configuration.
12113#[derive(Debug, Clone, Serialize, Deserialize)]
12114pub struct CatalogConfig {
12115    /// Percentage of catalog items marked as preferred
12116    #[serde(default = "default_preferred_vendor_flag_rate")]
12117    pub preferred_vendor_flag_rate: f64,
12118    /// Rate of materials with multiple sources in catalog
12119    #[serde(default = "default_multi_source_rate")]
12120    pub multi_source_rate: f64,
12121}
12122
12123impl Default for CatalogConfig {
12124    fn default() -> Self {
12125        Self {
12126            preferred_vendor_flag_rate: default_preferred_vendor_flag_rate(),
12127            multi_source_rate: default_multi_source_rate(),
12128        }
12129    }
12130}
12131
/// Default share of catalog items marked as preferred.
fn default_preferred_vendor_flag_rate() -> f64 {
    0.70_f64
}
/// Default rate of materials with multiple catalog sources.
fn default_multi_source_rate() -> f64 {
    0.25_f64
}
12138
12139/// Scorecard configuration.
12140#[derive(Debug, Clone, Serialize, Deserialize)]
12141pub struct ScorecardConfig {
12142    /// Scorecard review frequency (quarterly, monthly)
12143    #[serde(default = "default_scorecard_frequency")]
12144    pub frequency: String,
12145    /// On-time delivery weight in overall score
12146    #[serde(default = "default_otd_weight")]
12147    pub on_time_delivery_weight: f64,
12148    /// Quality weight in overall score
12149    #[serde(default = "default_quality_score_weight")]
12150    pub quality_weight: f64,
12151    /// Price competitiveness weight
12152    #[serde(default = "default_price_score_weight")]
12153    pub price_weight: f64,
12154    /// Responsiveness weight
12155    #[serde(default = "default_responsiveness_weight")]
12156    pub responsiveness_weight: f64,
12157    /// Grade A threshold (score >= this)
12158    #[serde(default = "default_grade_a_threshold")]
12159    pub grade_a_threshold: f64,
12160    /// Grade B threshold
12161    #[serde(default = "default_grade_b_threshold")]
12162    pub grade_b_threshold: f64,
12163    /// Grade C threshold
12164    #[serde(default = "default_grade_c_threshold")]
12165    pub grade_c_threshold: f64,
12166}
12167
12168impl Default for ScorecardConfig {
12169    fn default() -> Self {
12170        Self {
12171            frequency: default_scorecard_frequency(),
12172            on_time_delivery_weight: default_otd_weight(),
12173            quality_weight: default_quality_score_weight(),
12174            price_weight: default_price_score_weight(),
12175            responsiveness_weight: default_responsiveness_weight(),
12176            grade_a_threshold: default_grade_a_threshold(),
12177            grade_b_threshold: default_grade_b_threshold(),
12178            grade_c_threshold: default_grade_c_threshold(),
12179        }
12180    }
12181}
12182
/// Default scorecard review frequency.
fn default_scorecard_frequency() -> String {
    String::from("quarterly")
}
/// Default on-time delivery weight in the overall score.
fn default_otd_weight() -> f64 {
    0.30_f64
}
/// Default quality weight in the overall score.
fn default_quality_score_weight() -> f64 {
    0.30_f64
}
/// Default price competitiveness weight.
fn default_price_score_weight() -> f64 {
    0.25_f64
}
/// Default responsiveness weight.
fn default_responsiveness_weight() -> f64 {
    0.15_f64
}
/// Default grade A threshold.
fn default_grade_a_threshold() -> f64 {
    90.0_f64
}
/// Default grade B threshold.
fn default_grade_b_threshold() -> f64 {
    75.0_f64
}
/// Default grade C threshold.
fn default_grade_c_threshold() -> f64 {
    60.0_f64
}
12207
12208/// P2P integration settings for contract enforcement.
12209#[derive(Debug, Clone, Serialize, Deserialize)]
12210pub struct P2PIntegrationConfig {
12211    /// Rate of off-contract (maverick) purchases
12212    #[serde(default = "default_off_contract_rate")]
12213    pub off_contract_rate: f64,
12214    /// Price tolerance for contract price validation
12215    #[serde(default = "default_price_tolerance")]
12216    pub price_tolerance: f64,
12217    /// Whether to enforce catalog ordering
12218    #[serde(default)]
12219    pub catalog_enforcement: bool,
12220}
12221
12222impl Default for P2PIntegrationConfig {
12223    fn default() -> Self {
12224        Self {
12225            off_contract_rate: default_off_contract_rate(),
12226            price_tolerance: default_price_tolerance(),
12227            catalog_enforcement: false,
12228        }
12229    }
12230}
12231
/// Default rate of off-contract (maverick) purchases.
fn default_off_contract_rate() -> f64 {
    0.15_f64
}
/// Default price tolerance for contract price validation.
fn default_price_tolerance() -> f64 {
    0.02_f64
}
12238
12239// ----- Financial Reporting -----
12240
12241/// Financial reporting configuration.
12242#[derive(Debug, Clone, Serialize, Deserialize)]
12243pub struct FinancialReportingConfig {
12244    /// Enable financial reporting generation
12245    #[serde(default)]
12246    pub enabled: bool,
12247    /// Generate balance sheet
12248    #[serde(default = "default_true")]
12249    pub generate_balance_sheet: bool,
12250    /// Generate income statement
12251    #[serde(default = "default_true")]
12252    pub generate_income_statement: bool,
12253    /// Generate cash flow statement
12254    #[serde(default = "default_true")]
12255    pub generate_cash_flow: bool,
12256    /// Generate changes in equity statement
12257    #[serde(default = "default_true")]
12258    pub generate_changes_in_equity: bool,
12259    /// Number of comparative periods
12260    #[serde(default = "default_comparative_periods")]
12261    pub comparative_periods: u32,
12262    /// Management KPIs configuration
12263    #[serde(default)]
12264    pub management_kpis: ManagementKpisConfig,
12265    /// Budget configuration
12266    #[serde(default)]
12267    pub budgets: BudgetConfig,
12268}
12269
12270impl Default for FinancialReportingConfig {
12271    fn default() -> Self {
12272        Self {
12273            enabled: false,
12274            generate_balance_sheet: true,
12275            generate_income_statement: true,
12276            generate_cash_flow: true,
12277            generate_changes_in_equity: true,
12278            comparative_periods: default_comparative_periods(),
12279            management_kpis: ManagementKpisConfig::default(),
12280            budgets: BudgetConfig::default(),
12281        }
12282    }
12283}
12284
/// Default number of comparative periods.
fn default_comparative_periods() -> u32 {
    1_u32
}
12288
12289/// Management KPIs configuration.
12290#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12291pub struct ManagementKpisConfig {
12292    /// Enable KPI generation
12293    #[serde(default)]
12294    pub enabled: bool,
12295    /// KPI calculation frequency (monthly, quarterly)
12296    #[serde(default = "default_kpi_frequency")]
12297    pub frequency: String,
12298}
12299
/// Default KPI calculation frequency.
fn default_kpi_frequency() -> String {
    String::from("monthly")
}
12303
12304/// Budget configuration.
12305#[derive(Debug, Clone, Serialize, Deserialize)]
12306pub struct BudgetConfig {
12307    /// Enable budget generation
12308    #[serde(default)]
12309    pub enabled: bool,
12310    /// Expected revenue growth rate for budgeting
12311    #[serde(default = "default_revenue_growth_rate")]
12312    pub revenue_growth_rate: f64,
12313    /// Expected expense inflation rate
12314    #[serde(default = "default_expense_inflation_rate")]
12315    pub expense_inflation_rate: f64,
12316    /// Random noise to add to budget vs actual
12317    #[serde(default = "default_variance_noise")]
12318    pub variance_noise: f64,
12319}
12320
12321impl Default for BudgetConfig {
12322    fn default() -> Self {
12323        Self {
12324            enabled: false,
12325            revenue_growth_rate: default_revenue_growth_rate(),
12326            expense_inflation_rate: default_expense_inflation_rate(),
12327            variance_noise: default_variance_noise(),
12328        }
12329    }
12330}
12331
/// Default expected revenue growth rate for budgeting.
fn default_revenue_growth_rate() -> f64 {
    0.05_f64
}
/// Default expected expense inflation rate.
fn default_expense_inflation_rate() -> f64 {
    0.03_f64
}
/// Default noise applied to budget vs actual.
fn default_variance_noise() -> f64 {
    0.10_f64
}
12341
12342// ----- HR Configuration -----
12343
12344/// HR (Hire-to-Retire) process configuration.
12345#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12346pub struct HrConfig {
12347    /// Enable HR generation
12348    #[serde(default)]
12349    pub enabled: bool,
12350    /// Payroll configuration
12351    #[serde(default)]
12352    pub payroll: PayrollConfig,
12353    /// Time and attendance configuration
12354    #[serde(default)]
12355    pub time_attendance: TimeAttendanceConfig,
12356    /// Expense management configuration
12357    #[serde(default)]
12358    pub expenses: ExpenseConfig,
12359}
12360
12361/// Payroll configuration.
12362#[derive(Debug, Clone, Serialize, Deserialize)]
12363pub struct PayrollConfig {
12364    /// Enable payroll generation
12365    #[serde(default = "default_true")]
12366    pub enabled: bool,
12367    /// Pay frequency (monthly, biweekly, weekly)
12368    #[serde(default = "default_pay_frequency")]
12369    pub pay_frequency: String,
12370    /// Salary ranges by job level
12371    #[serde(default)]
12372    pub salary_ranges: PayrollSalaryRanges,
12373    /// Effective tax rates
12374    #[serde(default)]
12375    pub tax_rates: PayrollTaxRates,
12376    /// Benefits enrollment rate
12377    #[serde(default = "default_benefits_enrollment_rate")]
12378    pub benefits_enrollment_rate: f64,
12379    /// Retirement plan participation rate
12380    #[serde(default = "default_retirement_participation_rate")]
12381    pub retirement_participation_rate: f64,
12382}
12383
12384impl Default for PayrollConfig {
12385    fn default() -> Self {
12386        Self {
12387            enabled: true,
12388            pay_frequency: default_pay_frequency(),
12389            salary_ranges: PayrollSalaryRanges::default(),
12390            tax_rates: PayrollTaxRates::default(),
12391            benefits_enrollment_rate: default_benefits_enrollment_rate(),
12392            retirement_participation_rate: default_retirement_participation_rate(),
12393        }
12394    }
12395}
12396
/// Default pay frequency.
fn default_pay_frequency() -> String {
    String::from("monthly")
}
/// Default benefits enrollment rate.
fn default_benefits_enrollment_rate() -> f64 {
    0.60_f64
}
/// Default retirement plan participation rate.
fn default_retirement_participation_rate() -> f64 {
    0.45_f64
}
12406
12407/// Salary ranges by job level.
12408#[derive(Debug, Clone, Serialize, Deserialize)]
12409pub struct PayrollSalaryRanges {
12410    /// Staff level min/max
12411    #[serde(default = "default_staff_min")]
12412    pub staff_min: f64,
12413    #[serde(default = "default_staff_max")]
12414    pub staff_max: f64,
12415    /// Manager level min/max
12416    #[serde(default = "default_manager_min")]
12417    pub manager_min: f64,
12418    #[serde(default = "default_manager_max")]
12419    pub manager_max: f64,
12420    /// Director level min/max
12421    #[serde(default = "default_director_min")]
12422    pub director_min: f64,
12423    #[serde(default = "default_director_max")]
12424    pub director_max: f64,
12425    /// Executive level min/max
12426    #[serde(default = "default_executive_min")]
12427    pub executive_min: f64,
12428    #[serde(default = "default_executive_max")]
12429    pub executive_max: f64,
12430}
12431
12432impl Default for PayrollSalaryRanges {
12433    fn default() -> Self {
12434        Self {
12435            staff_min: default_staff_min(),
12436            staff_max: default_staff_max(),
12437            manager_min: default_manager_min(),
12438            manager_max: default_manager_max(),
12439            director_min: default_director_min(),
12440            director_max: default_director_max(),
12441            executive_min: default_executive_min(),
12442            executive_max: default_executive_max(),
12443        }
12444    }
12445}
12446
/// Default staff-level salary minimum.
fn default_staff_min() -> f64 {
    50_000.0_f64
}
/// Default staff-level salary maximum.
fn default_staff_max() -> f64 {
    70_000.0_f64
}
/// Default manager-level salary minimum.
fn default_manager_min() -> f64 {
    80_000.0_f64
}
/// Default manager-level salary maximum.
fn default_manager_max() -> f64 {
    120_000.0_f64
}
/// Default director-level salary minimum.
fn default_director_min() -> f64 {
    120_000.0_f64
}
/// Default director-level salary maximum.
fn default_director_max() -> f64 {
    180_000.0_f64
}
/// Default executive-level salary minimum.
fn default_executive_min() -> f64 {
    180_000.0_f64
}
/// Default executive-level salary maximum.
fn default_executive_max() -> f64 {
    350_000.0_f64
}
12471
12472/// Effective tax rates for payroll.
12473#[derive(Debug, Clone, Serialize, Deserialize)]
12474pub struct PayrollTaxRates {
12475    /// Federal effective tax rate
12476    #[serde(default = "default_federal_rate")]
12477    pub federal_effective: f64,
12478    /// State effective tax rate
12479    #[serde(default = "default_state_rate")]
12480    pub state_effective: f64,
12481    /// FICA/social security rate
12482    #[serde(default = "default_fica_rate")]
12483    pub fica: f64,
12484}
12485
12486impl Default for PayrollTaxRates {
12487    fn default() -> Self {
12488        Self {
12489            federal_effective: default_federal_rate(),
12490            state_effective: default_state_rate(),
12491            fica: default_fica_rate(),
12492        }
12493    }
12494}
12495
/// Default federal effective tax rate.
fn default_federal_rate() -> f64 {
    0.22_f64
}
/// Default state effective tax rate.
fn default_state_rate() -> f64 {
    0.05_f64
}
/// Default FICA/social security rate.
fn default_fica_rate() -> f64 {
    0.0765_f64
}
12505
12506/// Time and attendance configuration.
12507#[derive(Debug, Clone, Serialize, Deserialize)]
12508pub struct TimeAttendanceConfig {
12509    /// Enable time tracking
12510    #[serde(default = "default_true")]
12511    pub enabled: bool,
12512    /// Overtime rate (% of employees with overtime in a period)
12513    #[serde(default = "default_overtime_rate")]
12514    pub overtime_rate: f64,
12515}
12516
12517impl Default for TimeAttendanceConfig {
12518    fn default() -> Self {
12519        Self {
12520            enabled: true,
12521            overtime_rate: default_overtime_rate(),
12522        }
12523    }
12524}
12525
/// Default share of employees with overtime in a period.
fn default_overtime_rate() -> f64 {
    0.10_f64
}
12529
12530/// Expense management configuration.
12531#[derive(Debug, Clone, Serialize, Deserialize)]
12532pub struct ExpenseConfig {
12533    /// Enable expense report generation
12534    #[serde(default = "default_true")]
12535    pub enabled: bool,
12536    /// Rate of employees submitting expenses per month
12537    #[serde(default = "default_expense_submission_rate")]
12538    pub submission_rate: f64,
12539    /// Rate of policy violations
12540    #[serde(default = "default_policy_violation_rate")]
12541    pub policy_violation_rate: f64,
12542}
12543
12544impl Default for ExpenseConfig {
12545    fn default() -> Self {
12546        Self {
12547            enabled: true,
12548            submission_rate: default_expense_submission_rate(),
12549            policy_violation_rate: default_policy_violation_rate(),
12550        }
12551    }
12552}
12553
/// Default rate of employees submitting expenses per month.
fn default_expense_submission_rate() -> f64 {
    0.30_f64
}
/// Default rate of expense policy violations.
fn default_policy_violation_rate() -> f64 {
    0.08_f64
}
12560
12561// ----- Manufacturing Configuration -----
12562
12563/// Manufacturing process configuration (production orders, WIP, routing).
12564#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12565pub struct ManufacturingProcessConfig {
12566    /// Enable manufacturing generation
12567    #[serde(default)]
12568    pub enabled: bool,
12569    /// Production order configuration
12570    #[serde(default)]
12571    pub production_orders: ProductionOrderConfig,
12572    /// Costing configuration
12573    #[serde(default)]
12574    pub costing: ManufacturingCostingConfig,
12575    /// Routing configuration
12576    #[serde(default)]
12577    pub routing: RoutingConfig,
12578}
12579
12580/// Production order configuration.
12581#[derive(Debug, Clone, Serialize, Deserialize)]
12582pub struct ProductionOrderConfig {
12583    /// Orders per month
12584    #[serde(default = "default_prod_orders_per_month")]
12585    pub orders_per_month: u32,
12586    /// Average batch size
12587    #[serde(default = "default_prod_avg_batch_size")]
12588    pub avg_batch_size: u32,
12589    /// Yield rate
12590    #[serde(default = "default_prod_yield_rate")]
12591    pub yield_rate: f64,
12592    /// Make-to-order rate (vs make-to-stock)
12593    #[serde(default = "default_prod_make_to_order_rate")]
12594    pub make_to_order_rate: f64,
12595    /// Rework rate
12596    #[serde(default = "default_prod_rework_rate")]
12597    pub rework_rate: f64,
12598}
12599
12600impl Default for ProductionOrderConfig {
12601    fn default() -> Self {
12602        Self {
12603            orders_per_month: default_prod_orders_per_month(),
12604            avg_batch_size: default_prod_avg_batch_size(),
12605            yield_rate: default_prod_yield_rate(),
12606            make_to_order_rate: default_prod_make_to_order_rate(),
12607            rework_rate: default_prod_rework_rate(),
12608        }
12609    }
12610}
12611
/// Default number of production orders per month.
fn default_prod_orders_per_month() -> u32 {
    50_u32
}
/// Default average batch size.
fn default_prod_avg_batch_size() -> u32 {
    100_u32
}
/// Default production yield rate.
fn default_prod_yield_rate() -> f64 {
    0.97_f64
}
/// Default make-to-order rate (vs make-to-stock).
fn default_prod_make_to_order_rate() -> f64 {
    0.20_f64
}
/// Default rework rate.
fn default_prod_rework_rate() -> f64 {
    0.03_f64
}
12627
12628/// Manufacturing costing configuration.
12629#[derive(Debug, Clone, Serialize, Deserialize)]
12630pub struct ManufacturingCostingConfig {
12631    /// Labor rate per hour
12632    #[serde(default = "default_labor_rate")]
12633    pub labor_rate_per_hour: f64,
12634    /// Overhead application rate (multiplier on direct labor)
12635    #[serde(default = "default_overhead_rate")]
12636    pub overhead_rate: f64,
12637    /// Standard cost update frequency
12638    #[serde(default = "default_cost_update_frequency")]
12639    pub standard_cost_update_frequency: String,
12640}
12641
12642impl Default for ManufacturingCostingConfig {
12643    fn default() -> Self {
12644        Self {
12645            labor_rate_per_hour: default_labor_rate(),
12646            overhead_rate: default_overhead_rate(),
12647            standard_cost_update_frequency: default_cost_update_frequency(),
12648        }
12649    }
12650}
12651
/// Default labor rate per hour.
fn default_labor_rate() -> f64 {
    35.0_f64
}
/// Default overhead application rate (multiplier on direct labor).
fn default_overhead_rate() -> f64 {
    1.50_f64
}
/// Default standard cost update frequency.
fn default_cost_update_frequency() -> String {
    String::from("quarterly")
}
12661
12662/// Routing configuration for production operations.
12663#[derive(Debug, Clone, Serialize, Deserialize)]
12664pub struct RoutingConfig {
12665    /// Average number of operations per routing
12666    #[serde(default = "default_avg_operations")]
12667    pub avg_operations: u32,
12668    /// Average setup time in hours
12669    #[serde(default = "default_setup_time")]
12670    pub setup_time_hours: f64,
12671    /// Run time variation coefficient
12672    #[serde(default = "default_run_time_variation")]
12673    pub run_time_variation: f64,
12674}
12675
12676impl Default for RoutingConfig {
12677    fn default() -> Self {
12678        Self {
12679            avg_operations: default_avg_operations(),
12680            setup_time_hours: default_setup_time(),
12681            run_time_variation: default_run_time_variation(),
12682        }
12683    }
12684}
12685
/// Default average number of operations per routing.
fn default_avg_operations() -> u32 {
    4_u32
}
/// Default average setup time, in hours.
fn default_setup_time() -> f64 {
    1.5_f64
}
/// Default run time variation coefficient.
fn default_run_time_variation() -> f64 {
    0.15_f64
}
12695
12696// ----- Sales Quote Configuration -----
12697
12698/// Sales quote (quote-to-order) pipeline configuration.
12699#[derive(Debug, Clone, Serialize, Deserialize)]
12700pub struct SalesQuoteConfig {
12701    /// Enable sales quote generation
12702    #[serde(default)]
12703    pub enabled: bool,
12704    /// Quotes per month
12705    #[serde(default = "default_quotes_per_month")]
12706    pub quotes_per_month: u32,
12707    /// Win rate (fraction of quotes that convert to orders)
12708    #[serde(default = "default_quote_win_rate")]
12709    pub win_rate: f64,
12710    /// Average quote validity in days
12711    #[serde(default = "default_quote_validity_days")]
12712    pub validity_days: u32,
12713}
12714
12715impl Default for SalesQuoteConfig {
12716    fn default() -> Self {
12717        Self {
12718            enabled: false,
12719            quotes_per_month: default_quotes_per_month(),
12720            win_rate: default_quote_win_rate(),
12721            validity_days: default_quote_validity_days(),
12722        }
12723    }
12724}
12725
/// Default number of quotes per month.
fn default_quotes_per_month() -> u32 {
    30_u32
}
/// Default fraction of quotes that convert to orders.
fn default_quote_win_rate() -> f64 {
    0.35_f64
}
/// Default quote validity, in days.
fn default_quote_validity_days() -> u32 {
    30_u32
}
12735
12736// =============================================================================
12737// Tax Accounting Configuration
12738// =============================================================================
12739
12740/// Tax accounting configuration.
12741///
12742/// Controls generation of tax-related data including VAT/GST, sales tax,
12743/// withholding tax, tax provisions, and payroll tax across multiple jurisdictions.
12744#[derive(Debug, Clone, Serialize, Deserialize)]
12745pub struct TaxConfig {
12746    /// Whether tax generation is enabled.
12747    #[serde(default)]
12748    pub enabled: bool,
12749    /// Tax jurisdiction configuration.
12750    #[serde(default)]
12751    pub jurisdictions: TaxJurisdictionConfig,
12752    /// VAT/GST configuration.
12753    #[serde(default)]
12754    pub vat_gst: VatGstConfig,
12755    /// Sales tax configuration.
12756    #[serde(default)]
12757    pub sales_tax: SalesTaxConfig,
12758    /// Withholding tax configuration.
12759    #[serde(default)]
12760    pub withholding: WithholdingTaxSchemaConfig,
12761    /// Tax provision configuration.
12762    #[serde(default)]
12763    pub provisions: TaxProvisionSchemaConfig,
12764    /// Payroll tax configuration.
12765    #[serde(default)]
12766    pub payroll_tax: PayrollTaxSchemaConfig,
12767    /// Anomaly injection rate for tax data (0.0 to 1.0).
12768    #[serde(default = "default_tax_anomaly_rate")]
12769    pub anomaly_rate: f64,
12770}
12771
/// Default anomaly injection rate for tax data.
fn default_tax_anomaly_rate() -> f64 {
    0.03_f64
}
12775
12776impl Default for TaxConfig {
12777    fn default() -> Self {
12778        Self {
12779            enabled: false,
12780            jurisdictions: TaxJurisdictionConfig::default(),
12781            vat_gst: VatGstConfig::default(),
12782            sales_tax: SalesTaxConfig::default(),
12783            withholding: WithholdingTaxSchemaConfig::default(),
12784            provisions: TaxProvisionSchemaConfig::default(),
12785            payroll_tax: PayrollTaxSchemaConfig::default(),
12786            anomaly_rate: default_tax_anomaly_rate(),
12787        }
12788    }
12789}
12790
12791/// Tax jurisdiction configuration.
12792///
12793/// Specifies which countries and subnational jurisdictions to include
12794/// when generating tax data.
12795#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12796pub struct TaxJurisdictionConfig {
12797    /// List of country codes to include (e.g., ["US", "DE", "GB"]).
12798    #[serde(default)]
12799    pub countries: Vec<String>,
12800    /// Whether to include subnational jurisdictions (e.g., US states, Canadian provinces).
12801    #[serde(default)]
12802    pub include_subnational: bool,
12803}
12804
12805/// VAT/GST configuration.
12806///
12807/// Controls generation of Value Added Tax / Goods and Services Tax data,
12808/// including standard and reduced rates, exempt categories, and reverse charge.
12809#[derive(Debug, Clone, Serialize, Deserialize)]
12810pub struct VatGstConfig {
12811    /// Whether VAT/GST generation is enabled.
12812    #[serde(default)]
12813    pub enabled: bool,
12814    /// Standard VAT/GST rates by country code (e.g., {"DE": 0.19, "GB": 0.20}).
12815    #[serde(default)]
12816    pub standard_rates: std::collections::HashMap<String, f64>,
12817    /// Reduced VAT/GST rates by country code (e.g., {"DE": 0.07, "GB": 0.05}).
12818    #[serde(default)]
12819    pub reduced_rates: std::collections::HashMap<String, f64>,
12820    /// Categories exempt from VAT/GST (e.g., ["financial_services", "healthcare"]).
12821    #[serde(default)]
12822    pub exempt_categories: Vec<String>,
12823    /// Whether to apply reverse charge mechanism for cross-border B2B transactions.
12824    #[serde(default = "default_true")]
12825    pub reverse_charge: bool,
12826}
12827
12828impl Default for VatGstConfig {
12829    fn default() -> Self {
12830        Self {
12831            enabled: false,
12832            standard_rates: std::collections::HashMap::new(),
12833            reduced_rates: std::collections::HashMap::new(),
12834            exempt_categories: Vec::new(),
12835            reverse_charge: true,
12836        }
12837    }
12838}
12839
12840/// Sales tax configuration.
12841///
12842/// Controls generation of US-style sales tax data including nexus determination.
12843#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12844pub struct SalesTaxConfig {
12845    /// Whether sales tax generation is enabled.
12846    #[serde(default)]
12847    pub enabled: bool,
12848    /// US states where the company has nexus (e.g., ["CA", "NY", "TX"]).
12849    #[serde(default)]
12850    pub nexus_states: Vec<String>,
12851}
12852
12853/// Withholding tax configuration.
12854///
12855/// Controls generation of withholding tax data for cross-border payments,
12856/// including treaty network and rate overrides.
12857#[derive(Debug, Clone, Serialize, Deserialize)]
12858pub struct WithholdingTaxSchemaConfig {
12859    /// Whether withholding tax generation is enabled.
12860    #[serde(default)]
12861    pub enabled: bool,
12862    /// Whether to simulate a treaty network with reduced rates.
12863    #[serde(default = "default_true")]
12864    pub treaty_network: bool,
12865    /// Default withholding tax rate for non-treaty countries (0.0 to 1.0).
12866    #[serde(default = "default_withholding_rate")]
12867    pub default_rate: f64,
12868    /// Reduced withholding tax rate for treaty countries (0.0 to 1.0).
12869    #[serde(default = "default_treaty_reduced_rate")]
12870    pub treaty_reduced_rate: f64,
12871}
12872
/// Default withholding tax rate for non-treaty countries.
fn default_withholding_rate() -> f64 {
    0.30_f64
}

/// Default reduced withholding tax rate for treaty countries.
fn default_treaty_reduced_rate() -> f64 {
    0.15_f64
}
12880
12881impl Default for WithholdingTaxSchemaConfig {
12882    fn default() -> Self {
12883        Self {
12884            enabled: false,
12885            treaty_network: true,
12886            default_rate: default_withholding_rate(),
12887            treaty_reduced_rate: default_treaty_reduced_rate(),
12888        }
12889    }
12890}
12891
12892/// Tax provision configuration.
12893///
12894/// Controls generation of tax provision data including statutory rates
12895/// and uncertain tax positions (ASC 740 / IAS 12).
12896#[derive(Debug, Clone, Serialize, Deserialize)]
12897pub struct TaxProvisionSchemaConfig {
12898    /// Whether tax provision generation is enabled.
12899    /// Defaults to true when tax is enabled, as provisions are typically required.
12900    #[serde(default = "default_true")]
12901    pub enabled: bool,
12902    /// Statutory corporate tax rate (0.0 to 1.0).
12903    #[serde(default = "default_statutory_rate")]
12904    pub statutory_rate: f64,
12905    /// Whether to generate uncertain tax positions (FIN 48 / IFRIC 23).
12906    #[serde(default = "default_true")]
12907    pub uncertain_positions: bool,
12908}
12909
/// Default statutory corporate tax rate.
fn default_statutory_rate() -> f64 {
    0.21_f64
}
12913
12914impl Default for TaxProvisionSchemaConfig {
12915    fn default() -> Self {
12916        Self {
12917            enabled: true,
12918            statutory_rate: default_statutory_rate(),
12919            uncertain_positions: true,
12920        }
12921    }
12922}
12923
12924/// Payroll tax configuration.
12925///
12926/// Controls generation of payroll tax data (employer/employee contributions,
12927/// social security, Medicare, etc.).
12928#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12929pub struct PayrollTaxSchemaConfig {
12930    /// Whether payroll tax generation is enabled.
12931    #[serde(default)]
12932    pub enabled: bool,
12933}
12934
12935// ---------------------------------------------------------------------------
12936// Treasury & Cash Management Configuration
12937// ---------------------------------------------------------------------------
12938
12939/// Treasury and cash management configuration.
12940///
12941/// Controls generation of cash positions, forecasts, pooling, hedging
12942/// instruments (ASC 815 / IFRS 9), debt instruments with covenants,
12943/// bank guarantees, and intercompany netting runs.
12944#[derive(Debug, Clone, Serialize, Deserialize)]
12945pub struct TreasuryConfig {
12946    /// Whether treasury generation is enabled.
12947    #[serde(default)]
12948    pub enabled: bool,
12949    /// Cash positioning configuration.
12950    #[serde(default)]
12951    pub cash_positioning: CashPositioningConfig,
12952    /// Cash forecasting configuration.
12953    #[serde(default)]
12954    pub cash_forecasting: CashForecastingConfig,
12955    /// Cash pooling configuration.
12956    #[serde(default)]
12957    pub cash_pooling: CashPoolingConfig,
12958    /// Hedging configuration (FX forwards, IR swaps, etc.).
12959    #[serde(default)]
12960    pub hedging: HedgingSchemaConfig,
12961    /// Debt instrument and covenant configuration.
12962    #[serde(default)]
12963    pub debt: DebtSchemaConfig,
12964    /// Intercompany netting configuration.
12965    #[serde(default)]
12966    pub netting: NettingSchemaConfig,
12967    /// Bank guarantee / letter of credit configuration.
12968    #[serde(default)]
12969    pub bank_guarantees: BankGuaranteeSchemaConfig,
12970    /// Anomaly injection rate for treasury data (0.0 to 1.0).
12971    #[serde(default = "default_treasury_anomaly_rate")]
12972    pub anomaly_rate: f64,
12973}
12974
/// Fallback for `TreasuryConfig::anomaly_rate` when the key is omitted.
fn default_treasury_anomaly_rate() -> f64 {
    0.02_f64
}
12978
12979impl Default for TreasuryConfig {
12980    fn default() -> Self {
12981        Self {
12982            enabled: false,
12983            cash_positioning: CashPositioningConfig::default(),
12984            cash_forecasting: CashForecastingConfig::default(),
12985            cash_pooling: CashPoolingConfig::default(),
12986            hedging: HedgingSchemaConfig::default(),
12987            debt: DebtSchemaConfig::default(),
12988            netting: NettingSchemaConfig::default(),
12989            bank_guarantees: BankGuaranteeSchemaConfig::default(),
12990            anomaly_rate: default_treasury_anomaly_rate(),
12991        }
12992    }
12993}
12994
12995/// Cash positioning configuration.
12996///
12997/// Controls daily cash position generation per entity/bank account.
12998#[derive(Debug, Clone, Serialize, Deserialize)]
12999pub struct CashPositioningConfig {
13000    /// Whether cash positioning is enabled.
13001    #[serde(default = "default_true")]
13002    pub enabled: bool,
13003    /// Position generation frequency.
13004    #[serde(default = "default_cash_frequency")]
13005    pub frequency: String,
13006    /// Minimum cash balance policy threshold.
13007    #[serde(default = "default_minimum_balance_policy")]
13008    pub minimum_balance_policy: f64,
13009}
13010
/// Fallback for `CashPositioningConfig::frequency` when the key is omitted.
fn default_cash_frequency() -> String {
    String::from("daily")
}
13014
/// Fallback for `CashPositioningConfig::minimum_balance_policy` when the key is omitted.
fn default_minimum_balance_policy() -> f64 {
    100_000.0_f64
}
13018
13019impl Default for CashPositioningConfig {
13020    fn default() -> Self {
13021        Self {
13022            enabled: true,
13023            frequency: default_cash_frequency(),
13024            minimum_balance_policy: default_minimum_balance_policy(),
13025        }
13026    }
13027}
13028
13029/// Cash forecasting configuration.
13030///
13031/// Controls forward-looking cash forecast generation with probability-weighted items.
13032#[derive(Debug, Clone, Serialize, Deserialize)]
13033pub struct CashForecastingConfig {
13034    /// Whether cash forecasting is enabled.
13035    #[serde(default = "default_true")]
13036    pub enabled: bool,
13037    /// Number of days to forecast into the future.
13038    #[serde(default = "default_horizon_days")]
13039    pub horizon_days: u32,
13040    /// AR collection probability curve type ("aging" or "flat").
13041    #[serde(default = "default_ar_probability_curve")]
13042    pub ar_collection_probability_curve: String,
13043    /// Confidence interval for the forecast (0.0 to 1.0).
13044    #[serde(default = "default_confidence_interval")]
13045    pub confidence_interval: f64,
13046}
13047
/// Fallback for `CashForecastingConfig::horizon_days` when the key is omitted.
fn default_horizon_days() -> u32 {
    90_u32
}
13051
/// Fallback for `CashForecastingConfig::ar_collection_probability_curve`
/// when the key is omitted.
fn default_ar_probability_curve() -> String {
    String::from("aging")
}
13055
/// Fallback for `CashForecastingConfig::confidence_interval` when the key is omitted.
fn default_confidence_interval() -> f64 {
    0.90_f64
}
13059
13060impl Default for CashForecastingConfig {
13061    fn default() -> Self {
13062        Self {
13063            enabled: true,
13064            horizon_days: default_horizon_days(),
13065            ar_collection_probability_curve: default_ar_probability_curve(),
13066            confidence_interval: default_confidence_interval(),
13067        }
13068    }
13069}
13070
13071/// Cash pooling configuration.
13072///
13073/// Controls cash pool structure generation (physical, notional, zero-balancing).
13074#[derive(Debug, Clone, Serialize, Deserialize)]
13075pub struct CashPoolingConfig {
13076    /// Whether cash pooling is enabled.
13077    #[serde(default)]
13078    pub enabled: bool,
13079    /// Pool type: "physical_pooling", "notional_pooling", or "zero_balancing".
13080    #[serde(default = "default_pool_type")]
13081    pub pool_type: String,
13082    /// Time of day when sweeps occur (HH:MM format).
13083    #[serde(default = "default_sweep_time")]
13084    pub sweep_time: String,
13085}
13086
/// Fallback for `CashPoolingConfig::pool_type` when the key is omitted.
fn default_pool_type() -> String {
    String::from("zero_balancing")
}
13090
/// Fallback for `CashPoolingConfig::sweep_time` when the key is omitted.
fn default_sweep_time() -> String {
    String::from("16:00")
}
13094
13095impl Default for CashPoolingConfig {
13096    fn default() -> Self {
13097        Self {
13098            enabled: false,
13099            pool_type: default_pool_type(),
13100            sweep_time: default_sweep_time(),
13101        }
13102    }
13103}
13104
13105/// Hedging configuration.
13106///
13107/// Controls generation of hedging instruments and hedge relationship designations
13108/// under ASC 815 / IFRS 9.
13109#[derive(Debug, Clone, Serialize, Deserialize)]
13110pub struct HedgingSchemaConfig {
13111    /// Whether hedging generation is enabled.
13112    #[serde(default)]
13113    pub enabled: bool,
13114    /// Target hedge ratio (0.0 to 1.0). Proportion of FX exposure to hedge.
13115    #[serde(default = "default_hedge_ratio")]
13116    pub hedge_ratio: f64,
13117    /// Types of instruments to generate (e.g., ["fx_forward", "interest_rate_swap"]).
13118    #[serde(default = "default_hedge_instruments")]
13119    pub instruments: Vec<String>,
13120    /// Whether to designate formal hedge accounting relationships.
13121    #[serde(default = "default_true")]
13122    pub hedge_accounting: bool,
13123    /// Effectiveness testing method: "dollar_offset", "regression", or "critical_terms".
13124    #[serde(default = "default_effectiveness_method")]
13125    pub effectiveness_method: String,
13126}
13127
/// Fallback for `HedgingSchemaConfig::hedge_ratio` when the key is omitted.
fn default_hedge_ratio() -> f64 {
    0.75_f64
}
13131
/// Fallback for `HedgingSchemaConfig::instruments` when the key is omitted.
fn default_hedge_instruments() -> Vec<String> {
    ["fx_forward", "interest_rate_swap"]
        .iter()
        .map(|&name| name.to_owned())
        .collect()
}
13135
/// Fallback for `HedgingSchemaConfig::effectiveness_method` when the key is omitted.
fn default_effectiveness_method() -> String {
    String::from("regression")
}
13139
13140impl Default for HedgingSchemaConfig {
13141    fn default() -> Self {
13142        Self {
13143            enabled: false,
13144            hedge_ratio: default_hedge_ratio(),
13145            instruments: default_hedge_instruments(),
13146            hedge_accounting: true,
13147            effectiveness_method: default_effectiveness_method(),
13148        }
13149    }
13150}
13151
13152/// Debt instrument configuration.
13153///
13154/// Controls generation of debt instruments (term loans, revolving credit, bonds)
13155/// with amortization schedules and financial covenants.
13156#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13157pub struct DebtSchemaConfig {
13158    /// Whether debt instrument generation is enabled.
13159    #[serde(default)]
13160    pub enabled: bool,
13161    /// Debt instrument definitions.
13162    #[serde(default)]
13163    pub instruments: Vec<DebtInstrumentDef>,
13164    /// Covenant definitions.
13165    #[serde(default)]
13166    pub covenants: Vec<CovenantDef>,
13167}
13168
13169/// Definition of a debt instrument in configuration.
13170#[derive(Debug, Clone, Serialize, Deserialize)]
13171pub struct DebtInstrumentDef {
13172    /// Instrument type: "term_loan", "revolving_credit", "bond", "commercial_paper", "bridge_loan".
13173    #[serde(rename = "type")]
13174    pub instrument_type: String,
13175    /// Principal amount (for term loans, bonds).
13176    #[serde(default)]
13177    pub principal: Option<f64>,
13178    /// Interest rate (annual, as decimal fraction).
13179    #[serde(default)]
13180    pub rate: Option<f64>,
13181    /// Maturity in months.
13182    #[serde(default)]
13183    pub maturity_months: Option<u32>,
13184    /// Facility limit (for revolving credit).
13185    #[serde(default)]
13186    pub facility: Option<f64>,
13187}
13188
13189/// Definition of a debt covenant in configuration.
13190#[derive(Debug, Clone, Serialize, Deserialize)]
13191pub struct CovenantDef {
13192    /// Covenant type: "debt_to_equity", "interest_coverage", "current_ratio",
13193    /// "net_worth", "debt_to_ebitda", "fixed_charge_coverage".
13194    #[serde(rename = "type")]
13195    pub covenant_type: String,
13196    /// Covenant threshold value.
13197    pub threshold: f64,
13198}
13199
13200/// Intercompany netting configuration.
13201///
13202/// Controls generation of multilateral netting runs.
13203#[derive(Debug, Clone, Serialize, Deserialize)]
13204pub struct NettingSchemaConfig {
13205    /// Whether netting generation is enabled.
13206    #[serde(default)]
13207    pub enabled: bool,
13208    /// Netting cycle: "daily", "weekly", or "monthly".
13209    #[serde(default = "default_netting_cycle")]
13210    pub cycle: String,
13211}
13212
/// Fallback for `NettingSchemaConfig::cycle` when the key is omitted.
fn default_netting_cycle() -> String {
    String::from("monthly")
}
13216
13217impl Default for NettingSchemaConfig {
13218    fn default() -> Self {
13219        Self {
13220            enabled: false,
13221            cycle: default_netting_cycle(),
13222        }
13223    }
13224}
13225
13226/// Bank guarantee and letter of credit configuration.
13227///
13228/// Controls generation of bank guarantees, standby LCs, and performance bonds.
13229#[derive(Debug, Clone, Serialize, Deserialize)]
13230pub struct BankGuaranteeSchemaConfig {
13231    /// Whether bank guarantee generation is enabled.
13232    #[serde(default)]
13233    pub enabled: bool,
13234    /// Number of guarantees to generate.
13235    #[serde(default = "default_guarantee_count")]
13236    pub count: u32,
13237}
13238
/// Fallback for `BankGuaranteeSchemaConfig::count` when the key is omitted.
fn default_guarantee_count() -> u32 {
    5_u32
}
13242
13243impl Default for BankGuaranteeSchemaConfig {
13244    fn default() -> Self {
13245        Self {
13246            enabled: false,
13247            count: default_guarantee_count(),
13248        }
13249    }
13250}
13251
13252// ===========================================================================
13253// Project Accounting Configuration
13254// ===========================================================================
13255
13256/// Project accounting configuration.
13257///
13258/// Controls generation of project cost lines, revenue recognition,
13259/// milestones, change orders, retainage, and earned value metrics.
13260#[derive(Debug, Clone, Serialize, Deserialize)]
13261pub struct ProjectAccountingConfig {
13262    /// Whether project accounting is enabled.
13263    #[serde(default)]
13264    pub enabled: bool,
13265    /// Number of projects to generate.
13266    #[serde(default = "default_project_count")]
13267    pub project_count: u32,
13268    /// Distribution of project types (capital, internal, customer, r_and_d, maintenance, technology).
13269    #[serde(default)]
13270    pub project_types: ProjectTypeDistribution,
13271    /// WBS structure configuration.
13272    #[serde(default)]
13273    pub wbs: WbsSchemaConfig,
13274    /// Cost allocation rates (what % of source documents get project-tagged).
13275    #[serde(default)]
13276    pub cost_allocation: CostAllocationConfig,
13277    /// Revenue recognition configuration for project accounting.
13278    #[serde(default)]
13279    pub revenue_recognition: ProjectRevenueRecognitionConfig,
13280    /// Milestone configuration.
13281    #[serde(default)]
13282    pub milestones: MilestoneSchemaConfig,
13283    /// Change order configuration.
13284    #[serde(default)]
13285    pub change_orders: ChangeOrderSchemaConfig,
13286    /// Retainage configuration.
13287    #[serde(default)]
13288    pub retainage: RetainageSchemaConfig,
13289    /// Earned value management configuration.
13290    #[serde(default)]
13291    pub earned_value: EarnedValueSchemaConfig,
13292    /// Anomaly injection rate for project accounting data (0.0 to 1.0).
13293    #[serde(default = "default_project_anomaly_rate")]
13294    pub anomaly_rate: f64,
13295}
13296
/// Fallback for `ProjectAccountingConfig::project_count` when the key is omitted.
fn default_project_count() -> u32 {
    10_u32
}
13300
/// Fallback for `ProjectAccountingConfig::anomaly_rate` when the key is omitted.
fn default_project_anomaly_rate() -> f64 {
    0.03_f64
}
13304
13305impl Default for ProjectAccountingConfig {
13306    fn default() -> Self {
13307        Self {
13308            enabled: false,
13309            project_count: default_project_count(),
13310            project_types: ProjectTypeDistribution::default(),
13311            wbs: WbsSchemaConfig::default(),
13312            cost_allocation: CostAllocationConfig::default(),
13313            revenue_recognition: ProjectRevenueRecognitionConfig::default(),
13314            milestones: MilestoneSchemaConfig::default(),
13315            change_orders: ChangeOrderSchemaConfig::default(),
13316            retainage: RetainageSchemaConfig::default(),
13317            earned_value: EarnedValueSchemaConfig::default(),
13318            anomaly_rate: default_project_anomaly_rate(),
13319        }
13320    }
13321}
13322
13323/// Distribution of project types by weight.
13324#[derive(Debug, Clone, Serialize, Deserialize)]
13325pub struct ProjectTypeDistribution {
13326    /// Weight for capital projects (default 0.25).
13327    #[serde(default = "default_capital_weight")]
13328    pub capital: f64,
13329    /// Weight for internal projects (default 0.20).
13330    #[serde(default = "default_internal_weight")]
13331    pub internal: f64,
13332    /// Weight for customer projects (default 0.30).
13333    #[serde(default = "default_customer_weight")]
13334    pub customer: f64,
13335    /// Weight for R&D projects (default 0.10).
13336    #[serde(default = "default_rnd_weight")]
13337    pub r_and_d: f64,
13338    /// Weight for maintenance projects (default 0.10).
13339    #[serde(default = "default_maintenance_weight")]
13340    pub maintenance: f64,
13341    /// Weight for technology projects (default 0.05).
13342    #[serde(default = "default_technology_weight")]
13343    pub technology: f64,
13344}
13345
/// Fallback for `ProjectTypeDistribution::capital` when the key is omitted.
fn default_capital_weight() -> f64 {
    0.25_f64
}
/// Fallback for `ProjectTypeDistribution::internal` when the key is omitted.
fn default_internal_weight() -> f64 {
    0.20_f64
}
/// Fallback for `ProjectTypeDistribution::customer` when the key is omitted.
fn default_customer_weight() -> f64 {
    0.30_f64
}
/// Fallback for `ProjectTypeDistribution::r_and_d` when the key is omitted.
fn default_rnd_weight() -> f64 {
    0.10_f64
}
/// Fallback for `ProjectTypeDistribution::maintenance` when the key is omitted.
fn default_maintenance_weight() -> f64 {
    0.10_f64
}
/// Fallback for `ProjectTypeDistribution::technology` when the key is omitted.
fn default_technology_weight() -> f64 {
    0.05_f64
}
13364
13365impl Default for ProjectTypeDistribution {
13366    fn default() -> Self {
13367        Self {
13368            capital: default_capital_weight(),
13369            internal: default_internal_weight(),
13370            customer: default_customer_weight(),
13371            r_and_d: default_rnd_weight(),
13372            maintenance: default_maintenance_weight(),
13373            technology: default_technology_weight(),
13374        }
13375    }
13376}
13377
13378/// WBS structure configuration.
13379#[derive(Debug, Clone, Serialize, Deserialize)]
13380pub struct WbsSchemaConfig {
13381    /// Maximum depth of WBS hierarchy (default 3).
13382    #[serde(default = "default_wbs_max_depth")]
13383    pub max_depth: u32,
13384    /// Minimum elements per level-1 WBS (default 2).
13385    #[serde(default = "default_wbs_min_elements")]
13386    pub min_elements_per_level: u32,
13387    /// Maximum elements per level-1 WBS (default 6).
13388    #[serde(default = "default_wbs_max_elements")]
13389    pub max_elements_per_level: u32,
13390}
13391
/// Fallback for `WbsSchemaConfig::max_depth` when the key is omitted.
fn default_wbs_max_depth() -> u32 {
    3_u32
}
/// Fallback for `WbsSchemaConfig::min_elements_per_level` when the key is omitted.
fn default_wbs_min_elements() -> u32 {
    2_u32
}
/// Fallback for `WbsSchemaConfig::max_elements_per_level` when the key is omitted.
fn default_wbs_max_elements() -> u32 {
    6_u32
}
13401
13402impl Default for WbsSchemaConfig {
13403    fn default() -> Self {
13404        Self {
13405            max_depth: default_wbs_max_depth(),
13406            min_elements_per_level: default_wbs_min_elements(),
13407            max_elements_per_level: default_wbs_max_elements(),
13408        }
13409    }
13410}
13411
13412/// Cost allocation rates — what fraction of each document type gets linked to a project.
13413#[derive(Debug, Clone, Serialize, Deserialize)]
13414pub struct CostAllocationConfig {
13415    /// Fraction of time entries assigned to projects (0.0 to 1.0).
13416    #[serde(default = "default_time_entry_rate")]
13417    pub time_entry_project_rate: f64,
13418    /// Fraction of expense reports assigned to projects (0.0 to 1.0).
13419    #[serde(default = "default_expense_rate")]
13420    pub expense_project_rate: f64,
13421    /// Fraction of purchase orders assigned to projects (0.0 to 1.0).
13422    #[serde(default = "default_po_rate")]
13423    pub purchase_order_project_rate: f64,
13424    /// Fraction of vendor invoices assigned to projects (0.0 to 1.0).
13425    #[serde(default = "default_vi_rate")]
13426    pub vendor_invoice_project_rate: f64,
13427}
13428
/// Fallback for `CostAllocationConfig::time_entry_project_rate` when the key is omitted.
fn default_time_entry_rate() -> f64 {
    0.60_f64
}
/// Fallback for `CostAllocationConfig::expense_project_rate` when the key is omitted.
fn default_expense_rate() -> f64 {
    0.30_f64
}
/// Fallback for `CostAllocationConfig::purchase_order_project_rate` when the key is omitted.
fn default_po_rate() -> f64 {
    0.40_f64
}
/// Fallback for `CostAllocationConfig::vendor_invoice_project_rate` when the key is omitted.
fn default_vi_rate() -> f64 {
    0.35_f64
}
13441
13442impl Default for CostAllocationConfig {
13443    fn default() -> Self {
13444        Self {
13445            time_entry_project_rate: default_time_entry_rate(),
13446            expense_project_rate: default_expense_rate(),
13447            purchase_order_project_rate: default_po_rate(),
13448            vendor_invoice_project_rate: default_vi_rate(),
13449        }
13450    }
13451}
13452
13453/// Revenue recognition configuration for project accounting.
13454#[derive(Debug, Clone, Serialize, Deserialize)]
13455pub struct ProjectRevenueRecognitionConfig {
13456    /// Whether revenue recognition is enabled for customer projects.
13457    #[serde(default = "default_true")]
13458    pub enabled: bool,
13459    /// Default method: "percentage_of_completion", "completed_contract", "milestone_based".
13460    #[serde(default = "default_revenue_method")]
13461    pub method: String,
13462    /// Default completion measure: "cost_to_cost", "labor_hours", "physical_completion".
13463    #[serde(default = "default_completion_measure")]
13464    pub completion_measure: String,
13465    /// Average contract value for customer projects.
13466    #[serde(default = "default_avg_contract_value")]
13467    pub avg_contract_value: f64,
13468}
13469
/// Fallback for `ProjectRevenueRecognitionConfig::method` when the key is omitted.
fn default_revenue_method() -> String {
    String::from("percentage_of_completion")
}
/// Fallback for `ProjectRevenueRecognitionConfig::completion_measure` when the key is omitted.
fn default_completion_measure() -> String {
    String::from("cost_to_cost")
}
/// Fallback for `ProjectRevenueRecognitionConfig::avg_contract_value` when the key is omitted.
fn default_avg_contract_value() -> f64 {
    500_000.0_f64
}
13479
13480impl Default for ProjectRevenueRecognitionConfig {
13481    fn default() -> Self {
13482        Self {
13483            enabled: true,
13484            method: default_revenue_method(),
13485            completion_measure: default_completion_measure(),
13486            avg_contract_value: default_avg_contract_value(),
13487        }
13488    }
13489}
13490
13491/// Milestone configuration.
13492#[derive(Debug, Clone, Serialize, Deserialize)]
13493pub struct MilestoneSchemaConfig {
13494    /// Whether milestone generation is enabled.
13495    #[serde(default = "default_true")]
13496    pub enabled: bool,
13497    /// Average number of milestones per project.
13498    #[serde(default = "default_milestones_per_project")]
13499    pub avg_per_project: u32,
13500    /// Fraction of milestones that are payment milestones (0.0 to 1.0).
13501    #[serde(default = "default_payment_milestone_rate")]
13502    pub payment_milestone_rate: f64,
13503}
13504
/// Fallback for `MilestoneSchemaConfig::avg_per_project` when the key is omitted.
fn default_milestones_per_project() -> u32 {
    4_u32
}
/// Fallback for `MilestoneSchemaConfig::payment_milestone_rate` when the key is omitted.
fn default_payment_milestone_rate() -> f64 {
    0.50_f64
}
13511
13512impl Default for MilestoneSchemaConfig {
13513    fn default() -> Self {
13514        Self {
13515            enabled: true,
13516            avg_per_project: default_milestones_per_project(),
13517            payment_milestone_rate: default_payment_milestone_rate(),
13518        }
13519    }
13520}
13521
13522/// Change order configuration.
13523#[derive(Debug, Clone, Serialize, Deserialize)]
13524pub struct ChangeOrderSchemaConfig {
13525    /// Whether change order generation is enabled.
13526    #[serde(default = "default_true")]
13527    pub enabled: bool,
13528    /// Probability that a project will have at least one change order (0.0 to 1.0).
13529    #[serde(default = "default_change_order_probability")]
13530    pub probability: f64,
13531    /// Maximum change orders per project.
13532    #[serde(default = "default_max_change_orders")]
13533    pub max_per_project: u32,
13534    /// Approval rate for change orders (0.0 to 1.0).
13535    #[serde(default = "default_change_order_approval_rate")]
13536    pub approval_rate: f64,
13537}
13538
/// Fallback for `ChangeOrderSchemaConfig::probability` when the key is omitted.
fn default_change_order_probability() -> f64 {
    0.40_f64
}
/// Fallback for `ChangeOrderSchemaConfig::max_per_project` when the key is omitted.
fn default_max_change_orders() -> u32 {
    3_u32
}
/// Fallback for `ChangeOrderSchemaConfig::approval_rate` when the key is omitted.
fn default_change_order_approval_rate() -> f64 {
    0.75_f64
}
13548
13549impl Default for ChangeOrderSchemaConfig {
13550    fn default() -> Self {
13551        Self {
13552            enabled: true,
13553            probability: default_change_order_probability(),
13554            max_per_project: default_max_change_orders(),
13555            approval_rate: default_change_order_approval_rate(),
13556        }
13557    }
13558}
13559
13560/// Retainage configuration.
13561#[derive(Debug, Clone, Serialize, Deserialize)]
13562pub struct RetainageSchemaConfig {
13563    /// Whether retainage is enabled.
13564    #[serde(default)]
13565    pub enabled: bool,
13566    /// Default retainage percentage (0.0 to 1.0, e.g., 0.10 for 10%).
13567    #[serde(default = "default_retainage_pct")]
13568    pub default_percentage: f64,
13569}
13570
/// Fallback for `RetainageSchemaConfig::default_percentage` when the key is omitted.
fn default_retainage_pct() -> f64 {
    0.10_f64
}
13574
13575impl Default for RetainageSchemaConfig {
13576    fn default() -> Self {
13577        Self {
13578            enabled: false,
13579            default_percentage: default_retainage_pct(),
13580        }
13581    }
13582}
13583
13584/// Earned value management (EVM) configuration.
13585#[derive(Debug, Clone, Serialize, Deserialize)]
13586pub struct EarnedValueSchemaConfig {
13587    /// Whether EVM metrics are generated.
13588    #[serde(default = "default_true")]
13589    pub enabled: bool,
13590    /// Measurement frequency: "weekly", "biweekly", "monthly".
13591    #[serde(default = "default_evm_frequency")]
13592    pub frequency: String,
13593}
13594
/// Fallback for `EarnedValueSchemaConfig::frequency` when the key is omitted.
fn default_evm_frequency() -> String {
    String::from("monthly")
}
13598
13599impl Default for EarnedValueSchemaConfig {
13600    fn default() -> Self {
13601        Self {
13602            enabled: true,
13603            frequency: default_evm_frequency(),
13604        }
13605    }
13606}
13607
13608// =============================================================================
13609// ESG / Sustainability Configuration
13610// =============================================================================
13611
13612/// Top-level ESG / sustainability reporting configuration.
13613#[derive(Debug, Clone, Serialize, Deserialize)]
13614pub struct EsgConfig {
13615    /// Whether ESG generation is enabled.
13616    #[serde(default)]
13617    pub enabled: bool,
13618    /// Environmental metrics (emissions, energy, water, waste).
13619    #[serde(default)]
13620    pub environmental: EnvironmentalConfig,
13621    /// Social metrics (diversity, pay equity, safety).
13622    #[serde(default)]
13623    pub social: SocialConfig,
13624    /// Governance metrics (board composition, ethics, compliance).
13625    #[serde(default)]
13626    pub governance: GovernanceSchemaConfig,
13627    /// Supply-chain ESG assessment settings.
13628    #[serde(default)]
13629    pub supply_chain_esg: SupplyChainEsgConfig,
13630    /// ESG reporting / disclosure framework settings.
13631    #[serde(default)]
13632    pub reporting: EsgReportingConfig,
13633    /// Climate scenario analysis settings.
13634    #[serde(default)]
13635    pub climate_scenarios: ClimateScenarioConfig,
13636    /// Anomaly injection rate for ESG data (0.0 to 1.0).
13637    #[serde(default = "default_esg_anomaly_rate")]
13638    pub anomaly_rate: f64,
13639}
13640
/// Fallback for `EsgConfig::anomaly_rate` when the key is omitted.
fn default_esg_anomaly_rate() -> f64 {
    0.02_f64
}
13644
13645impl Default for EsgConfig {
13646    fn default() -> Self {
13647        Self {
13648            enabled: false,
13649            environmental: EnvironmentalConfig::default(),
13650            social: SocialConfig::default(),
13651            governance: GovernanceSchemaConfig::default(),
13652            supply_chain_esg: SupplyChainEsgConfig::default(),
13653            reporting: EsgReportingConfig::default(),
13654            climate_scenarios: ClimateScenarioConfig::default(),
13655            anomaly_rate: default_esg_anomaly_rate(),
13656        }
13657    }
13658}
13659
13660/// Country pack configuration.
13661///
13662/// Controls where to load additional country packs and per-country overrides.
13663/// When omitted, only the built-in packs (_default, US, DE, GB) are used.
13664#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13665pub struct CountryPacksSchemaConfig {
13666    /// Optional directory containing additional `*.json` country packs.
13667    #[serde(default)]
13668    pub external_dir: Option<PathBuf>,
13669    /// Per-country overrides applied after loading.
13670    /// Keys are ISO 3166-1 alpha-2 codes; values are partial JSON objects
13671    /// that are deep-merged on top of the loaded pack.
13672    #[serde(default)]
13673    pub overrides: std::collections::HashMap<String, serde_json::Value>,
13674}
13675
13676/// Environmental metrics configuration.
13677#[derive(Debug, Clone, Serialize, Deserialize)]
13678pub struct EnvironmentalConfig {
13679    /// Whether environmental metrics are generated.
13680    #[serde(default = "default_true")]
13681    pub enabled: bool,
13682    /// Scope 1 (direct) emission generation settings.
13683    #[serde(default)]
13684    pub scope1: EmissionScopeConfig,
13685    /// Scope 2 (purchased energy) emission generation settings.
13686    #[serde(default)]
13687    pub scope2: EmissionScopeConfig,
13688    /// Scope 3 (value chain) emission generation settings.
13689    #[serde(default)]
13690    pub scope3: Scope3Config,
13691    /// Energy consumption tracking settings.
13692    #[serde(default)]
13693    pub energy: EnergySchemaConfig,
13694    /// Water usage tracking settings.
13695    #[serde(default)]
13696    pub water: WaterSchemaConfig,
13697    /// Waste management tracking settings.
13698    #[serde(default)]
13699    pub waste: WasteSchemaConfig,
13700}
13701
13702impl Default for EnvironmentalConfig {
13703    fn default() -> Self {
13704        Self {
13705            enabled: true,
13706            scope1: EmissionScopeConfig::default(),
13707            scope2: EmissionScopeConfig::default(),
13708            scope3: Scope3Config::default(),
13709            energy: EnergySchemaConfig::default(),
13710            water: WaterSchemaConfig::default(),
13711            waste: WasteSchemaConfig::default(),
13712        }
13713    }
13714}
13715
13716/// Configuration for a single emission scope (Scope 1 or 2).
13717#[derive(Debug, Clone, Serialize, Deserialize)]
13718pub struct EmissionScopeConfig {
13719    /// Whether this scope is enabled.
13720    #[serde(default = "default_true")]
13721    pub enabled: bool,
13722    /// Emission factor region (e.g., "US", "EU", "global").
13723    #[serde(default = "default_emission_region")]
13724    pub factor_region: String,
13725}
13726
/// Fallback for `EmissionScopeConfig::factor_region` when the key is omitted.
fn default_emission_region() -> String {
    String::from("US")
}
13730
13731impl Default for EmissionScopeConfig {
13732    fn default() -> Self {
13733        Self {
13734            enabled: true,
13735            factor_region: default_emission_region(),
13736        }
13737    }
13738}
13739
13740/// Scope 3 (value chain) emission configuration.
13741#[derive(Debug, Clone, Serialize, Deserialize)]
13742pub struct Scope3Config {
13743    /// Whether Scope 3 emissions are generated.
13744    #[serde(default = "default_true")]
13745    pub enabled: bool,
13746    /// Categories to include (e.g., "purchased_goods", "business_travel", "commuting").
13747    #[serde(default = "default_scope3_categories")]
13748    pub categories: Vec<String>,
13749    /// Spend-based emission intensity (kg CO2e per USD).
13750    #[serde(default = "default_spend_intensity")]
13751    pub default_spend_intensity_kg_per_usd: f64,
13752}
13753
/// Fallback for `Scope3Config::categories` when the key is omitted.
fn default_scope3_categories() -> Vec<String> {
    ["purchased_goods", "business_travel", "employee_commuting"]
        .iter()
        .map(|&category| category.to_owned())
        .collect()
}
13761
/// Fallback for `Scope3Config::default_spend_intensity_kg_per_usd` when the key is omitted.
fn default_spend_intensity() -> f64 {
    0.5_f64
}
13765
13766impl Default for Scope3Config {
13767    fn default() -> Self {
13768        Self {
13769            enabled: true,
13770            categories: default_scope3_categories(),
13771            default_spend_intensity_kg_per_usd: default_spend_intensity(),
13772        }
13773    }
13774}
13775
13776/// Energy consumption configuration.
13777#[derive(Debug, Clone, Serialize, Deserialize)]
13778pub struct EnergySchemaConfig {
13779    /// Whether energy consumption tracking is enabled.
13780    #[serde(default = "default_true")]
13781    pub enabled: bool,
13782    /// Number of facilities to generate.
13783    #[serde(default = "default_facility_count")]
13784    pub facility_count: u32,
13785    /// Target percentage of energy from renewable sources (0.0 to 1.0).
13786    #[serde(default = "default_renewable_target")]
13787    pub renewable_target: f64,
13788}
13789
/// Fallback for `EnergySchemaConfig::facility_count` when the key is omitted.
fn default_facility_count() -> u32 {
    5_u32
}
13793
/// Fallback for `EnergySchemaConfig::renewable_target` when the key is omitted.
fn default_renewable_target() -> f64 {
    0.30_f64
}
13797
13798impl Default for EnergySchemaConfig {
13799    fn default() -> Self {
13800        Self {
13801            enabled: true,
13802            facility_count: default_facility_count(),
13803            renewable_target: default_renewable_target(),
13804        }
13805    }
13806}
13807
13808/// Water usage configuration.
13809#[derive(Debug, Clone, Serialize, Deserialize)]
13810pub struct WaterSchemaConfig {
13811    /// Whether water usage tracking is enabled.
13812    #[serde(default = "default_true")]
13813    pub enabled: bool,
13814    /// Number of facilities with water tracking.
13815    #[serde(default = "default_water_facility_count")]
13816    pub facility_count: u32,
13817}
13818
/// Fallback for `WaterSchemaConfig::facility_count` when the key is omitted.
fn default_water_facility_count() -> u32 {
    3_u32
}
13822
13823impl Default for WaterSchemaConfig {
13824    fn default() -> Self {
13825        Self {
13826            enabled: true,
13827            facility_count: default_water_facility_count(),
13828        }
13829    }
13830}
13831
13832/// Waste management configuration.
13833#[derive(Debug, Clone, Serialize, Deserialize)]
13834pub struct WasteSchemaConfig {
13835    /// Whether waste tracking is enabled.
13836    #[serde(default = "default_true")]
13837    pub enabled: bool,
13838    /// Target diversion rate (0.0 to 1.0).
13839    #[serde(default = "default_diversion_target")]
13840    pub diversion_target: f64,
13841}
13842
/// Fallback for `WasteSchemaConfig::diversion_target` when the key is omitted.
fn default_diversion_target() -> f64 {
    0.50_f64
}
13846
13847impl Default for WasteSchemaConfig {
13848    fn default() -> Self {
13849        Self {
13850            enabled: true,
13851            diversion_target: default_diversion_target(),
13852        }
13853    }
13854}
13855
13856/// Social metrics configuration.
13857#[derive(Debug, Clone, Serialize, Deserialize)]
13858pub struct SocialConfig {
13859    /// Whether social metrics are generated.
13860    #[serde(default = "default_true")]
13861    pub enabled: bool,
13862    /// Workforce diversity tracking settings.
13863    #[serde(default)]
13864    pub diversity: DiversitySchemaConfig,
13865    /// Pay equity analysis settings.
13866    #[serde(default)]
13867    pub pay_equity: PayEquitySchemaConfig,
13868    /// Safety incident and metrics settings.
13869    #[serde(default)]
13870    pub safety: SafetySchemaConfig,
13871}
13872
13873impl Default for SocialConfig {
13874    fn default() -> Self {
13875        Self {
13876            enabled: true,
13877            diversity: DiversitySchemaConfig::default(),
13878            pay_equity: PayEquitySchemaConfig::default(),
13879            safety: SafetySchemaConfig::default(),
13880        }
13881    }
13882}
13883
13884/// Workforce diversity configuration.
13885#[derive(Debug, Clone, Serialize, Deserialize)]
13886pub struct DiversitySchemaConfig {
13887    /// Whether diversity metrics are generated.
13888    #[serde(default = "default_true")]
13889    pub enabled: bool,
13890    /// Dimensions to track (e.g., "gender", "ethnicity", "age_group").
13891    #[serde(default = "default_diversity_dimensions")]
13892    pub dimensions: Vec<String>,
13893}
13894
/// Serde default for `DiversitySchemaConfig::dimensions`.
fn default_diversity_dimensions() -> Vec<String> {
    ["gender", "ethnicity", "age_group"]
        .iter()
        .map(|dimension| dimension.to_string())
        .collect()
}
13902
13903impl Default for DiversitySchemaConfig {
13904    fn default() -> Self {
13905        Self {
13906            enabled: true,
13907            dimensions: default_diversity_dimensions(),
13908        }
13909    }
13910}
13911
13912/// Pay equity analysis configuration.
13913#[derive(Debug, Clone, Serialize, Deserialize)]
13914pub struct PayEquitySchemaConfig {
13915    /// Whether pay equity analysis is generated.
13916    #[serde(default = "default_true")]
13917    pub enabled: bool,
13918    /// Target pay gap threshold for flagging (e.g., 0.05 = 5% gap).
13919    #[serde(default = "default_pay_gap_threshold")]
13920    pub gap_threshold: f64,
13921}
13922
/// Serde default for `PayEquitySchemaConfig::gap_threshold` (0.05).
fn default_pay_gap_threshold() -> f64 {
    const GAP_THRESHOLD: f64 = 0.05;
    GAP_THRESHOLD
}
13926
13927impl Default for PayEquitySchemaConfig {
13928    fn default() -> Self {
13929        Self {
13930            enabled: true,
13931            gap_threshold: default_pay_gap_threshold(),
13932        }
13933    }
13934}
13935
13936/// Safety metrics configuration.
13937#[derive(Debug, Clone, Serialize, Deserialize)]
13938pub struct SafetySchemaConfig {
13939    /// Whether safety metrics are generated.
13940    #[serde(default = "default_true")]
13941    pub enabled: bool,
13942    /// Average annual recordable incidents per 200,000 hours.
13943    #[serde(default = "default_trir_target")]
13944    pub target_trir: f64,
13945    /// Number of safety incidents to generate.
13946    #[serde(default = "default_incident_count")]
13947    pub incident_count: u32,
13948}
13949
/// Serde default for `SafetySchemaConfig::target_trir` (2.5).
fn default_trir_target() -> f64 {
    const TRIR_TARGET: f64 = 2.5;
    TRIR_TARGET
}
13953
/// Serde default for `SafetySchemaConfig::incident_count` (20).
fn default_incident_count() -> u32 {
    const INCIDENTS: u32 = 20;
    INCIDENTS
}
13957
13958impl Default for SafetySchemaConfig {
13959    fn default() -> Self {
13960        Self {
13961            enabled: true,
13962            target_trir: default_trir_target(),
13963            incident_count: default_incident_count(),
13964        }
13965    }
13966}
13967
13968/// Governance metrics configuration.
13969#[derive(Debug, Clone, Serialize, Deserialize)]
13970pub struct GovernanceSchemaConfig {
13971    /// Whether governance metrics are generated.
13972    #[serde(default = "default_true")]
13973    pub enabled: bool,
13974    /// Number of board members.
13975    #[serde(default = "default_board_size")]
13976    pub board_size: u32,
13977    /// Target independent director ratio (0.0 to 1.0).
13978    #[serde(default = "default_independence_target")]
13979    pub independence_target: f64,
13980}
13981
/// Serde default for `GovernanceSchemaConfig::board_size` (11 members).
fn default_board_size() -> u32 {
    const BOARD_SIZE: u32 = 11;
    BOARD_SIZE
}
13985
/// Serde default for `GovernanceSchemaConfig::independence_target` (0.67).
fn default_independence_target() -> f64 {
    const INDEPENDENCE_TARGET: f64 = 0.67;
    INDEPENDENCE_TARGET
}
13989
13990impl Default for GovernanceSchemaConfig {
13991    fn default() -> Self {
13992        Self {
13993            enabled: true,
13994            board_size: default_board_size(),
13995            independence_target: default_independence_target(),
13996        }
13997    }
13998}
13999
14000/// Supply-chain ESG assessment configuration.
14001#[derive(Debug, Clone, Serialize, Deserialize)]
14002pub struct SupplyChainEsgConfig {
14003    /// Whether supply chain ESG assessments are generated.
14004    #[serde(default = "default_true")]
14005    pub enabled: bool,
14006    /// Proportion of vendors to assess (0.0 to 1.0).
14007    #[serde(default = "default_assessment_coverage")]
14008    pub assessment_coverage: f64,
14009    /// High-risk country codes for automatic flagging.
14010    #[serde(default = "default_high_risk_countries")]
14011    pub high_risk_countries: Vec<String>,
14012}
14013
/// Serde default for `SupplyChainEsgConfig::assessment_coverage` (0.80).
fn default_assessment_coverage() -> f64 {
    const COVERAGE: f64 = 0.80;
    COVERAGE
}
14017
/// Serde default for `SupplyChainEsgConfig::high_risk_countries`.
fn default_high_risk_countries() -> Vec<String> {
    ["CN", "BD", "MM"]
        .iter()
        .map(|code| code.to_string())
        .collect()
}
14021
14022impl Default for SupplyChainEsgConfig {
14023    fn default() -> Self {
14024        Self {
14025            enabled: true,
14026            assessment_coverage: default_assessment_coverage(),
14027            high_risk_countries: default_high_risk_countries(),
14028        }
14029    }
14030}
14031
14032/// ESG reporting / disclosure framework configuration.
14033#[derive(Debug, Clone, Serialize, Deserialize)]
14034pub struct EsgReportingConfig {
14035    /// Whether ESG disclosures are generated.
14036    #[serde(default = "default_true")]
14037    pub enabled: bool,
14038    /// Frameworks to generate disclosures for.
14039    #[serde(default = "default_esg_frameworks")]
14040    pub frameworks: Vec<String>,
14041    /// Whether materiality assessment is performed.
14042    #[serde(default = "default_true")]
14043    pub materiality_assessment: bool,
14044    /// Materiality threshold for impact dimension (0.0 to 1.0).
14045    #[serde(default = "default_materiality_threshold")]
14046    pub impact_threshold: f64,
14047    /// Materiality threshold for financial dimension (0.0 to 1.0).
14048    #[serde(default = "default_materiality_threshold")]
14049    pub financial_threshold: f64,
14050}
14051
/// Serde default for `EsgReportingConfig::frameworks`.
fn default_esg_frameworks() -> Vec<String> {
    ["GRI", "ESRS"]
        .iter()
        .map(|framework| framework.to_string())
        .collect()
}
14055
/// Serde default shared by `EsgReportingConfig::impact_threshold` and
/// `EsgReportingConfig::financial_threshold` (0.6).
fn default_materiality_threshold() -> f64 {
    const MATERIALITY_THRESHOLD: f64 = 0.6;
    MATERIALITY_THRESHOLD
}
14059
14060impl Default for EsgReportingConfig {
14061    fn default() -> Self {
14062        Self {
14063            enabled: true,
14064            frameworks: default_esg_frameworks(),
14065            materiality_assessment: true,
14066            impact_threshold: default_materiality_threshold(),
14067            financial_threshold: default_materiality_threshold(),
14068        }
14069    }
14070}
14071
14072/// Climate scenario analysis configuration.
14073#[derive(Debug, Clone, Serialize, Deserialize)]
14074pub struct ClimateScenarioConfig {
14075    /// Whether climate scenario analysis is generated.
14076    #[serde(default)]
14077    pub enabled: bool,
14078    /// Scenarios to model (e.g., "net_zero_2050", "stated_policies", "current_trajectory").
14079    #[serde(default = "default_climate_scenarios")]
14080    pub scenarios: Vec<String>,
14081    /// Time horizons in years to project.
14082    #[serde(default = "default_time_horizons")]
14083    pub time_horizons: Vec<u32>,
14084}
14085
/// Serde default for `ClimateScenarioConfig::scenarios`.
fn default_climate_scenarios() -> Vec<String> {
    ["net_zero_2050", "stated_policies", "current_trajectory"]
        .iter()
        .map(|scenario| scenario.to_string())
        .collect()
}
14093
/// Serde default for `ClimateScenarioConfig::time_horizons` (years).
fn default_time_horizons() -> Vec<u32> {
    const HORIZONS: [u32; 3] = [5, 10, 30];
    HORIZONS.to_vec()
}
14097
14098impl Default for ClimateScenarioConfig {
14099    fn default() -> Self {
14100        Self {
14101            enabled: false,
14102            scenarios: default_climate_scenarios(),
14103            time_horizons: default_time_horizons(),
14104        }
14105    }
14106}
14107
14108// ===== Counterfactual Simulation Scenarios =====
14109
14110/// Configuration for counterfactual simulation scenarios.
14111#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14112pub struct ScenariosConfig {
14113    /// Whether scenario generation is enabled.
14114    #[serde(default)]
14115    pub enabled: bool,
14116    /// List of scenario definitions.
14117    #[serde(default)]
14118    pub scenarios: Vec<ScenarioSchemaConfig>,
14119    /// Causal model configuration.
14120    #[serde(default)]
14121    pub causal_model: CausalModelSchemaConfig,
14122    /// Default settings applied to all scenarios.
14123    #[serde(default)]
14124    pub defaults: ScenarioDefaultsConfig,
14125    /// Generate counterfactual (original, mutated) JE pairs for ML training.
14126    /// When true, the orchestrator produces paired clean/anomalous journal entries.
14127    #[serde(default)]
14128    pub generate_counterfactuals: bool,
14129}
14130
14131/// A single scenario definition in the config.
14132#[derive(Debug, Clone, Serialize, Deserialize)]
14133pub struct ScenarioSchemaConfig {
14134    /// Scenario name (must be unique).
14135    pub name: String,
14136    /// Human-readable description.
14137    #[serde(default)]
14138    pub description: String,
14139    /// Tags for categorization.
14140    #[serde(default)]
14141    pub tags: Vec<String>,
14142    /// Base scenario name (None = default config).
14143    pub base: Option<String>,
14144    /// IFRS 9-style probability weight.
14145    pub probability_weight: Option<f64>,
14146    /// List of interventions to apply.
14147    #[serde(default)]
14148    pub interventions: Vec<InterventionSchemaConfig>,
14149    /// Constraint overrides for this scenario.
14150    #[serde(default)]
14151    pub constraints: ScenarioConstraintsSchemaConfig,
14152    /// Output configuration for this scenario.
14153    #[serde(default)]
14154    pub output: ScenarioOutputSchemaConfig,
14155    /// Arbitrary metadata.
14156    #[serde(default)]
14157    pub metadata: std::collections::HashMap<String, String>,
14158}
14159
14160/// An intervention definition in the config.
14161#[derive(Debug, Clone, Serialize, Deserialize)]
14162pub struct InterventionSchemaConfig {
14163    /// Intervention type and parameters (flattened tagged enum).
14164    #[serde(flatten)]
14165    pub intervention_type: serde_json::Value,
14166    /// Timing configuration.
14167    #[serde(default)]
14168    pub timing: InterventionTimingSchemaConfig,
14169    /// Human-readable label.
14170    pub label: Option<String>,
14171    /// Priority for conflict resolution (higher wins).
14172    #[serde(default)]
14173    pub priority: u32,
14174}
14175
14176/// Timing configuration for an intervention.
14177#[derive(Debug, Clone, Serialize, Deserialize)]
14178pub struct InterventionTimingSchemaConfig {
14179    /// Month offset from start (1-indexed).
14180    #[serde(default = "default_start_month")]
14181    pub start_month: u32,
14182    /// Duration in months.
14183    pub duration_months: Option<u32>,
14184    /// Onset type: "sudden", "gradual", "oscillating", "custom".
14185    #[serde(default = "default_onset")]
14186    pub onset: String,
14187    /// Ramp period in months.
14188    pub ramp_months: Option<u32>,
14189}
14190
/// Serde default for `InterventionTimingSchemaConfig::start_month`
/// (month 1, i.e. the first month).
fn default_start_month() -> u32 {
    const FIRST_MONTH: u32 = 1;
    FIRST_MONTH
}
14194
/// Serde default for `InterventionTimingSchemaConfig::onset` ("sudden").
fn default_onset() -> String {
    String::from("sudden")
}
14198
14199impl Default for InterventionTimingSchemaConfig {
14200    fn default() -> Self {
14201        Self {
14202            start_month: 1,
14203            duration_months: None,
14204            onset: "sudden".to_string(),
14205            ramp_months: None,
14206        }
14207    }
14208}
14209
14210/// Scenario constraint overrides.
14211#[derive(Debug, Clone, Serialize, Deserialize)]
14212pub struct ScenarioConstraintsSchemaConfig {
14213    #[serde(default = "default_true")]
14214    pub preserve_accounting_identity: bool,
14215    #[serde(default = "default_true")]
14216    pub preserve_document_chains: bool,
14217    #[serde(default = "default_true")]
14218    pub preserve_period_close: bool,
14219    #[serde(default = "default_true")]
14220    pub preserve_balance_coherence: bool,
14221    #[serde(default)]
14222    pub custom: Vec<CustomConstraintSchemaConfig>,
14223}
14224
14225impl Default for ScenarioConstraintsSchemaConfig {
14226    fn default() -> Self {
14227        Self {
14228            preserve_accounting_identity: true,
14229            preserve_document_chains: true,
14230            preserve_period_close: true,
14231            preserve_balance_coherence: true,
14232            custom: Vec::new(),
14233        }
14234    }
14235}
14236
14237/// Custom constraint in config.
14238#[derive(Debug, Clone, Serialize, Deserialize)]
14239pub struct CustomConstraintSchemaConfig {
14240    pub config_path: String,
14241    pub min: Option<f64>,
14242    pub max: Option<f64>,
14243    #[serde(default)]
14244    pub description: String,
14245}
14246
14247/// Output configuration for a scenario.
14248#[derive(Debug, Clone, Serialize, Deserialize)]
14249pub struct ScenarioOutputSchemaConfig {
14250    #[serde(default = "default_true")]
14251    pub paired: bool,
14252    #[serde(default = "default_diff_formats_schema")]
14253    pub diff_formats: Vec<String>,
14254    #[serde(default)]
14255    pub diff_scope: Vec<String>,
14256}
14257
/// Serde default for `ScenarioOutputSchemaConfig::diff_formats`.
fn default_diff_formats_schema() -> Vec<String> {
    ["summary", "aggregate"]
        .iter()
        .map(|format| format.to_string())
        .collect()
}
14261
14262impl Default for ScenarioOutputSchemaConfig {
14263    fn default() -> Self {
14264        Self {
14265            paired: true,
14266            diff_formats: default_diff_formats_schema(),
14267            diff_scope: Vec::new(),
14268        }
14269    }
14270}
14271
14272/// Causal model configuration.
14273#[derive(Debug, Clone, Serialize, Deserialize)]
14274pub struct CausalModelSchemaConfig {
14275    /// Preset name: "default", "minimal", or "custom".
14276    #[serde(default = "default_causal_preset")]
14277    pub preset: String,
14278    /// Custom nodes (merged with preset).
14279    #[serde(default)]
14280    pub nodes: Vec<serde_json::Value>,
14281    /// Custom edges (merged with preset).
14282    #[serde(default)]
14283    pub edges: Vec<serde_json::Value>,
14284}
14285
/// Serde default for `CausalModelSchemaConfig::preset` ("default").
fn default_causal_preset() -> String {
    String::from("default")
}
14289
14290impl Default for CausalModelSchemaConfig {
14291    fn default() -> Self {
14292        Self {
14293            preset: "default".to_string(),
14294            nodes: Vec::new(),
14295            edges: Vec::new(),
14296        }
14297    }
14298}
14299
14300/// Default settings applied to all scenarios.
14301#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14302pub struct ScenarioDefaultsConfig {
14303    #[serde(default)]
14304    pub constraints: ScenarioConstraintsSchemaConfig,
14305    #[serde(default)]
14306    pub output: ScenarioOutputSchemaConfig,
14307}
14308
14309// =====================================================================
14310// Compliance Regulations Framework Configuration
14311// =====================================================================
14312
14313/// Top-level configuration for the compliance regulations framework.
14314///
14315/// Controls standards registry, jurisdiction profiles, temporal versioning,
14316/// audit procedure templates, compliance graph integration, and output settings.
14317///
14318/// # Example
14319///
14320/// ```yaml
14321/// compliance_regulations:
14322///   enabled: true
14323///   jurisdictions: [US, DE, GB]
14324///   reference_date: "2025-06-30"
14325///   standards_selection:
14326///     categories: [accounting, auditing, regulatory]
14327///     include: ["IFRS-16", "ASC-606"]
14328///   audit_procedures:
14329///     enabled: true
14330///     procedures_per_standard: 3
14331///   findings:
14332///     enabled: true
14333///     finding_rate: 0.05
14334///   filings:
14335///     enabled: true
14336///   graph:
14337///     enabled: true
14338///     include_compliance_nodes: true
14339///     include_compliance_edges: true
14340/// ```
14341#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14342pub struct ComplianceRegulationsConfig {
14343    /// Master switch for the compliance regulations framework.
14344    #[serde(default)]
14345    pub enabled: bool,
14346    /// Jurisdictions to generate compliance data for (ISO 3166-1 alpha-2 codes).
14347    /// If empty, inferred from company countries in the config.
14348    #[serde(default)]
14349    pub jurisdictions: Vec<String>,
14350    /// Reference date for temporal standard resolution (YYYY-MM-DD).
14351    /// Defaults to the global start_date if not set.
14352    #[serde(default)]
14353    pub reference_date: Option<String>,
14354    /// Standards selection filters.
14355    #[serde(default)]
14356    pub standards_selection: StandardsSelectionConfig,
14357    /// Audit procedure generation settings.
14358    #[serde(default)]
14359    pub audit_procedures: AuditProcedureGenConfig,
14360    /// Compliance finding generation settings.
14361    #[serde(default)]
14362    pub findings: ComplianceFindingGenConfig,
14363    /// Regulatory filing generation settings.
14364    #[serde(default)]
14365    pub filings: ComplianceFilingGenConfig,
14366    /// Compliance graph integration settings.
14367    #[serde(default)]
14368    pub graph: ComplianceGraphConfig,
14369    /// Output settings for compliance-specific files.
14370    #[serde(default)]
14371    pub output: ComplianceOutputConfig,
14372    /// v3.3.0: legal-document generation (engagement letters,
14373    /// management reps, legal opinions, regulatory filings, board
14374    /// resolutions). Requires `compliance_regulations.enabled = true`
14375    /// AND `legal_documents.enabled = true` to take effect.
14376    #[serde(default)]
14377    pub legal_documents: LegalDocumentsConfig,
14378}
14379
14380/// Legal-document generation settings (v3.3.0+).
14381///
14382/// Wires `LegalDocumentGenerator` into the orchestrator. Generates one
14383/// batch per audit engagement when enabled.
14384#[derive(Debug, Clone, Serialize, Deserialize)]
14385pub struct LegalDocumentsConfig {
14386    /// Master switch.
14387    #[serde(default)]
14388    pub enabled: bool,
14389    /// Probability of including a legal-opinion document in an engagement.
14390    #[serde(default = "default_legal_opinion_probability")]
14391    pub legal_opinion_probability: f64,
14392}
14393
/// Serde default for `LegalDocumentsConfig::legal_opinion_probability` (0.40).
fn default_legal_opinion_probability() -> f64 {
    const LEGAL_OPINION_PROBABILITY: f64 = 0.40;
    LEGAL_OPINION_PROBABILITY
}
14397
14398impl Default for LegalDocumentsConfig {
14399    fn default() -> Self {
14400        Self {
14401            enabled: false,
14402            legal_opinion_probability: default_legal_opinion_probability(),
14403        }
14404    }
14405}
14406
14407/// Filters which standards are included in the generation.
14408#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14409pub struct StandardsSelectionConfig {
14410    /// Standard categories to include (accounting, auditing, regulatory, tax, esg).
14411    /// Empty = all categories.
14412    #[serde(default)]
14413    pub categories: Vec<String>,
14414    /// Explicit standard IDs to include (e.g., ["IFRS-16", "ASC-606"]).
14415    /// When non-empty, only these standards (plus mandatory ones for selected jurisdictions) are used.
14416    #[serde(default)]
14417    pub include: Vec<String>,
14418    /// Standard IDs to exclude.
14419    #[serde(default)]
14420    pub exclude: Vec<String>,
14421    /// Include superseded standards in the output (for historical analysis).
14422    #[serde(default)]
14423    pub include_superseded: bool,
14424}
14425
14426/// Configuration for audit procedure template generation.
14427#[derive(Debug, Clone, Serialize, Deserialize)]
14428pub struct AuditProcedureGenConfig {
14429    /// Whether audit procedure generation is enabled.
14430    #[serde(default)]
14431    pub enabled: bool,
14432    /// Number of procedures to generate per applicable standard.
14433    #[serde(default = "default_procedures_per_standard")]
14434    pub procedures_per_standard: usize,
14435    /// Sampling methodology: "statistical", "non_statistical", "mixed".
14436    #[serde(default = "default_sampling_method")]
14437    pub sampling_method: String,
14438    /// Confidence level for statistical sampling (0.0-1.0).
14439    #[serde(default = "default_confidence_level")]
14440    pub confidence_level: f64,
14441    /// Tolerable misstatement rate for sampling (0.0-1.0).
14442    #[serde(default = "default_tolerable_misstatement")]
14443    pub tolerable_misstatement: f64,
14444}
14445
/// Serde default for `AuditProcedureGenConfig::procedures_per_standard` (3).
fn default_procedures_per_standard() -> usize {
    const PROCEDURES: usize = 3;
    PROCEDURES
}
14449
/// Serde default for `AuditProcedureGenConfig::sampling_method` ("statistical").
fn default_sampling_method() -> String {
    String::from("statistical")
}
14453
/// Serde default for `AuditProcedureGenConfig::confidence_level` (0.95).
fn default_confidence_level() -> f64 {
    const CONFIDENCE_LEVEL: f64 = 0.95;
    CONFIDENCE_LEVEL
}
14457
/// Serde default for `AuditProcedureGenConfig::tolerable_misstatement` (0.05).
fn default_tolerable_misstatement() -> f64 {
    const TOLERABLE_MISSTATEMENT: f64 = 0.05;
    TOLERABLE_MISSTATEMENT
}
14461
14462impl Default for AuditProcedureGenConfig {
14463    fn default() -> Self {
14464        Self {
14465            enabled: false,
14466            procedures_per_standard: default_procedures_per_standard(),
14467            sampling_method: default_sampling_method(),
14468            confidence_level: default_confidence_level(),
14469            tolerable_misstatement: default_tolerable_misstatement(),
14470        }
14471    }
14472}
14473
14474/// Configuration for compliance finding generation.
14475#[derive(Debug, Clone, Serialize, Deserialize)]
14476pub struct ComplianceFindingGenConfig {
14477    /// Whether finding generation is enabled.
14478    #[serde(default)]
14479    pub enabled: bool,
14480    /// Rate of findings per audit procedure (0.0-1.0).
14481    #[serde(default = "default_finding_rate")]
14482    pub finding_rate: f64,
14483    /// Rate of material weakness findings among all findings (0.0-1.0).
14484    #[serde(default = "default_cr_material_weakness_rate")]
14485    pub material_weakness_rate: f64,
14486    /// Rate of significant deficiency findings among all findings (0.0-1.0).
14487    #[serde(default = "default_cr_significant_deficiency_rate")]
14488    pub significant_deficiency_rate: f64,
14489    /// Whether to generate remediation plans for findings.
14490    #[serde(default = "default_true")]
14491    pub generate_remediation: bool,
14492}
14493
/// Serde default for `ComplianceFindingGenConfig::finding_rate` (0.05).
fn default_finding_rate() -> f64 {
    const FINDING_RATE: f64 = 0.05;
    FINDING_RATE
}
14497
/// Serde default for `ComplianceFindingGenConfig::material_weakness_rate` (0.02).
fn default_cr_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}
14501
/// Serde default for
/// `ComplianceFindingGenConfig::significant_deficiency_rate` (0.08).
fn default_cr_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
14505
14506impl Default for ComplianceFindingGenConfig {
14507    fn default() -> Self {
14508        Self {
14509            enabled: false,
14510            finding_rate: default_finding_rate(),
14511            material_weakness_rate: default_cr_material_weakness_rate(),
14512            significant_deficiency_rate: default_cr_significant_deficiency_rate(),
14513            generate_remediation: true,
14514        }
14515    }
14516}
14517
14518/// Configuration for regulatory filing generation.
14519#[derive(Debug, Clone, Serialize, Deserialize)]
14520pub struct ComplianceFilingGenConfig {
14521    /// Whether filing generation is enabled.
14522    #[serde(default)]
14523    pub enabled: bool,
14524    /// Filing types to include (e.g., ["10-K", "10-Q", "Jahresabschluss"]).
14525    /// Empty = all applicable filings for the selected jurisdictions.
14526    #[serde(default)]
14527    pub filing_types: Vec<String>,
14528    /// Generate filing status progression (draft → filed → accepted).
14529    #[serde(default = "default_true")]
14530    pub generate_status_progression: bool,
14531}
14532
14533impl Default for ComplianceFilingGenConfig {
14534    fn default() -> Self {
14535        Self {
14536            enabled: false,
14537            filing_types: Vec::new(),
14538            generate_status_progression: true,
14539        }
14540    }
14541}
14542
14543/// Configuration for compliance graph integration.
14544#[derive(Debug, Clone, Serialize, Deserialize)]
14545pub struct ComplianceGraphConfig {
14546    /// Whether compliance graph integration is enabled.
14547    #[serde(default)]
14548    pub enabled: bool,
14549    /// Include compliance nodes (Standard, Regulation, Jurisdiction, etc.).
14550    #[serde(default = "default_true")]
14551    pub include_compliance_nodes: bool,
14552    /// Include compliance edges (MapsToStandard, TestsControl, etc.).
14553    #[serde(default = "default_true")]
14554    pub include_compliance_edges: bool,
14555    /// Include cross-reference edges between standards.
14556    #[serde(default = "default_true")]
14557    pub include_cross_references: bool,
14558    /// Include temporal supersession edges.
14559    #[serde(default)]
14560    pub include_supersession_edges: bool,
14561    /// Include edges linking standards to the GL account types they govern.
14562    #[serde(default = "default_true")]
14563    pub include_account_links: bool,
14564    /// Include edges linking standards to the internal controls that implement them.
14565    #[serde(default = "default_true")]
14566    pub include_control_links: bool,
14567    /// Include edges linking filings and jurisdictions to the originating company.
14568    #[serde(default = "default_true")]
14569    pub include_company_links: bool,
14570}
14571
14572impl Default for ComplianceGraphConfig {
14573    fn default() -> Self {
14574        Self {
14575            enabled: false,
14576            include_compliance_nodes: true,
14577            include_compliance_edges: true,
14578            include_cross_references: true,
14579            include_supersession_edges: false,
14580            include_account_links: true,
14581            include_control_links: true,
14582            include_company_links: true,
14583        }
14584    }
14585}
14586
14587/// Output settings for compliance-specific data files.
14588#[derive(Debug, Clone, Serialize, Deserialize)]
14589pub struct ComplianceOutputConfig {
14590    /// Export the standards registry catalog.
14591    #[serde(default = "default_true")]
14592    pub export_registry: bool,
14593    /// Export jurisdiction profiles.
14594    #[serde(default = "default_true")]
14595    pub export_jurisdictions: bool,
14596    /// Export cross-reference map.
14597    #[serde(default = "default_true")]
14598    pub export_cross_references: bool,
14599    /// Export temporal version history.
14600    #[serde(default)]
14601    pub export_version_history: bool,
14602}
14603
14604impl Default for ComplianceOutputConfig {
14605    fn default() -> Self {
14606        Self {
14607            export_registry: true,
14608            export_jurisdictions: true,
14609            export_cross_references: true,
14610            export_version_history: false,
14611        }
14612    }
14613}
14614
14615#[cfg(test)]
14616mod tests {
14617    use super::*;
14618    use crate::presets::demo_preset;
14619
14620    // ==========================================================================
14621    // Serialization/Deserialization Tests
14622    // ==========================================================================
14623
14624    #[test]
14625    fn test_config_yaml_roundtrip() {
14626        let config = demo_preset();
14627        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
14628        let deserialized: GeneratorConfig =
14629            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
14630
14631        assert_eq!(
14632            config.global.period_months,
14633            deserialized.global.period_months
14634        );
14635        assert_eq!(config.global.industry, deserialized.global.industry);
14636        assert_eq!(config.companies.len(), deserialized.companies.len());
14637        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
14638    }
14639
14640    #[test]
14641    fn test_config_json_roundtrip() {
14642        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
14643        let mut config = demo_preset();
14644        // Replace infinity with a large but finite value for JSON compatibility
14645        config.master_data.employees.approval_limits.executive = 1e12;
14646
14647        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
14648        let deserialized: GeneratorConfig =
14649            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
14650
14651        assert_eq!(
14652            config.global.period_months,
14653            deserialized.global.period_months
14654        );
14655        assert_eq!(config.global.industry, deserialized.global.industry);
14656        assert_eq!(config.companies.len(), deserialized.companies.len());
14657    }
14658
14659    #[test]
14660    fn test_transaction_volume_serialization() {
14661        // Test various transaction volumes serialize correctly
14662        let volumes = vec![
14663            (TransactionVolume::TenK, "ten_k"),
14664            (TransactionVolume::HundredK, "hundred_k"),
14665            (TransactionVolume::OneM, "one_m"),
14666            (TransactionVolume::TenM, "ten_m"),
14667            (TransactionVolume::HundredM, "hundred_m"),
14668        ];
14669
14670        for (volume, expected_key) in volumes {
14671            let json = serde_json::to_string(&volume).expect("Failed to serialize");
14672            assert!(
14673                json.contains(expected_key),
14674                "Expected {} in JSON: {}",
14675                expected_key,
14676                json
14677            );
14678        }
14679    }
14680
14681    #[test]
14682    fn test_transaction_volume_custom_serialization() {
14683        let volume = TransactionVolume::Custom(12345);
14684        let json = serde_json::to_string(&volume).expect("Failed to serialize");
14685        let deserialized: TransactionVolume =
14686            serde_json::from_str(&json).expect("Failed to deserialize");
14687        assert_eq!(deserialized.count(), 12345);
14688    }
14689
14690    #[test]
14691    fn test_output_mode_serialization() {
14692        let modes = vec![
14693            OutputMode::Streaming,
14694            OutputMode::FlatFile,
14695            OutputMode::Both,
14696        ];
14697
14698        for mode in modes {
14699            let json = serde_json::to_string(&mode).expect("Failed to serialize");
14700            let deserialized: OutputMode =
14701                serde_json::from_str(&json).expect("Failed to deserialize");
14702            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
14703        }
14704    }
14705
14706    #[test]
14707    fn test_file_format_serialization() {
14708        let formats = vec![
14709            FileFormat::Csv,
14710            FileFormat::Parquet,
14711            FileFormat::Json,
14712            FileFormat::JsonLines,
14713        ];
14714
14715        for format in formats {
14716            let json = serde_json::to_string(&format).expect("Failed to serialize");
14717            let deserialized: FileFormat =
14718                serde_json::from_str(&json).expect("Failed to deserialize");
14719            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
14720        }
14721    }
14722
14723    #[test]
14724    fn test_compression_algorithm_serialization() {
14725        let algos = vec![
14726            CompressionAlgorithm::Gzip,
14727            CompressionAlgorithm::Zstd,
14728            CompressionAlgorithm::Lz4,
14729            CompressionAlgorithm::Snappy,
14730        ];
14731
14732        for algo in algos {
14733            let json = serde_json::to_string(&algo).expect("Failed to serialize");
14734            let deserialized: CompressionAlgorithm =
14735                serde_json::from_str(&json).expect("Failed to deserialize");
14736            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
14737        }
14738    }
14739
14740    #[test]
14741    fn test_transfer_pricing_method_serialization() {
14742        let methods = vec![
14743            TransferPricingMethod::CostPlus,
14744            TransferPricingMethod::ComparableUncontrolled,
14745            TransferPricingMethod::ResalePrice,
14746            TransferPricingMethod::TransactionalNetMargin,
14747            TransferPricingMethod::ProfitSplit,
14748        ];
14749
14750        for method in methods {
14751            let json = serde_json::to_string(&method).expect("Failed to serialize");
14752            let deserialized: TransferPricingMethod =
14753                serde_json::from_str(&json).expect("Failed to deserialize");
14754            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
14755        }
14756    }
14757
14758    #[test]
14759    fn test_benford_exemption_serialization() {
14760        let exemptions = vec![
14761            BenfordExemption::Recurring,
14762            BenfordExemption::Payroll,
14763            BenfordExemption::FixedFees,
14764            BenfordExemption::RoundAmounts,
14765        ];
14766
14767        for exemption in exemptions {
14768            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
14769            let deserialized: BenfordExemption =
14770                serde_json::from_str(&json).expect("Failed to deserialize");
14771            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
14772        }
14773    }
14774
14775    // ==========================================================================
14776    // Default Value Tests
14777    // ==========================================================================
14778
14779    #[test]
14780    fn test_global_config_defaults() {
14781        let yaml = r#"
14782            industry: manufacturing
14783            start_date: "2024-01-01"
14784            period_months: 6
14785        "#;
14786        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14787        assert_eq!(config.group_currency, "USD");
14788        assert!(config.parallel);
14789        assert_eq!(config.worker_threads, 0);
14790        assert_eq!(config.memory_limit_mb, 0);
14791    }
14792
14793    #[test]
14794    fn test_fraud_config_defaults() {
14795        let config = FraudConfig::default();
14796        assert!(!config.enabled);
14797        assert_eq!(config.fraud_rate, 0.005);
14798        assert!(!config.clustering_enabled);
14799    }
14800
14801    #[test]
14802    fn test_internal_controls_config_defaults() {
14803        let config = InternalControlsConfig::default();
14804        assert!(!config.enabled);
14805        assert_eq!(config.exception_rate, 0.02);
14806        assert_eq!(config.sod_violation_rate, 0.01);
14807        assert!(config.export_control_master_data);
14808        assert_eq!(config.sox_materiality_threshold, 10000.0);
14809        // COSO fields
14810        assert!(config.coso_enabled);
14811        assert!(!config.include_entity_level_controls);
14812        assert_eq!(config.target_maturity_level, "mixed");
14813    }
14814
14815    #[test]
14816    fn test_output_config_defaults() {
14817        let config = OutputConfig::default();
14818        assert!(matches!(config.mode, OutputMode::FlatFile));
14819        assert_eq!(config.formats, vec![FileFormat::Parquet]);
14820        assert!(config.compression.enabled);
14821        assert!(matches!(
14822            config.compression.algorithm,
14823            CompressionAlgorithm::Zstd
14824        ));
14825        assert!(config.include_acdoca);
14826        assert!(!config.include_bseg);
14827        assert!(config.partition_by_period);
14828        assert!(!config.partition_by_company);
14829    }
14830
14831    #[test]
14832    fn test_approval_config_defaults() {
14833        let config = ApprovalConfig::default();
14834        assert!(!config.enabled);
14835        assert_eq!(config.auto_approve_threshold, 1000.0);
14836        assert_eq!(config.rejection_rate, 0.02);
14837        assert_eq!(config.revision_rate, 0.05);
14838        assert_eq!(config.average_approval_delay_hours, 4.0);
14839        assert_eq!(config.thresholds.len(), 4);
14840    }
14841
14842    #[test]
14843    fn test_p2p_flow_config_defaults() {
14844        let config = P2PFlowConfig::default();
14845        assert!(config.enabled);
14846        assert_eq!(config.three_way_match_rate, 0.95);
14847        assert_eq!(config.partial_delivery_rate, 0.15);
14848        assert_eq!(config.average_po_to_gr_days, 14);
14849    }
14850
14851    #[test]
14852    fn test_o2c_flow_config_defaults() {
14853        let config = O2CFlowConfig::default();
14854        assert!(config.enabled);
14855        assert_eq!(config.credit_check_failure_rate, 0.02);
14856        assert_eq!(config.return_rate, 0.03);
14857        assert_eq!(config.bad_debt_rate, 0.01);
14858    }
14859
14860    #[test]
14861    fn test_balance_config_defaults() {
14862        let config = BalanceConfig::default();
14863        assert!(!config.generate_opening_balances);
14864        assert!(config.generate_trial_balances);
14865        assert_eq!(config.target_gross_margin, 0.35);
14866        assert!(config.validate_balance_equation);
14867        assert!(config.reconcile_subledgers);
14868    }
14869
14870    // ==========================================================================
14871    // Partial Config Deserialization Tests
14872    // ==========================================================================
14873
14874    #[test]
14875    fn test_partial_config_with_defaults() {
14876        // Minimal config that should use all defaults
14877        let yaml = r#"
14878            global:
14879              industry: manufacturing
14880              start_date: "2024-01-01"
14881              period_months: 3
14882            companies:
14883              - code: "TEST"
14884                name: "Test Company"
14885                currency: "USD"
14886                country: "US"
14887                annual_transaction_volume: ten_k
14888            chart_of_accounts:
14889              complexity: small
14890            output:
14891              output_directory: "./output"
14892        "#;
14893
14894        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14895        assert_eq!(config.global.period_months, 3);
14896        assert_eq!(config.companies.len(), 1);
14897        assert!(!config.fraud.enabled); // Default
14898        assert!(!config.internal_controls.enabled); // Default
14899    }
14900
14901    #[test]
14902    fn test_config_with_fraud_enabled() {
14903        let yaml = r#"
14904            global:
14905              industry: retail
14906              start_date: "2024-01-01"
14907              period_months: 12
14908            companies:
14909              - code: "RETAIL"
14910                name: "Retail Co"
14911                currency: "USD"
14912                country: "US"
14913                annual_transaction_volume: hundred_k
14914            chart_of_accounts:
14915              complexity: medium
14916            output:
14917              output_directory: "./output"
14918            fraud:
14919              enabled: true
14920              fraud_rate: 0.05
14921              clustering_enabled: true
14922        "#;
14923
14924        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14925        assert!(config.fraud.enabled);
14926        assert_eq!(config.fraud.fraud_rate, 0.05);
14927        assert!(config.fraud.clustering_enabled);
14928    }
14929
14930    #[test]
14931    fn test_config_with_multiple_companies() {
14932        let yaml = r#"
14933            global:
14934              industry: manufacturing
14935              start_date: "2024-01-01"
14936              period_months: 6
14937            companies:
14938              - code: "HQ"
14939                name: "Headquarters"
14940                currency: "USD"
14941                country: "US"
14942                annual_transaction_volume: hundred_k
14943                volume_weight: 1.0
14944              - code: "EU"
14945                name: "European Subsidiary"
14946                currency: "EUR"
14947                country: "DE"
14948                annual_transaction_volume: hundred_k
14949                volume_weight: 0.5
14950              - code: "APAC"
14951                name: "Asia Pacific"
14952                currency: "JPY"
14953                country: "JP"
14954                annual_transaction_volume: ten_k
14955                volume_weight: 0.3
14956            chart_of_accounts:
14957              complexity: large
14958            output:
14959              output_directory: "./output"
14960        "#;
14961
14962        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14963        assert_eq!(config.companies.len(), 3);
14964        assert_eq!(config.companies[0].code, "HQ");
14965        assert_eq!(config.companies[1].currency, "EUR");
14966        assert_eq!(config.companies[2].volume_weight, 0.3);
14967    }
14968
14969    #[test]
14970    fn test_intercompany_config() {
14971        let yaml = r#"
14972            enabled: true
14973            ic_transaction_rate: 0.20
14974            transfer_pricing_method: cost_plus
14975            markup_percent: 0.08
14976            generate_matched_pairs: true
14977            generate_eliminations: true
14978        "#;
14979
14980        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14981        assert!(config.enabled);
14982        assert_eq!(config.ic_transaction_rate, 0.20);
14983        assert!(matches!(
14984            config.transfer_pricing_method,
14985            TransferPricingMethod::CostPlus
14986        ));
14987        assert_eq!(config.markup_percent, 0.08);
14988        assert!(config.generate_eliminations);
14989    }
14990
14991    // ==========================================================================
14992    // Company Config Tests
14993    // ==========================================================================
14994
14995    #[test]
14996    fn test_company_config_defaults() {
14997        let yaml = r#"
14998            code: "TEST"
14999            name: "Test Company"
15000            currency: "USD"
15001            country: "US"
15002            annual_transaction_volume: ten_k
15003        "#;
15004
15005        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15006        assert_eq!(config.fiscal_year_variant, "K4"); // Default
15007        assert_eq!(config.volume_weight, 1.0); // Default
15008    }
15009
15010    // ==========================================================================
15011    // Chart of Accounts Config Tests
15012    // ==========================================================================
15013
15014    #[test]
15015    fn test_coa_config_defaults() {
15016        let yaml = r#"
15017            complexity: medium
15018        "#;
15019
15020        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15021        assert!(config.industry_specific); // Default true
15022        assert!(config.custom_accounts.is_none());
15023        assert_eq!(config.min_hierarchy_depth, 2); // Default
15024        assert_eq!(config.max_hierarchy_depth, 5); // Default
15025    }
15026
15027    // ==========================================================================
15028    // Accounting Standards Config Tests
15029    // ==========================================================================
15030
15031    #[test]
15032    fn test_accounting_standards_config_defaults() {
15033        let config = AccountingStandardsConfig::default();
15034        assert!(!config.enabled);
15035        assert!(config.framework.is_none());
15036        assert!(!config.revenue_recognition.enabled);
15037        assert!(!config.leases.enabled);
15038        assert!(!config.fair_value.enabled);
15039        assert!(!config.impairment.enabled);
15040        assert!(!config.generate_differences);
15041    }
15042
15043    #[test]
15044    fn test_accounting_standards_config_yaml() {
15045        let yaml = r#"
15046            enabled: true
15047            framework: ifrs
15048            revenue_recognition:
15049              enabled: true
15050              generate_contracts: true
15051              avg_obligations_per_contract: 2.5
15052              variable_consideration_rate: 0.20
15053              over_time_recognition_rate: 0.35
15054              contract_count: 150
15055            leases:
15056              enabled: true
15057              lease_count: 75
15058              finance_lease_percent: 0.25
15059              avg_lease_term_months: 48
15060            generate_differences: true
15061        "#;
15062
15063        let config: AccountingStandardsConfig =
15064            serde_yaml::from_str(yaml).expect("Failed to parse");
15065        assert!(config.enabled);
15066        assert!(matches!(
15067            config.framework,
15068            Some(AccountingFrameworkConfig::Ifrs)
15069        ));
15070        assert!(config.revenue_recognition.enabled);
15071        assert_eq!(config.revenue_recognition.contract_count, 150);
15072        assert_eq!(config.revenue_recognition.avg_obligations_per_contract, 2.5);
15073        assert!(config.leases.enabled);
15074        assert_eq!(config.leases.lease_count, 75);
15075        assert_eq!(config.leases.finance_lease_percent, 0.25);
15076        assert!(config.generate_differences);
15077    }
15078
15079    #[test]
15080    fn test_accounting_framework_serialization() {
15081        let frameworks = [
15082            AccountingFrameworkConfig::UsGaap,
15083            AccountingFrameworkConfig::Ifrs,
15084            AccountingFrameworkConfig::DualReporting,
15085            AccountingFrameworkConfig::FrenchGaap,
15086            AccountingFrameworkConfig::GermanGaap,
15087        ];
15088
15089        for framework in frameworks {
15090            let json = serde_json::to_string(&framework).expect("Failed to serialize");
15091            let deserialized: AccountingFrameworkConfig =
15092                serde_json::from_str(&json).expect("Failed to deserialize");
15093            assert!(format!("{:?}", framework) == format!("{:?}", deserialized));
15094        }
15095    }
15096
15097    #[test]
15098    fn test_revenue_recognition_config_defaults() {
15099        let config = RevenueRecognitionConfig::default();
15100        assert!(!config.enabled);
15101        assert!(config.generate_contracts);
15102        assert_eq!(config.avg_obligations_per_contract, 2.0);
15103        assert_eq!(config.variable_consideration_rate, 0.15);
15104        assert_eq!(config.over_time_recognition_rate, 0.30);
15105        assert_eq!(config.contract_count, 100);
15106    }
15107
15108    #[test]
15109    fn test_lease_accounting_config_defaults() {
15110        let config = LeaseAccountingConfig::default();
15111        assert!(!config.enabled);
15112        assert_eq!(config.lease_count, 50);
15113        assert_eq!(config.finance_lease_percent, 0.30);
15114        assert_eq!(config.avg_lease_term_months, 60);
15115        assert!(config.generate_amortization);
15116        assert_eq!(config.real_estate_percent, 0.40);
15117    }
15118
15119    #[test]
15120    fn test_fair_value_config_defaults() {
15121        let config = FairValueConfig::default();
15122        assert!(!config.enabled);
15123        assert_eq!(config.measurement_count, 25);
15124        assert_eq!(config.level1_percent, 0.40);
15125        assert_eq!(config.level2_percent, 0.35);
15126        assert_eq!(config.level3_percent, 0.25);
15127        assert!(!config.include_sensitivity_analysis);
15128    }
15129
15130    #[test]
15131    fn test_impairment_config_defaults() {
15132        let config = ImpairmentConfig::default();
15133        assert!(!config.enabled);
15134        assert_eq!(config.test_count, 15);
15135        assert_eq!(config.impairment_rate, 0.10);
15136        assert!(config.generate_projections);
15137        assert!(!config.include_goodwill);
15138    }
15139
15140    // ==========================================================================
15141    // Audit Standards Config Tests
15142    // ==========================================================================
15143
15144    #[test]
15145    fn test_audit_standards_config_defaults() {
15146        let config = AuditStandardsConfig::default();
15147        assert!(!config.enabled);
15148        assert!(!config.isa_compliance.enabled);
15149        assert!(!config.analytical_procedures.enabled);
15150        assert!(!config.confirmations.enabled);
15151        assert!(!config.opinion.enabled);
15152        assert!(!config.generate_audit_trail);
15153        assert!(!config.sox.enabled);
15154        assert!(!config.pcaob.enabled);
15155    }
15156
15157    #[test]
15158    fn test_audit_standards_config_yaml() {
15159        let yaml = r#"
15160            enabled: true
15161            isa_compliance:
15162              enabled: true
15163              compliance_level: comprehensive
15164              generate_isa_mappings: true
15165              include_pcaob: true
15166              framework: dual
15167            analytical_procedures:
15168              enabled: true
15169              procedures_per_account: 5
15170              variance_probability: 0.25
15171            confirmations:
15172              enabled: true
15173              confirmation_count: 75
15174              positive_response_rate: 0.90
15175              exception_rate: 0.08
15176            opinion:
15177              enabled: true
15178              generate_kam: true
15179              average_kam_count: 4
15180            sox:
15181              enabled: true
15182              generate_302_certifications: true
15183              generate_404_assessments: true
15184              material_weakness_rate: 0.03
15185            pcaob:
15186              enabled: true
15187              is_pcaob_audit: true
15188              include_icfr_opinion: true
15189            generate_audit_trail: true
15190        "#;
15191
15192        let config: AuditStandardsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15193        assert!(config.enabled);
15194        assert!(config.isa_compliance.enabled);
15195        assert_eq!(config.isa_compliance.compliance_level, "comprehensive");
15196        assert!(config.isa_compliance.include_pcaob);
15197        assert_eq!(config.isa_compliance.framework, "dual");
15198        assert!(config.analytical_procedures.enabled);
15199        assert_eq!(config.analytical_procedures.procedures_per_account, 5);
15200        assert!(config.confirmations.enabled);
15201        assert_eq!(config.confirmations.confirmation_count, 75);
15202        assert!(config.opinion.enabled);
15203        assert_eq!(config.opinion.average_kam_count, 4);
15204        assert!(config.sox.enabled);
15205        assert!(config.sox.generate_302_certifications);
15206        assert_eq!(config.sox.material_weakness_rate, 0.03);
15207        assert!(config.pcaob.enabled);
15208        assert!(config.pcaob.is_pcaob_audit);
15209        assert!(config.pcaob.include_icfr_opinion);
15210        assert!(config.generate_audit_trail);
15211    }
15212
15213    #[test]
15214    fn test_isa_compliance_config_defaults() {
15215        let config = IsaComplianceConfig::default();
15216        assert!(!config.enabled);
15217        assert_eq!(config.compliance_level, "standard");
15218        assert!(config.generate_isa_mappings);
15219        assert!(config.generate_coverage_summary);
15220        assert!(!config.include_pcaob);
15221        assert_eq!(config.framework, "isa");
15222    }
15223
15224    #[test]
15225    fn test_sox_compliance_config_defaults() {
15226        let config = SoxComplianceConfig::default();
15227        assert!(!config.enabled);
15228        assert!(config.generate_302_certifications);
15229        assert!(config.generate_404_assessments);
15230        assert_eq!(config.materiality_threshold, 10000.0);
15231        assert_eq!(config.material_weakness_rate, 0.02);
15232        assert_eq!(config.significant_deficiency_rate, 0.08);
15233    }
15234
15235    #[test]
15236    fn test_pcaob_config_defaults() {
15237        let config = PcaobConfig::default();
15238        assert!(!config.enabled);
15239        assert!(!config.is_pcaob_audit);
15240        assert!(config.generate_cam);
15241        assert!(!config.include_icfr_opinion);
15242        assert!(!config.generate_standard_mappings);
15243    }
15244
15245    #[test]
15246    fn test_config_with_standards_enabled() {
15247        let yaml = r#"
15248            global:
15249              industry: financial_services
15250              start_date: "2024-01-01"
15251              period_months: 12
15252            companies:
15253              - code: "BANK"
15254                name: "Test Bank"
15255                currency: "USD"
15256                country: "US"
15257                annual_transaction_volume: hundred_k
15258            chart_of_accounts:
15259              complexity: large
15260            output:
15261              output_directory: "./output"
15262            accounting_standards:
15263              enabled: true
15264              framework: us_gaap
15265              revenue_recognition:
15266                enabled: true
15267              leases:
15268                enabled: true
15269            audit_standards:
15270              enabled: true
15271              isa_compliance:
15272                enabled: true
15273              sox:
15274                enabled: true
15275        "#;
15276
15277        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15278        assert!(config.accounting_standards.enabled);
15279        assert!(matches!(
15280            config.accounting_standards.framework,
15281            Some(AccountingFrameworkConfig::UsGaap)
15282        ));
15283        assert!(config.accounting_standards.revenue_recognition.enabled);
15284        assert!(config.accounting_standards.leases.enabled);
15285        assert!(config.audit_standards.enabled);
15286        assert!(config.audit_standards.isa_compliance.enabled);
15287        assert!(config.audit_standards.sox.enabled);
15288    }
15289
15290    // ==========================================================================
15291    // Industry-Specific Config Tests
15292    // ==========================================================================
15293
15294    #[test]
15295    fn test_industry_specific_config_defaults() {
15296        let config = IndustrySpecificConfig::default();
15297        assert!(!config.enabled);
15298        assert!(!config.manufacturing.enabled);
15299        assert!(!config.retail.enabled);
15300        assert!(!config.healthcare.enabled);
15301        assert!(!config.technology.enabled);
15302        assert!(!config.financial_services.enabled);
15303        assert!(!config.professional_services.enabled);
15304    }
15305
15306    #[test]
15307    fn test_manufacturing_config_defaults() {
15308        let config = ManufacturingConfig::default();
15309        assert!(!config.enabled);
15310        assert_eq!(config.bom_depth, 4);
15311        assert!(!config.just_in_time);
15312        assert_eq!(config.supplier_tiers, 2);
15313        assert_eq!(config.target_yield_rate, 0.97);
15314        assert_eq!(config.scrap_alert_threshold, 0.03);
15315    }
15316
15317    #[test]
15318    fn test_retail_config_defaults() {
15319        let config = RetailConfig::default();
15320        assert!(!config.enabled);
15321        assert_eq!(config.avg_daily_transactions, 500);
15322        assert!(config.loss_prevention);
15323        assert_eq!(config.shrinkage_rate, 0.015);
15324    }
15325
15326    #[test]
15327    fn test_healthcare_config_defaults() {
15328        let config = HealthcareConfig::default();
15329        assert!(!config.enabled);
15330        assert_eq!(config.facility_type, "hospital");
15331        assert_eq!(config.avg_daily_encounters, 150);
15332        assert!(config.compliance.hipaa);
15333        assert!(config.compliance.stark_law);
15334        assert!(config.coding_systems.icd10);
15335        assert!(config.coding_systems.cpt);
15336    }
15337
15338    #[test]
15339    fn test_technology_config_defaults() {
15340        let config = TechnologyConfig::default();
15341        assert!(!config.enabled);
15342        assert_eq!(config.revenue_model, "saas");
15343        assert_eq!(config.subscription_revenue_pct, 0.60);
15344        assert!(config.rd_capitalization.enabled);
15345    }
15346
15347    #[test]
15348    fn test_config_with_industry_specific() {
15349        let yaml = r#"
15350            global:
15351              industry: healthcare
15352              start_date: "2024-01-01"
15353              period_months: 12
15354            companies:
15355              - code: "HOSP"
15356                name: "Test Hospital"
15357                currency: "USD"
15358                country: "US"
15359                annual_transaction_volume: hundred_k
15360            chart_of_accounts:
15361              complexity: medium
15362            output:
15363              output_directory: "./output"
15364            industry_specific:
15365              enabled: true
15366              healthcare:
15367                enabled: true
15368                facility_type: hospital
15369                payer_mix:
15370                  medicare: 0.45
15371                  medicaid: 0.15
15372                  commercial: 0.35
15373                  self_pay: 0.05
15374                coding_systems:
15375                  icd10: true
15376                  cpt: true
15377                  drg: true
15378                compliance:
15379                  hipaa: true
15380                  stark_law: true
15381                anomaly_rates:
15382                  upcoding: 0.03
15383                  unbundling: 0.02
15384        "#;
15385
15386        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15387        assert!(config.industry_specific.enabled);
15388        assert!(config.industry_specific.healthcare.enabled);
15389        assert_eq!(
15390            config.industry_specific.healthcare.facility_type,
15391            "hospital"
15392        );
15393        assert_eq!(config.industry_specific.healthcare.payer_mix.medicare, 0.45);
15394        assert_eq!(config.industry_specific.healthcare.payer_mix.self_pay, 0.05);
15395        assert!(config.industry_specific.healthcare.coding_systems.icd10);
15396        assert!(config.industry_specific.healthcare.compliance.hipaa);
15397        assert_eq!(
15398            config.industry_specific.healthcare.anomaly_rates.upcoding,
15399            0.03
15400        );
15401    }
15402
15403    #[test]
15404    fn test_config_with_manufacturing_specific() {
15405        let yaml = r#"
15406            global:
15407              industry: manufacturing
15408              start_date: "2024-01-01"
15409              period_months: 12
15410            companies:
15411              - code: "MFG"
15412                name: "Test Manufacturing"
15413                currency: "USD"
15414                country: "US"
15415                annual_transaction_volume: hundred_k
15416            chart_of_accounts:
15417              complexity: medium
15418            output:
15419              output_directory: "./output"
15420            industry_specific:
15421              enabled: true
15422              manufacturing:
15423                enabled: true
15424                bom_depth: 5
15425                just_in_time: true
15426                supplier_tiers: 3
15427                target_yield_rate: 0.98
15428                anomaly_rates:
15429                  yield_manipulation: 0.02
15430                  phantom_production: 0.01
15431        "#;
15432
15433        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15434        assert!(config.industry_specific.enabled);
15435        assert!(config.industry_specific.manufacturing.enabled);
15436        assert_eq!(config.industry_specific.manufacturing.bom_depth, 5);
15437        assert!(config.industry_specific.manufacturing.just_in_time);
15438        assert_eq!(config.industry_specific.manufacturing.supplier_tiers, 3);
15439        assert_eq!(
15440            config.industry_specific.manufacturing.target_yield_rate,
15441            0.98
15442        );
15443        assert_eq!(
15444            config
15445                .industry_specific
15446                .manufacturing
15447                .anomaly_rates
15448                .yield_manipulation,
15449            0.02
15450        );
15451    }
15452
15453    // ==========================================================================
15454    // Tax Configuration Tests
15455    // ==========================================================================
15456
15457    #[test]
15458    fn test_tax_config_defaults() {
15459        let tax = TaxConfig::default();
15460        assert!(!tax.enabled);
15461        assert!(tax.jurisdictions.countries.is_empty());
15462        assert!(!tax.jurisdictions.include_subnational);
15463        assert!(!tax.vat_gst.enabled);
15464        assert!(tax.vat_gst.standard_rates.is_empty());
15465        assert!(tax.vat_gst.reduced_rates.is_empty());
15466        assert!(tax.vat_gst.exempt_categories.is_empty());
15467        assert!(tax.vat_gst.reverse_charge);
15468        assert!(!tax.sales_tax.enabled);
15469        assert!(tax.sales_tax.nexus_states.is_empty());
15470        assert!(!tax.withholding.enabled);
15471        assert!(tax.withholding.treaty_network);
15472        assert_eq!(tax.withholding.default_rate, 0.30);
15473        assert_eq!(tax.withholding.treaty_reduced_rate, 0.15);
15474        assert!(tax.provisions.enabled);
15475        assert_eq!(tax.provisions.statutory_rate, 0.21);
15476        assert!(tax.provisions.uncertain_positions);
15477        assert!(!tax.payroll_tax.enabled);
15478        assert_eq!(tax.anomaly_rate, 0.03);
15479    }
15480
15481    #[test]
15482    fn test_tax_config_from_yaml() {
15483        let yaml = r#"
15484            global:
15485              seed: 42
15486              start_date: "2024-01-01"
15487              period_months: 12
15488              industry: retail
15489            companies:
15490              - code: C001
15491                name: Test Corp
15492                currency: USD
15493                country: US
15494                annual_transaction_volume: ten_k
15495            chart_of_accounts:
15496              complexity: small
15497            output:
15498              output_directory: ./output
15499            tax:
15500              enabled: true
15501              anomaly_rate: 0.05
15502              jurisdictions:
15503                countries: ["US", "DE", "GB"]
15504                include_subnational: true
15505              vat_gst:
15506                enabled: true
15507                standard_rates:
15508                  DE: 0.19
15509                  GB: 0.20
15510                reduced_rates:
15511                  DE: 0.07
15512                  GB: 0.05
15513                exempt_categories:
15514                  - financial_services
15515                  - healthcare
15516                reverse_charge: false
15517              sales_tax:
15518                enabled: true
15519                nexus_states: ["CA", "NY", "TX"]
15520              withholding:
15521                enabled: true
15522                treaty_network: false
15523                default_rate: 0.25
15524                treaty_reduced_rate: 0.10
15525              provisions:
15526                enabled: false
15527                statutory_rate: 0.28
15528                uncertain_positions: false
15529              payroll_tax:
15530                enabled: true
15531        "#;
15532
15533        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15534        assert!(config.tax.enabled);
15535        assert_eq!(config.tax.anomaly_rate, 0.05);
15536
15537        // Jurisdictions
15538        assert_eq!(config.tax.jurisdictions.countries.len(), 3);
15539        assert!(config
15540            .tax
15541            .jurisdictions
15542            .countries
15543            .contains(&"DE".to_string()));
15544        assert!(config.tax.jurisdictions.include_subnational);
15545
15546        // VAT/GST
15547        assert!(config.tax.vat_gst.enabled);
15548        assert_eq!(config.tax.vat_gst.standard_rates.get("DE"), Some(&0.19));
15549        assert_eq!(config.tax.vat_gst.standard_rates.get("GB"), Some(&0.20));
15550        assert_eq!(config.tax.vat_gst.reduced_rates.get("DE"), Some(&0.07));
15551        assert_eq!(config.tax.vat_gst.exempt_categories.len(), 2);
15552        assert!(!config.tax.vat_gst.reverse_charge);
15553
15554        // Sales tax
15555        assert!(config.tax.sales_tax.enabled);
15556        assert_eq!(config.tax.sales_tax.nexus_states.len(), 3);
15557        assert!(config
15558            .tax
15559            .sales_tax
15560            .nexus_states
15561            .contains(&"CA".to_string()));
15562
15563        // Withholding
15564        assert!(config.tax.withholding.enabled);
15565        assert!(!config.tax.withholding.treaty_network);
15566        assert_eq!(config.tax.withholding.default_rate, 0.25);
15567        assert_eq!(config.tax.withholding.treaty_reduced_rate, 0.10);
15568
15569        // Provisions
15570        assert!(!config.tax.provisions.enabled);
15571        assert_eq!(config.tax.provisions.statutory_rate, 0.28);
15572        assert!(!config.tax.provisions.uncertain_positions);
15573
15574        // Payroll tax
15575        assert!(config.tax.payroll_tax.enabled);
15576    }
15577
15578    #[test]
15579    fn test_generator_config_with_tax_default() {
15580        let yaml = r#"
15581            global:
15582              seed: 42
15583              start_date: "2024-01-01"
15584              period_months: 12
15585              industry: retail
15586            companies:
15587              - code: C001
15588                name: Test Corp
15589                currency: USD
15590                country: US
15591                annual_transaction_volume: ten_k
15592            chart_of_accounts:
15593              complexity: small
15594            output:
15595              output_directory: ./output
15596        "#;
15597
15598        let config: GeneratorConfig =
15599            serde_yaml::from_str(yaml).expect("Failed to parse config without tax section");
15600        // Tax should be present with defaults when not specified in YAML
15601        assert!(!config.tax.enabled);
15602        assert!(config.tax.jurisdictions.countries.is_empty());
15603        assert_eq!(config.tax.anomaly_rate, 0.03);
15604        assert!(config.tax.provisions.enabled); // provisions default to enabled=true
15605        assert_eq!(config.tax.provisions.statutory_rate, 0.21);
15606    }
15607
15608    // ==========================================================================
15609    // SessionSchemaConfig Tests
15610    // ==========================================================================
15611
15612    #[test]
15613    fn test_session_config_default_disabled() {
15614        let yaml = "{}";
15615        let config: SessionSchemaConfig =
15616            serde_yaml::from_str(yaml).expect("Failed to parse empty session config");
15617        assert!(!config.enabled);
15618        assert!(config.checkpoint_path.is_none());
15619        assert!(config.per_period_output);
15620        assert!(config.consolidated_output);
15621    }
15622
15623    #[test]
15624    fn test_config_backward_compatible_without_session() {
15625        let yaml = r#"
15626            global:
15627              seed: 42
15628              start_date: "2024-01-01"
15629              period_months: 12
15630              industry: retail
15631            companies:
15632              - code: C001
15633                name: Test Corp
15634                currency: USD
15635                country: US
15636                annual_transaction_volume: ten_k
15637            chart_of_accounts:
15638              complexity: small
15639            output:
15640              output_directory: ./output
15641        "#;
15642
15643        let config: GeneratorConfig =
15644            serde_yaml::from_str(yaml).expect("Failed to parse config without session");
15645        // Session should default to disabled
15646        assert!(!config.session.enabled);
15647        assert!(config.session.per_period_output);
15648        assert!(config.session.consolidated_output);
15649        // fiscal_year_months should be None
15650        assert!(config.global.fiscal_year_months.is_none());
15651    }
15652
15653    #[test]
15654    fn test_fiscal_year_months_parsed() {
15655        let yaml = r#"
15656            global:
15657              seed: 42
15658              start_date: "2024-01-01"
15659              period_months: 24
15660              industry: retail
15661              fiscal_year_months: 12
15662            companies:
15663              - code: C001
15664                name: Test Corp
15665                currency: USD
15666                country: US
15667                annual_transaction_volume: ten_k
15668            chart_of_accounts:
15669              complexity: small
15670            output:
15671              output_directory: ./output
15672            session:
15673              enabled: true
15674              checkpoint_path: /tmp/checkpoints
15675              per_period_output: true
15676              consolidated_output: false
15677        "#;
15678
15679        let config: GeneratorConfig =
15680            serde_yaml::from_str(yaml).expect("Failed to parse config with fiscal_year_months");
15681        assert_eq!(config.global.fiscal_year_months, Some(12));
15682        assert!(config.session.enabled);
15683        assert_eq!(
15684            config.session.checkpoint_path,
15685            Some("/tmp/checkpoints".to_string())
15686        );
15687        assert!(config.session.per_period_output);
15688        assert!(!config.session.consolidated_output);
15689    }
15690
15691    // -----------------------------------------------------------------------
15692    // SP3 — IndustryProfileField / IndustryPriorsConfig tests
15693    // -----------------------------------------------------------------------
15694
15695    #[test]
15696    fn industry_profile_legacy_string_form_parses() {
15697        // Legacy YAML: bare enum variant name.  Must round-trip without changes
15698        // to existing config files.
15699        let yaml = r#"
15700enabled: true
15701industry_profile: retail
15702"#;
15703        let cfg: AdvancedDistributionConfig =
15704            serde_yaml::from_str(yaml).expect("parse legacy industry_profile string");
15705        let profile = cfg.industry_profile.expect("Some");
15706        assert_eq!(profile.profile_type(), IndustryProfileType::Retail);
15707        assert!(profile.priors().is_none());
15708    }
15709
15710    #[test]
15711    fn industry_profile_full_form_with_priors_parses() {
15712        let yaml = r#"
15713enabled: true
15714industry_profile:
15715  name: healthcare
15716  priors:
15717    enabled: true
15718    source: bundled
15719"#;
15720        let cfg: AdvancedDistributionConfig =
15721            serde_yaml::from_str(yaml).expect("parse full industry_profile struct");
15722        let profile = cfg.industry_profile.expect("Some");
15723        assert_eq!(profile.profile_type(), IndustryProfileType::Healthcare);
15724        let priors = profile.priors().expect("priors present");
15725        assert!(priors.enabled);
15726        assert_eq!(priors.source, PriorsSource::Bundled);
15727        assert!(priors.path.is_none());
15728    }
15729
15730    #[test]
15731    fn industry_profile_full_form_without_priors_parses() {
15732        // Struct form with only `name` and no priors block.
15733        let yaml = r#"
15734enabled: true
15735industry_profile:
15736  name: manufacturing
15737"#;
15738        let cfg: AdvancedDistributionConfig =
15739            serde_yaml::from_str(yaml).expect("parse struct without priors");
15740        let profile = cfg.industry_profile.expect("Some");
15741        assert_eq!(profile.profile_type(), IndustryProfileType::Manufacturing);
15742        assert!(profile.priors().is_none());
15743    }
15744
15745    #[test]
15746    fn industry_profile_priors_file_without_path_fails_validation() {
15747        use crate::validation::validate_config;
15748
15749        // Minimal valid config plumbing.
15750        let yaml = r#"
15751global:
15752  seed: 42
15753  start_date: "2024-01-01"
15754  period_months: 1
15755  industry: retail
15756companies:
15757  - code: C001
15758    name: Test Corp
15759    currency: USD
15760    country: US
15761    annual_transaction_volume: ten_k
15762chart_of_accounts:
15763  complexity: small
15764output:
15765  output_directory: ./output
15766distributions:
15767  enabled: true
15768  industry_profile:
15769    name: retail
15770    priors:
15771      enabled: true
15772      source: file
15773"#;
15774        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
15775        let err = validate_config(&cfg).expect_err("path required when source=file");
15776        let msg = err.to_string();
15777        assert!(
15778            msg.contains("path") || msg.contains("required"),
15779            "unexpected error message: {msg}"
15780        );
15781    }
15782
15783    #[test]
15784    fn industry_profile_priors_file_with_path_passes_validation() {
15785        use crate::validation::validate_config;
15786
15787        let yaml = r#"
15788global:
15789  seed: 42
15790  start_date: "2024-01-01"
15791  period_months: 1
15792  industry: retail
15793companies:
15794  - code: C001
15795    name: Test Corp
15796    currency: USD
15797    country: US
15798    annual_transaction_volume: ten_k
15799chart_of_accounts:
15800  complexity: small
15801output:
15802  output_directory: ./output
15803distributions:
15804  enabled: true
15805  industry_profile:
15806    name: retail
15807    priors:
15808      enabled: true
15809      source: file
15810      path: /tmp/priors.json
15811"#;
15812        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
15813        validate_config(&cfg).expect("validation should pass with path supplied");
15814    }
15815}