Skip to main content

datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, ExpectationDriver, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
11/// Root configuration for the synthetic data generator.
12///
13/// # camelCase alias policy
14///
15/// Every multi-word field carries `#[serde(alias = "camelCaseName")]`
16/// so SDK clients that follow JSON conventions can submit configs
17/// without round-tripping through a snake_case transformer.
18///
19/// Before v4.4.1 several fields — `documentFlows`, `accountingStandards`,
20/// `complianceRegulations`, `analyticsMetadata` — had no alias, so SDK
21/// submissions silently fell through to defaults. The symptom was
22/// "enabling the 6 feature subsections together collapses the archive
23/// from 99 files to 19". Root cause: those four fields never parsed;
24/// the orchestrator produced far less data than requested, and
25/// `output.exportFormat` similarly fell through so journal_entries
26/// landed as the default Parquet/CSV rather than JSON.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GeneratorConfig {
29    /// Global settings
30    pub global: GlobalConfig,
31    /// Company configuration
32    pub companies: Vec<CompanyConfig>,
33    /// Chart of Accounts configuration
34    #[serde(alias = "chartOfAccounts")]
35    pub chart_of_accounts: ChartOfAccountsConfig,
36    /// Transaction generation settings
37    #[serde(default)]
38    pub transactions: TransactionConfig,
39    /// Output configuration
40    pub output: OutputConfig,
41    /// Fraud simulation settings
42    #[serde(default)]
43    pub fraud: FraudConfig,
44    /// Data quality variation settings
45    #[serde(default, alias = "dataQuality")]
46    pub data_quality: DataQualitySchemaConfig,
47    /// Internal Controls System settings
48    #[serde(default, alias = "internalControls")]
49    pub internal_controls: InternalControlsConfig,
50    /// Business process mix
51    #[serde(default, alias = "businessProcesses")]
52    pub business_processes: BusinessProcessConfig,
53    /// User persona distribution
54    #[serde(default, alias = "userPersonas")]
55    pub user_personas: UserPersonaConfig,
56    /// Template configuration for realistic data
57    #[serde(default)]
58    pub templates: TemplateConfig,
59    /// Approval workflow configuration
60    #[serde(default)]
61    pub approval: ApprovalConfig,
62    /// Department structure configuration
63    #[serde(default)]
64    pub departments: DepartmentConfig,
65    /// Master data generation settings
66    #[serde(default, alias = "masterData")]
67    pub master_data: MasterDataConfig,
68    /// Document flow generation settings
69    #[serde(default, alias = "documentFlows")]
70    pub document_flows: DocumentFlowConfig,
71    /// Intercompany transaction settings
72    #[serde(default)]
73    pub intercompany: IntercompanyConfig,
74    /// Balance and trial balance settings
75    #[serde(default)]
76    pub balance: BalanceConfig,
77    /// OCPM (Object-Centric Process Mining) settings
78    #[serde(default)]
79    pub ocpm: OcpmConfig,
80    /// Audit engagement and workpaper generation settings
81    #[serde(default)]
82    pub audit: AuditGenerationConfig,
83    /// Banking KYC/AML transaction generation settings
84    #[serde(default)]
85    pub banking: datasynth_banking::BankingConfig,
86    /// Scenario configuration for metadata and tagging (Phase 1.3)
87    #[serde(default)]
88    pub scenario: ScenarioConfig,
89    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
90    #[serde(default)]
91    pub temporal: TemporalDriftConfig,
92    /// Graph export configuration for accounting network export
93    #[serde(default, alias = "graphExport")]
94    pub graph_export: GraphExportConfig,
95    /// Streaming output API configuration
96    #[serde(default)]
97    pub streaming: StreamingSchemaConfig,
98    /// Rate limiting configuration
99    #[serde(default, alias = "rateLimit")]
100    pub rate_limit: RateLimitSchemaConfig,
101    /// Temporal attribute generation configuration
102    #[serde(default, alias = "temporalAttributes")]
103    pub temporal_attributes: TemporalAttributeSchemaConfig,
104    /// Relationship generation configuration
105    #[serde(default)]
106    pub relationships: RelationshipSchemaConfig,
107    /// Accounting standards framework configuration (IFRS, US GAAP)
108    #[serde(default, alias = "accountingStandards")]
109    pub accounting_standards: AccountingStandardsConfig,
110    /// Audit standards framework configuration (ISA, PCAOB)
111    #[serde(default, alias = "auditStandards")]
112    pub audit_standards: AuditStandardsConfig,
113    /// Advanced distribution configuration (mixture models, correlations, regime changes)
114    #[serde(default)]
115    pub distributions: AdvancedDistributionConfig,
116    /// Temporal patterns configuration (business days, period-end dynamics, processing lags)
117    #[serde(default, alias = "temporalPatterns")]
118    pub temporal_patterns: TemporalPatternsConfig,
119    /// Vendor network configuration (multi-tier supply chain modeling)
120    #[serde(default, alias = "vendorNetwork")]
121    pub vendor_network: VendorNetworkSchemaConfig,
122    /// Customer segmentation configuration (value segments, lifecycle stages)
123    #[serde(default, alias = "customerSegmentation")]
124    pub customer_segmentation: CustomerSegmentationSchemaConfig,
125    /// Relationship strength calculation configuration
126    #[serde(default, alias = "relationshipStrength")]
127    pub relationship_strength: RelationshipStrengthSchemaConfig,
128    /// Cross-process link configuration (P2P ↔ O2C via inventory)
129    #[serde(default, alias = "crossProcessLinks")]
130    pub cross_process_links: CrossProcessLinksSchemaConfig,
131    /// Organizational events configuration (acquisitions, divestitures, etc.)
132    #[serde(default, alias = "organizationalEvents")]
133    pub organizational_events: OrganizationalEventsSchemaConfig,
134    /// Behavioral drift configuration (vendor, customer, employee behavior)
135    #[serde(default, alias = "behavioralDrift")]
136    pub behavioral_drift: BehavioralDriftSchemaConfig,
137    /// Market drift configuration (economic cycles, commodities, price shocks)
138    #[serde(default, alias = "marketDrift")]
139    pub market_drift: MarketDriftSchemaConfig,
140    /// Drift labeling configuration for ground truth generation
141    #[serde(default, alias = "driftLabeling")]
142    pub drift_labeling: DriftLabelingSchemaConfig,
143    /// Enhanced anomaly injection configuration (multi-stage schemes, correlated injection, near-miss)
144    #[serde(default, alias = "anomalyInjection")]
145    pub anomaly_injection: EnhancedAnomalyConfig,
146    /// Industry-specific transaction and anomaly generation configuration
147    #[serde(default, alias = "industrySpecific")]
148    pub industry_specific: IndustrySpecificConfig,
149    /// Fingerprint privacy configuration for extraction/synthesis
150    #[serde(default, alias = "fingerprintPrivacy")]
151    pub fingerprint_privacy: FingerprintPrivacyConfig,
152    /// Quality gate configuration for pass/fail thresholds
153    #[serde(default, alias = "qualityGates")]
154    pub quality_gates: QualityGatesSchemaConfig,
155    /// Compliance configuration (EU AI Act, content marking)
156    #[serde(default)]
157    pub compliance: ComplianceSchemaConfig,
158    /// Webhook notification configuration
159    #[serde(default)]
160    pub webhooks: WebhookSchemaConfig,
161    /// LLM enrichment configuration (AI-augmented vendor names, descriptions, explanations)
162    #[serde(default)]
163    pub llm: LlmSchemaConfig,
164    /// Diffusion model configuration (statistical diffusion-based data enhancement)
165    #[serde(default)]
166    pub diffusion: DiffusionSchemaConfig,
167    /// Causal generation configuration (structural causal models, interventions)
168    #[serde(default)]
169    pub causal: CausalSchemaConfig,
170
171    // ===== Enterprise Process Chain Extensions =====
172    /// Source-to-Pay (S2C/S2P) configuration (sourcing, contracts, catalogs, scorecards)
173    #[serde(default, alias = "sourceToPay")]
174    pub source_to_pay: SourceToPayConfig,
175    /// Financial reporting configuration (financial statements, KPIs, budgets)
176    #[serde(default, alias = "financialReporting")]
177    pub financial_reporting: FinancialReportingConfig,
178    /// HR process configuration (payroll, time & attendance, expenses)
179    #[serde(default)]
180    pub hr: HrConfig,
181    /// Manufacturing configuration (production orders, WIP, routing)
182    #[serde(default)]
183    pub manufacturing: ManufacturingProcessConfig,
184    /// Sales quote configuration (quote-to-order pipeline)
185    #[serde(default, alias = "salesQuotes")]
186    pub sales_quotes: SalesQuoteConfig,
187    /// Tax accounting configuration (VAT/GST, sales tax, withholding, provisions, payroll tax)
188    #[serde(default)]
189    pub tax: TaxConfig,
190    /// Treasury and cash management configuration
191    #[serde(default)]
192    pub treasury: TreasuryConfig,
193    /// Project accounting configuration
194    #[serde(default, alias = "projectAccounting")]
195    pub project_accounting: ProjectAccountingConfig,
196    /// ESG / Sustainability reporting configuration
197    #[serde(default)]
198    pub esg: EsgConfig,
199    /// Country pack configuration (external packs directory, per-country overrides)
200    #[serde(default, alias = "countryPacks")]
201    pub country_packs: Option<CountryPacksSchemaConfig>,
202    /// Counterfactual simulation scenario configuration
203    #[serde(default)]
204    pub scenarios: ScenariosConfig,
205    /// Generation session configuration (period-by-period generation with balance carry-forward)
206    #[serde(default)]
207    pub session: SessionSchemaConfig,
208    /// Compliance regulations framework configuration (standards registry, jurisdictions, temporal versioning, audit templates, graph integration)
209    #[serde(default, alias = "complianceRegulations")]
210    pub compliance_regulations: ComplianceRegulationsConfig,
211    /// v3.3.0: analytics metadata phase — prior-year comparatives,
212    /// industry benchmarks, management reports, drift events. Off by
213    /// default so v3.2.1 archives are byte-identical.
214    #[serde(default, alias = "analyticsMetadata")]
215    pub analytics_metadata: AnalyticsMetadataConfig,
216    /// Phase 1 of the central concentration abstraction (#143). Post-generation
217    /// passes over the JE batch that reshape distributional structure toward a
218    /// corpus-derived target. Off by default — see
219    /// `docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md`.
220    #[serde(default)]
221    pub concentration: ConcentrationConfig,
222}
223
224/// v3.3.0: analytics-metadata phase configuration.
225///
226/// Gates the `phase_analytics_metadata` pass that runs AFTER all
227/// JE-adding phases (including the fraud-bias sweep at Phase 20b).
228/// When enabled, the orchestrator calls `PriorYearGenerator`,
229/// `IndustryBenchmarkGenerator`, `ManagementReportGenerator`, and
230/// `DriftEventGenerator` in sequence; each sub-flag below controls
231/// whether that specific generator fires.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub struct AnalyticsMetadataConfig {
234    /// Master switch for the whole analytics phase.
235    #[serde(default)]
236    pub enabled: bool,
237    /// Emit `PriorYearComparative` records derived from current
238    /// period's account balances.
239    #[serde(default = "default_true")]
240    pub prior_year: bool,
241    /// Emit `IndustryBenchmark` records for the configured industry.
242    #[serde(default = "default_true")]
243    pub industry_benchmark: bool,
244    /// Emit management-report artefacts.
245    #[serde(default = "default_true")]
246    pub management_reports: bool,
247    /// Emit `LabeledDriftEvent` records — post-generation sweep over
248    /// journal entries to label detected drift patterns.
249    #[serde(default = "default_true")]
250    pub drift_events: bool,
251}
252
253impl Default for AnalyticsMetadataConfig {
254    fn default() -> Self {
255        Self {
256            enabled: false,
257            prior_year: true,
258            industry_benchmark: true,
259            management_reports: true,
260            drift_events: true,
261        }
262    }
263}
264
265/// LLM enrichment configuration.
266///
267/// Controls AI-augmented metadata enrichment using LLM providers.
268/// When enabled, vendor names, transaction descriptions, and anomaly explanations
269/// are enriched using the configured provider (mock by default).
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct LlmSchemaConfig {
272    /// Whether LLM enrichment is enabled.
273    #[serde(default)]
274    pub enabled: bool,
275    /// Provider type: "mock", "openai", "anthropic", "custom".
276    #[serde(default = "default_llm_provider")]
277    pub provider: String,
278    /// Model name/ID for the provider.
279    #[serde(default = "default_llm_model_name")]
280    pub model: String,
281    /// Maximum number of vendor names to enrich per run.
282    #[serde(default = "default_llm_batch_size")]
283    pub max_vendor_enrichments: usize,
284
285    /// v4.1.1+: also enrich customer names at generate time.
286    /// Default `false` preserves v4.1.0 behaviour.
287    #[serde(default)]
288    pub enrich_customers: bool,
289
290    /// v4.1.1+: also enrich material descriptions at generate time.
291    /// Default `false`.
292    #[serde(default)]
293    pub enrich_materials: bool,
294
295    /// v4.1.1+: also enrich audit finding titles at generate time
296    /// (the finding narratives remain on their existing template path
297    /// because they're richer and locale-specific). Default `false`.
298    #[serde(default)]
299    pub enrich_findings: bool,
300
301    /// v4.1.1+: upper bound on customer enrichments per run. Matches
302    /// `max_vendor_enrichments` semantics.
303    #[serde(default = "default_llm_batch_size")]
304    pub max_customer_enrichments: usize,
305
306    /// v4.1.1+: upper bound on material enrichments per run.
307    #[serde(default = "default_llm_batch_size")]
308    pub max_material_enrichments: usize,
309
310    /// v4.1.1+: upper bound on finding enrichments per run.
311    #[serde(default = "default_llm_batch_size")]
312    pub max_finding_enrichments: usize,
313}
314
/// Serde fallback for `LlmSchemaConfig::provider`: the `"mock"` provider.
fn default_llm_provider() -> String {
    String::from("mock")
}
318
/// Serde fallback for `LlmSchemaConfig::model`.
fn default_llm_model_name() -> String {
    "gpt-4o-mini".to_owned()
}
322
/// Serde fallback shared by the `max_*_enrichments` fields of `LlmSchemaConfig`.
fn default_llm_batch_size() -> usize {
    const PER_RUN_ENRICHMENT_CAP: usize = 50;
    PER_RUN_ENRICHMENT_CAP
}
326
327impl Default for LlmSchemaConfig {
328    fn default() -> Self {
329        Self {
330            enabled: false,
331            provider: default_llm_provider(),
332            model: default_llm_model_name(),
333            max_vendor_enrichments: default_llm_batch_size(),
334            enrich_customers: false,
335            enrich_materials: false,
336            enrich_findings: false,
337            max_customer_enrichments: default_llm_batch_size(),
338            max_material_enrichments: default_llm_batch_size(),
339            max_finding_enrichments: default_llm_batch_size(),
340        }
341    }
342}
343
344/// Diffusion model configuration.
345///
346/// Controls statistical diffusion-based data enhancement that generates samples
347/// matching target distribution properties (means, standard deviations, correlations).
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct DiffusionSchemaConfig {
350    /// Whether diffusion enhancement is enabled.
351    #[serde(default)]
352    pub enabled: bool,
353    /// Number of diffusion steps (higher = better quality, slower).
354    #[serde(default = "default_diffusion_steps")]
355    pub n_steps: usize,
356    /// Noise schedule type: "linear", "cosine", "sigmoid".
357    #[serde(default = "default_diffusion_schedule")]
358    pub schedule: String,
359    /// Number of sample rows to generate for demonstration.
360    #[serde(default = "default_diffusion_sample_size")]
361    pub sample_size: usize,
362    /// Backend type: "statistical" (default), "neural", "hybrid".
363    #[serde(default = "default_diffusion_backend")]
364    pub backend: String,
365    /// Neural diffusion backend configuration (used when backend is "neural" or "hybrid").
366    #[serde(default)]
367    pub neural: NeuralDiffusionSchemaConfig,
368}
369
/// Serde fallback for `DiffusionSchemaConfig::n_steps`.
fn default_diffusion_steps() -> usize {
    const STEPS: usize = 100;
    STEPS
}
373
/// Serde fallback for `DiffusionSchemaConfig::schedule`: the linear noise schedule.
fn default_diffusion_schedule() -> String {
    String::from("linear")
}
377
/// Serde fallback for `DiffusionSchemaConfig::sample_size`.
fn default_diffusion_sample_size() -> usize {
    const SAMPLE_ROWS: usize = 100;
    SAMPLE_ROWS
}
381
/// Serde fallback for `DiffusionSchemaConfig::backend`: the statistical backend.
fn default_diffusion_backend() -> String {
    "statistical".to_owned()
}
385
386impl Default for DiffusionSchemaConfig {
387    fn default() -> Self {
388        Self {
389            enabled: false,
390            n_steps: default_diffusion_steps(),
391            schedule: default_diffusion_schedule(),
392            sample_size: default_diffusion_sample_size(),
393            backend: default_diffusion_backend(),
394            neural: NeuralDiffusionSchemaConfig::default(),
395        }
396    }
397}
398
399/// Neural diffusion backend configuration.
400///
401/// Controls the `candle`-based neural score network that learns joint distributions
402/// from training data for the neural and hybrid diffusion backends.
403#[derive(Debug, Clone, Serialize, Deserialize)]
404pub struct NeuralDiffusionSchemaConfig {
405    /// Hidden layer dimensions for the score network MLP.
406    #[serde(default = "default_neural_hidden_dims")]
407    pub hidden_dims: Vec<usize>,
408    /// Dimensionality of the timestep embedding.
409    #[serde(default = "default_neural_timestep_embed_dim")]
410    pub timestep_embed_dim: usize,
411    /// Learning rate for training.
412    #[serde(default = "default_neural_learning_rate")]
413    pub learning_rate: f64,
414    /// Number of training epochs.
415    #[serde(default = "default_neural_training_epochs")]
416    pub training_epochs: usize,
417    /// Training batch size.
418    #[serde(default = "default_neural_batch_size")]
419    pub batch_size: usize,
420    /// Blend weight for hybrid mode (0.0 = all statistical, 1.0 = all neural).
421    #[serde(default = "default_neural_hybrid_weight")]
422    pub hybrid_weight: f64,
423    /// Hybrid blending strategy: "weighted_average", "column_select", "threshold".
424    #[serde(default = "default_neural_hybrid_strategy")]
425    pub hybrid_strategy: String,
426    /// Columns to apply neural generation to (empty = all numeric columns).
427    #[serde(default)]
428    pub neural_columns: Vec<String>,
429    /// v4.4.0+ Optional path to a pre-trained score-network checkpoint
430    /// (`.safetensors`). When set, the orchestrator loads the
431    /// checkpoint instead of training from the first batch — useful
432    /// for long-running production deployments where training cost
433    /// dominates per-run cost. When empty, the orchestrator trains
434    /// on the first generated JE amounts.
435    #[serde(default, skip_serializing_if = "Option::is_none")]
436    pub checkpoint_path: Option<String>,
437}
438
/// Serde fallback for `NeuralDiffusionSchemaConfig::hidden_dims`: three
/// hidden layers of widths 256, 256 and 128.
fn default_neural_hidden_dims() -> Vec<usize> {
    Vec::from([256, 256, 128])
}
442
/// Serde fallback for `NeuralDiffusionSchemaConfig::timestep_embed_dim`.
fn default_neural_timestep_embed_dim() -> usize {
    const EMBED_DIM: usize = 64;
    EMBED_DIM
}
446
/// Serde fallback for `NeuralDiffusionSchemaConfig::learning_rate`.
fn default_neural_learning_rate() -> f64 {
    const LEARNING_RATE: f64 = 0.001;
    LEARNING_RATE
}
450
/// Serde fallback for `NeuralDiffusionSchemaConfig::training_epochs`.
fn default_neural_training_epochs() -> usize {
    const EPOCHS: usize = 100;
    EPOCHS
}
454
/// Serde fallback for `NeuralDiffusionSchemaConfig::batch_size`.
fn default_neural_batch_size() -> usize {
    const BATCH: usize = 64;
    BATCH
}
458
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_weight`: the
/// midpoint of the documented 0.0 (all statistical) to 1.0 (all neural) range.
fn default_neural_hybrid_weight() -> f64 {
    const EVEN_BLEND: f64 = 0.5;
    EVEN_BLEND
}
462
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_strategy`.
fn default_neural_hybrid_strategy() -> String {
    String::from("weighted_average")
}
466
467impl Default for NeuralDiffusionSchemaConfig {
468    fn default() -> Self {
469        Self {
470            hidden_dims: default_neural_hidden_dims(),
471            timestep_embed_dim: default_neural_timestep_embed_dim(),
472            learning_rate: default_neural_learning_rate(),
473            training_epochs: default_neural_training_epochs(),
474            batch_size: default_neural_batch_size(),
475            hybrid_weight: default_neural_hybrid_weight(),
476            hybrid_strategy: default_neural_hybrid_strategy(),
477            neural_columns: Vec::new(),
478            checkpoint_path: None,
479        }
480    }
481}
482
483/// Causal generation configuration.
484///
485/// Controls structural causal model (SCM) based data generation that respects
486/// causal relationships between variables, supports do-calculus interventions,
487/// and enables counterfactual scenarios.
488#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct CausalSchemaConfig {
490    /// Whether causal generation is enabled.
491    #[serde(default)]
492    pub enabled: bool,
493    /// Built-in template to use: "fraud_detection", "revenue_cycle", or "custom".
494    #[serde(default = "default_causal_template")]
495    pub template: String,
496    /// Number of causal samples to generate.
497    #[serde(default = "default_causal_sample_size")]
498    pub sample_size: usize,
499    /// Whether to run causal validation on the output.
500    #[serde(default = "default_true")]
501    pub validate: bool,
502}
503
/// Serde fallback for `CausalSchemaConfig::template`.
fn default_causal_template() -> String {
    "fraud_detection".to_owned()
}
507
/// Serde fallback for `CausalSchemaConfig::sample_size`.
fn default_causal_sample_size() -> usize {
    const SAMPLES: usize = 500;
    SAMPLES
}
511
512impl Default for CausalSchemaConfig {
513    fn default() -> Self {
514        Self {
515            enabled: false,
516            template: default_causal_template(),
517            sample_size: default_causal_sample_size(),
518            validate: true,
519        }
520    }
521}
522
523/// Graph export configuration for accounting network and ML training exports.
524///
525/// This section enables exporting generated data as graphs for:
526/// - Network reconstruction algorithms
527/// - Graph neural network training
528/// - Neo4j graph database import
529#[derive(Debug, Clone, Serialize, Deserialize)]
530pub struct GraphExportConfig {
531    /// Enable graph export.
532    #[serde(default)]
533    pub enabled: bool,
534
535    /// Graph types to generate.
536    #[serde(default = "default_graph_types")]
537    pub graph_types: Vec<GraphTypeConfig>,
538
539    /// Export formats to generate.
540    #[serde(default = "default_graph_formats")]
541    pub formats: Vec<GraphExportFormat>,
542
543    /// Train split ratio for ML datasets.
544    #[serde(default = "default_train_ratio")]
545    pub train_ratio: f64,
546
547    /// Validation split ratio for ML datasets.
548    #[serde(default = "default_val_ratio")]
549    pub validation_ratio: f64,
550
551    /// Random seed for train/val/test splits.
552    #[serde(default)]
553    pub split_seed: Option<u64>,
554
555    /// Output subdirectory for graph exports (relative to output directory).
556    #[serde(default = "default_graph_subdir")]
557    pub output_subdirectory: String,
558
559    /// Multi-layer hypergraph export settings for RustGraph integration.
560    #[serde(default)]
561    pub hypergraph: HypergraphExportSettings,
562
563    /// DGL-specific export settings.
564    #[serde(default)]
565    pub dgl: DglExportConfig,
566
567    /// `graphs/je_network.csv` flat edge-list export settings (v5.8.0+).
568    #[serde(default)]
569    pub je_network: JeNetworkConfig,
570}
571
572/// Method used to construct edges from journal entries when writing
573/// `graphs/je_network.csv` (v5.8.0+).
574///
575/// Reference: Ivertowski (2024), *Hardware Accelerated Method for
576/// Accounting Network Generation*, Methods A through E.
577#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
578#[serde(rename_all = "snake_case")]
579pub enum JeNetworkMethod {
580    /// Method B (full Cartesian product) for every JE — bijective on
581    /// 2-line entries (Method A) and `n × m` Cartesian for multi-line
582    /// entries with proportional amount allocation.  Produces
583    /// O(n × m) edges per JE — a 50-debit / 50-credit period-close
584    /// consolidation alone yields 2 500 edges, and a typical
585    /// HF-scale 1 M-line config can blow up to 200 M+ edges (and tens
586    /// of GB of memory). Use explicitly when downstream consumers
587    /// already depend on the Cartesian shape.
588    Cartesian,
589    /// Method A only — emit a single edge per 2-line journal entry
590    /// (1 debit + 1 credit) and skip multi-line entries entirely.
591    /// Edge count = number of 2-line JEs (≈ 60 % of entries per the
592    /// 2024 paper); per-edge confidence is exactly `1.0`.
593    ///
594    /// **Default since v5.27** (previously `Cartesian`). The Cartesian
595    /// default OOM'd small-complexity CLI smoke tests on 14-16 GB CI
596    /// runners — a 50 × 50 period-close JE alone wanted 20 GB of edge
597    /// memory. Method A is the bounded, exactness-preserving fallback
598    /// recommended for published reference datasets where size and
599    /// exactness matter more than recall on multi-line consolidations.
600    /// Set `je_network.method: cartesian` explicitly to restore the
601    /// pre-v5.27 behaviour.
602    #[default]
603    A,
604}
605
606/// Configuration for the `graphs/je_network.csv` flat edge-list
607/// export (v5.8.0+).
608#[derive(Debug, Clone, Default, Serialize, Deserialize)]
609#[serde(deny_unknown_fields)]
610pub struct JeNetworkConfig {
611    /// Edge-construction method (see [`JeNetworkMethod`]).
612    #[serde(default)]
613    pub method: JeNetworkMethod,
614}
615
616fn default_graph_types() -> Vec<GraphTypeConfig> {
617    vec![GraphTypeConfig::default()]
618}
619
620fn default_graph_formats() -> Vec<GraphExportFormat> {
621    vec![GraphExportFormat::PytorchGeometric]
622}
623
/// Serde fallback for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const TRAIN_SHARE: f64 = 0.7;
    TRAIN_SHARE
}
627
/// Serde fallback for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const VALIDATION_SHARE: f64 = 0.15;
    VALIDATION_SHARE
}
631
/// Serde fallback for `GraphExportConfig::output_subdirectory`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
635
636impl Default for GraphExportConfig {
637    fn default() -> Self {
638        Self {
639            enabled: false,
640            graph_types: default_graph_types(),
641            formats: default_graph_formats(),
642            train_ratio: 0.7,
643            validation_ratio: 0.15,
644            split_seed: None,
645            output_subdirectory: "graphs".to_string(),
646            hypergraph: HypergraphExportSettings::default(),
647            dgl: DglExportConfig::default(),
648            je_network: JeNetworkConfig::default(),
649        }
650    }
651}
652
653/// DGL-specific export settings.
654#[derive(Debug, Clone, Default, Serialize, Deserialize)]
655pub struct DglExportConfig {
656    /// Export as a heterogeneous graph (distinct node/edge types).
657    ///
658    /// When `true` the DGL exporter produces a `HeteroData` object with typed
659    /// node and edge stores rather than a single homogeneous graph.
660    /// Set to `true` in `graph_export.dgl.heterogeneous: true` in YAML.
661    #[serde(default)]
662    pub heterogeneous: bool,
663}
664
665// Default derived: heterogeneous = false (bool default)
666
667/// Settings for the multi-layer hypergraph export (RustGraph integration).
668///
669/// Produces a 3-layer hypergraph:
670/// - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
671/// - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
672/// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
673#[derive(Debug, Clone, Serialize, Deserialize)]
674pub struct HypergraphExportSettings {
675    /// Enable hypergraph export.
676    #[serde(default)]
677    pub enabled: bool,
678
679    /// Maximum total nodes across all layers (default 50000).
680    #[serde(default = "default_hypergraph_max_nodes")]
681    pub max_nodes: usize,
682
683    /// Aggregation strategy when node budget is exceeded.
684    #[serde(default = "default_aggregation_strategy")]
685    pub aggregation_strategy: String,
686
687    /// Layer 1 (Governance & Controls) settings.
688    #[serde(default)]
689    pub governance_layer: GovernanceLayerSettings,
690
691    /// Layer 2 (Process Events) settings.
692    #[serde(default)]
693    pub process_layer: ProcessLayerSettings,
694
695    /// Layer 3 (Accounting Network) settings.
696    #[serde(default)]
697    pub accounting_layer: AccountingLayerSettings,
698
699    /// Cross-layer edge generation settings.
700    #[serde(default)]
701    pub cross_layer: CrossLayerSettings,
702
703    /// Output subdirectory for hypergraph files (relative to graph output directory).
704    #[serde(default = "default_hypergraph_subdir")]
705    pub output_subdirectory: String,
706
707    /// Output format: "native" (default) for internal field names, "unified" for RustGraph format.
708    #[serde(default = "default_hypergraph_format")]
709    pub output_format: String,
710
711    /// Optional URL for streaming unified JSONL to a RustGraph ingest endpoint.
712    #[serde(default)]
713    pub stream_target: Option<String>,
714
715    /// Batch size for streaming (number of JSONL lines per HTTP POST). Default: 1000.
716    #[serde(default = "default_stream_batch_size")]
717    pub stream_batch_size: usize,
718}
719
/// Serde fallback for `HypergraphExportSettings::max_nodes`.
fn default_hypergraph_max_nodes() -> usize {
    const NODE_BUDGET: usize = 50_000;
    NODE_BUDGET
}
723
/// Serde fallback for `HypergraphExportSettings::aggregation_strategy`.
fn default_aggregation_strategy() -> String {
    String::from("pool_by_counterparty")
}
727
/// Serde fallback for `HypergraphExportSettings::output_subdirectory`.
fn default_hypergraph_subdir() -> String {
    "hypergraph".to_owned()
}
731
/// Serde fallback for `HypergraphExportSettings::output_format`.
fn default_hypergraph_format() -> String {
    String::from("native")
}
735
/// Serde fallback for `HypergraphExportSettings::stream_batch_size`.
fn default_stream_batch_size() -> usize {
    const LINES_PER_BATCH: usize = 1000;
    LINES_PER_BATCH
}
739
740impl Default for HypergraphExportSettings {
741    fn default() -> Self {
742        Self {
743            enabled: false,
744            max_nodes: 50_000,
745            aggregation_strategy: "pool_by_counterparty".to_string(),
746            governance_layer: GovernanceLayerSettings::default(),
747            process_layer: ProcessLayerSettings::default(),
748            accounting_layer: AccountingLayerSettings::default(),
749            cross_layer: CrossLayerSettings::default(),
750            output_subdirectory: "hypergraph".to_string(),
751            output_format: "native".to_string(),
752            stream_target: None,
753            stream_batch_size: 1000,
754        }
755    }
756}
757
758/// Layer 1: Governance & Controls layer settings.
759#[derive(Debug, Clone, Serialize, Deserialize)]
760pub struct GovernanceLayerSettings {
761    /// Include COSO framework nodes (5 components + 17 principles).
762    #[serde(default = "default_true")]
763    pub include_coso: bool,
764    /// Include internal control nodes.
765    #[serde(default = "default_true")]
766    pub include_controls: bool,
767    /// Include SOX assertion nodes.
768    #[serde(default = "default_true")]
769    pub include_sox: bool,
770    /// Include vendor master data nodes.
771    #[serde(default = "default_true")]
772    pub include_vendors: bool,
773    /// Include customer master data nodes.
774    #[serde(default = "default_true")]
775    pub include_customers: bool,
776    /// Include employee/organizational nodes.
777    #[serde(default = "default_true")]
778    pub include_employees: bool,
779}
780
781impl Default for GovernanceLayerSettings {
782    fn default() -> Self {
783        Self {
784            include_coso: true,
785            include_controls: true,
786            include_sox: true,
787            include_vendors: true,
788            include_customers: true,
789            include_employees: true,
790        }
791    }
792}
793
794/// Layer 2: Process Events layer settings.
795#[derive(Debug, Clone, Serialize, Deserialize)]
796pub struct ProcessLayerSettings {
797    /// Include P2P (Procure-to-Pay) document flow nodes.
798    #[serde(default = "default_true")]
799    pub include_p2p: bool,
800    /// Include O2C (Order-to-Cash) document flow nodes.
801    #[serde(default = "default_true")]
802    pub include_o2c: bool,
803    /// Include S2C (Source-to-Contract) document flow nodes.
804    #[serde(default = "default_true")]
805    pub include_s2c: bool,
806    /// Include H2R (Hire-to-Retire) document flow nodes.
807    #[serde(default = "default_true")]
808    pub include_h2r: bool,
809    /// Include MFG (Manufacturing) document flow nodes.
810    #[serde(default = "default_true")]
811    pub include_mfg: bool,
812    /// Include BANK (Banking) document flow nodes.
813    #[serde(default = "default_true")]
814    pub include_bank: bool,
815    /// Include AUDIT document flow nodes.
816    #[serde(default = "default_true")]
817    pub include_audit: bool,
818    /// Include R2R (Record-to-Report) document flow nodes (bank recon + period close).
819    #[serde(default = "default_true")]
820    pub include_r2r: bool,
821    /// Export OCPM events as hyperedges.
822    #[serde(default = "default_true")]
823    pub events_as_hyperedges: bool,
824    /// Threshold: if a counterparty has more documents than this, aggregate into pool nodes.
825    #[serde(default = "default_docs_per_counterparty_threshold")]
826    pub docs_per_counterparty_threshold: usize,
827}
828
/// Serde default for `ProcessLayerSettings::docs_per_counterparty_threshold`.
fn default_docs_per_counterparty_threshold() -> usize {
    const DOCS_BEFORE_POOLING: usize = 20;
    DOCS_BEFORE_POOLING
}
832
833impl Default for ProcessLayerSettings {
834    fn default() -> Self {
835        Self {
836            include_p2p: true,
837            include_o2c: true,
838            include_s2c: true,
839            include_h2r: true,
840            include_mfg: true,
841            include_bank: true,
842            include_audit: true,
843            include_r2r: true,
844            events_as_hyperedges: true,
845            docs_per_counterparty_threshold: 20,
846        }
847    }
848}
849
850/// Layer 3: Accounting Network layer settings.
851#[derive(Debug, Clone, Serialize, Deserialize)]
852pub struct AccountingLayerSettings {
853    /// Include GL account nodes.
854    #[serde(default = "default_true")]
855    pub include_accounts: bool,
856    /// Export journal entries as hyperedges (debit+credit accounts as participants).
857    #[serde(default = "default_true")]
858    pub je_as_hyperedges: bool,
859}
860
861impl Default for AccountingLayerSettings {
862    fn default() -> Self {
863        Self {
864            include_accounts: true,
865            je_as_hyperedges: true,
866        }
867    }
868}
869
870/// Cross-layer edge generation settings.
871#[derive(Debug, Clone, Serialize, Deserialize)]
872pub struct CrossLayerSettings {
873    /// Generate cross-layer edges (Control→Account, Vendor→PO, etc.).
874    #[serde(default = "default_true")]
875    pub enabled: bool,
876}
877
878impl Default for CrossLayerSettings {
879    fn default() -> Self {
880        Self { enabled: true }
881    }
882}
883
884/// Configuration for a specific graph type to export.
885#[derive(Debug, Clone, Serialize, Deserialize)]
886pub struct GraphTypeConfig {
887    /// Name identifier for this graph configuration.
888    #[serde(default = "default_graph_name")]
889    pub name: String,
890
891    /// Whether to aggregate parallel edges between the same nodes.
892    #[serde(default)]
893    pub aggregate_edges: bool,
894
895    /// Minimum edge weight to include (filters out small transactions).
896    #[serde(default)]
897    pub min_edge_weight: f64,
898
899    /// Whether to include document nodes (creates hub-and-spoke structure).
900    #[serde(default)]
901    pub include_document_nodes: bool,
902}
903
/// Serde default for `GraphTypeConfig::name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
907
908impl Default for GraphTypeConfig {
909    fn default() -> Self {
910        Self {
911            name: "accounting_network".to_string(),
912            aggregate_edges: false,
913            min_edge_weight: 0.0,
914            include_document_nodes: false,
915        }
916    }
917}
918
919/// Export format for graph data.
920#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
921#[serde(rename_all = "snake_case")]
922pub enum GraphExportFormat {
923    /// PyTorch Geometric format (.npy files + metadata.json).
924    PytorchGeometric,
925    /// Neo4j format (CSV files + Cypher import scripts).
926    Neo4j,
927    /// Deep Graph Library format.
928    Dgl,
929    /// RustGraph/RustAssureTwin JSON format.
930    RustGraph,
931    /// RustGraph multi-layer hypergraph format (nodes.jsonl + edges.jsonl + hyperedges.jsonl).
932    RustGraphHypergraph,
933}
934
935/// Scenario configuration for metadata, tagging, and ML training setup.
936///
937/// This section enables tracking the purpose and characteristics of a generation run.
938#[derive(Debug, Clone, Default, Serialize, Deserialize)]
939pub struct ScenarioConfig {
940    /// Tags for categorizing and filtering datasets.
941    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
942    #[serde(default)]
943    pub tags: Vec<String>,
944
945    /// Data quality profile preset.
946    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
947    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
948    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
949    #[serde(default)]
950    pub profile: Option<String>,
951
952    /// Human-readable description of the scenario purpose.
953    #[serde(default)]
954    pub description: Option<String>,
955
956    /// Whether this run is for ML training (enables balanced labeling).
957    #[serde(default)]
958    pub ml_training: bool,
959
960    /// Target anomaly class balance for ML training.
961    /// If set, anomalies will be injected to achieve this ratio.
962    #[serde(default)]
963    pub target_anomaly_ratio: Option<f64>,
964
965    /// Custom metadata key-value pairs.
966    #[serde(default)]
967    pub metadata: std::collections::HashMap<String, String>,
968}
969
970/// Temporal drift configuration for simulating distribution changes over time.
971///
972/// This enables generation of data that shows realistic temporal evolution,
973/// useful for training drift detection models and testing temporal robustness.
974#[derive(Debug, Clone, Serialize, Deserialize)]
975pub struct TemporalDriftConfig {
976    /// Enable temporal drift simulation.
977    #[serde(default)]
978    pub enabled: bool,
979
980    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
981    /// Simulates gradual inflation or business growth.
982    #[serde(default = "default_amount_drift")]
983    pub amount_mean_drift: f64,
984
985    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
986    /// Simulates increasing volatility over time.
987    #[serde(default)]
988    pub amount_variance_drift: f64,
989
990    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
991    /// Simulates increasing fraud attempts or degrading controls.
992    #[serde(default)]
993    pub anomaly_rate_drift: f64,
994
995    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
996    /// Higher values cause more rapid distribution shifts.
997    #[serde(default = "default_concept_drift")]
998    pub concept_drift_rate: f64,
999
1000    /// Sudden drift events - probability of a sudden distribution shift in any period.
1001    #[serde(default)]
1002    pub sudden_drift_probability: f64,
1003
1004    /// Magnitude of sudden drift events when they occur (multiplier).
1005    #[serde(default = "default_sudden_drift_magnitude")]
1006    pub sudden_drift_magnitude: f64,
1007
1008    /// Seasonal drift - enable cyclic patterns that repeat annually.
1009    #[serde(default)]
1010    pub seasonal_drift: bool,
1011
1012    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
1013    #[serde(default)]
1014    pub drift_start_period: u32,
1015
1016    /// Drift type: "gradual", "sudden", "recurring", "mixed"
1017    #[serde(default = "default_drift_type")]
1018    pub drift_type: DriftType,
1019}
1020
/// Serde default for `TemporalDriftConfig::amount_mean_drift` (2% per period).
fn default_amount_drift() -> f64 {
    const MEAN_DRIFT_PER_PERIOD: f64 = 0.02;
    MEAN_DRIFT_PER_PERIOD
}

/// Serde default for `TemporalDriftConfig::concept_drift_rate`.
fn default_concept_drift() -> f64 {
    const CONCEPT_DRIFT_RATE: f64 = 0.01;
    CONCEPT_DRIFT_RATE
}

/// Serde default for `TemporalDriftConfig::sudden_drift_magnitude`.
fn default_sudden_drift_magnitude() -> f64 {
    const SUDDEN_DRIFT_MULTIPLIER: f64 = 2.0;
    SUDDEN_DRIFT_MULTIPLIER
}
1032
1033fn default_drift_type() -> DriftType {
1034    DriftType::Gradual
1035}
1036
1037impl Default for TemporalDriftConfig {
1038    fn default() -> Self {
1039        Self {
1040            enabled: false,
1041            amount_mean_drift: 0.02,
1042            amount_variance_drift: 0.0,
1043            anomaly_rate_drift: 0.0,
1044            concept_drift_rate: 0.01,
1045            sudden_drift_probability: 0.0,
1046            sudden_drift_magnitude: 2.0,
1047            seasonal_drift: false,
1048            drift_start_period: 0,
1049            drift_type: DriftType::Gradual,
1050        }
1051    }
1052}
1053
1054impl TemporalDriftConfig {
1055    /// Convert to core DriftConfig for use in generators.
1056    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
1057        datasynth_core::distributions::DriftConfig {
1058            enabled: self.enabled,
1059            amount_mean_drift: self.amount_mean_drift,
1060            amount_variance_drift: self.amount_variance_drift,
1061            anomaly_rate_drift: self.anomaly_rate_drift,
1062            concept_drift_rate: self.concept_drift_rate,
1063            sudden_drift_probability: self.sudden_drift_probability,
1064            sudden_drift_magnitude: self.sudden_drift_magnitude,
1065            seasonal_drift: self.seasonal_drift,
1066            drift_start_period: self.drift_start_period,
1067            drift_type: match self.drift_type {
1068                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
1069                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
1070                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
1071                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
1072            },
1073            regime_changes: Vec::new(),
1074            economic_cycle: Default::default(),
1075            parameter_drifts: Vec::new(),
1076        }
1077    }
1078}
1079
1080/// Types of temporal drift patterns.
1081#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1082#[serde(rename_all = "snake_case")]
1083pub enum DriftType {
1084    /// Gradual, continuous drift over time (like inflation).
1085    #[default]
1086    Gradual,
1087    /// Sudden, point-in-time shifts (like policy changes).
1088    Sudden,
1089    /// Recurring patterns that cycle (like seasonal variations).
1090    Recurring,
1091    /// Combination of gradual background drift with occasional sudden shifts.
1092    Mixed,
1093}
1094
1095// ============================================================================
1096// Streaming Output API Configuration (Phase 2)
1097// ============================================================================
1098
1099/// Configuration for streaming output API.
1100#[derive(Debug, Clone, Serialize, Deserialize)]
1101pub struct StreamingSchemaConfig {
1102    /// Enable streaming output.
1103    #[serde(default)]
1104    pub enabled: bool,
1105    /// Target events per second (0 = unlimited, default 0).
1106    #[serde(default)]
1107    pub events_per_second: f64,
1108    /// Token bucket burst size (default 100).
1109    #[serde(default = "default_burst_size")]
1110    pub burst_size: u32,
1111    /// Buffer size for streaming (number of items).
1112    #[serde(default = "default_buffer_size")]
1113    pub buffer_size: usize,
1114    /// Enable progress reporting.
1115    #[serde(default = "default_true")]
1116    pub enable_progress: bool,
1117    /// Progress reporting interval (number of items).
1118    #[serde(default = "default_progress_interval")]
1119    pub progress_interval: u64,
1120    /// Backpressure strategy.
1121    #[serde(default)]
1122    pub backpressure: BackpressureSchemaStrategy,
1123}
1124
/// Serde default for `StreamingSchemaConfig::buffer_size`, in items.
fn default_buffer_size() -> usize {
    1_000
}

/// Serde default for `StreamingSchemaConfig::progress_interval`, in items.
fn default_progress_interval() -> u64 {
    const ITEMS_BETWEEN_REPORTS: u64 = 100;
    ITEMS_BETWEEN_REPORTS
}
1132
1133impl Default for StreamingSchemaConfig {
1134    fn default() -> Self {
1135        Self {
1136            enabled: false,
1137            events_per_second: 0.0,
1138            burst_size: 100,
1139            buffer_size: 1000,
1140            enable_progress: true,
1141            progress_interval: 100,
1142            backpressure: BackpressureSchemaStrategy::Block,
1143        }
1144    }
1145}
1146
1147/// Backpressure strategy for streaming output.
1148#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1149#[serde(rename_all = "snake_case")]
1150pub enum BackpressureSchemaStrategy {
1151    /// Block until space is available in the buffer.
1152    #[default]
1153    Block,
1154    /// Drop oldest items when buffer is full.
1155    DropOldest,
1156    /// Drop newest items when buffer is full.
1157    DropNewest,
1158    /// Buffer overflow items up to a limit, then block.
1159    Buffer,
1160}
1161
1162// ============================================================================
1163// Rate Limiting Configuration (Phase 5)
1164// ============================================================================
1165
1166/// Configuration for rate limiting.
1167#[derive(Debug, Clone, Serialize, Deserialize)]
1168pub struct RateLimitSchemaConfig {
1169    /// Enable rate limiting.
1170    #[serde(default)]
1171    pub enabled: bool,
1172    /// Entities per second limit.
1173    #[serde(default = "default_entities_per_second")]
1174    pub entities_per_second: f64,
1175    /// Burst size (number of tokens in bucket).
1176    #[serde(default = "default_burst_size")]
1177    pub burst_size: u32,
1178    /// Backpressure strategy for rate limiting.
1179    #[serde(default)]
1180    pub backpressure: RateLimitBackpressureSchema,
1181}
1182
/// Serde default for `RateLimitSchemaConfig::entities_per_second`.
fn default_entities_per_second() -> f64 {
    const ENTITIES_PER_SECOND: f64 = 1000.0;
    ENTITIES_PER_SECOND
}

/// Serde default for the `burst_size` field of both
/// `StreamingSchemaConfig` and `RateLimitSchemaConfig`.
fn default_burst_size() -> u32 {
    const BUCKET_TOKENS: u32 = 100;
    BUCKET_TOKENS
}
1190
1191impl Default for RateLimitSchemaConfig {
1192    fn default() -> Self {
1193        Self {
1194            enabled: false,
1195            entities_per_second: 1000.0,
1196            burst_size: 100,
1197            backpressure: RateLimitBackpressureSchema::Block,
1198        }
1199    }
1200}
1201
1202/// Backpressure strategy for rate limiting.
1203#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1204#[serde(rename_all = "snake_case")]
1205pub enum RateLimitBackpressureSchema {
1206    /// Block until rate allows.
1207    #[default]
1208    Block,
1209    /// Drop items that exceed rate.
1210    Drop,
1211    /// Buffer items and process when rate allows.
1212    Buffer,
1213}
1214
1215// ============================================================================
1216// Temporal Attribute Generation Configuration (Phase 3)
1217// ============================================================================
1218
1219/// Configuration for temporal attribute generation.
1220#[derive(Debug, Clone, Serialize, Deserialize)]
1221pub struct TemporalAttributeSchemaConfig {
1222    /// Enable temporal attribute generation.
1223    #[serde(default)]
1224    pub enabled: bool,
1225    /// Valid time configuration.
1226    #[serde(default)]
1227    pub valid_time: ValidTimeSchemaConfig,
1228    /// Transaction time configuration.
1229    #[serde(default)]
1230    pub transaction_time: TransactionTimeSchemaConfig,
1231    /// Generate version chains for entities.
1232    #[serde(default)]
1233    pub generate_version_chains: bool,
1234    /// Average number of versions per entity.
1235    #[serde(default = "default_avg_versions")]
1236    pub avg_versions_per_entity: f64,
1237}
1238
/// Serde default for `TemporalAttributeSchemaConfig::avg_versions_per_entity`.
fn default_avg_versions() -> f64 {
    const AVG_VERSIONS_PER_ENTITY: f64 = 1.5;
    AVG_VERSIONS_PER_ENTITY
}
1242
1243impl Default for TemporalAttributeSchemaConfig {
1244    fn default() -> Self {
1245        Self {
1246            enabled: false,
1247            valid_time: ValidTimeSchemaConfig::default(),
1248            transaction_time: TransactionTimeSchemaConfig::default(),
1249            generate_version_chains: false,
1250            avg_versions_per_entity: 1.5,
1251        }
1252    }
1253}
1254
1255/// Configuration for valid time (business time) generation.
1256#[derive(Debug, Clone, Serialize, Deserialize)]
1257pub struct ValidTimeSchemaConfig {
1258    /// Probability that valid_to is set (entity has ended validity).
1259    #[serde(default = "default_closed_probability")]
1260    pub closed_probability: f64,
1261    /// Average validity duration in days.
1262    #[serde(default = "default_avg_validity_days")]
1263    pub avg_validity_days: u32,
1264    /// Standard deviation of validity duration in days.
1265    #[serde(default = "default_validity_stddev")]
1266    pub validity_stddev_days: u32,
1267}
1268
/// Serde default for `ValidTimeSchemaConfig::closed_probability`.
fn default_closed_probability() -> f64 {
    const CLOSED_PROBABILITY: f64 = 0.1;
    CLOSED_PROBABILITY
}

/// Serde default for `ValidTimeSchemaConfig::avg_validity_days`.
fn default_avg_validity_days() -> u32 {
    const AVG_VALIDITY_DAYS: u32 = 365;
    AVG_VALIDITY_DAYS
}

/// Serde default for `ValidTimeSchemaConfig::validity_stddev_days`.
fn default_validity_stddev() -> u32 {
    const VALIDITY_STDDEV_DAYS: u32 = 90;
    VALIDITY_STDDEV_DAYS
}
1280
1281impl Default for ValidTimeSchemaConfig {
1282    fn default() -> Self {
1283        Self {
1284            closed_probability: 0.1,
1285            avg_validity_days: 365,
1286            validity_stddev_days: 90,
1287        }
1288    }
1289}
1290
1291/// Configuration for transaction time (system time) generation.
1292#[derive(Debug, Clone, Serialize, Deserialize)]
1293pub struct TransactionTimeSchemaConfig {
1294    /// Average recording delay in seconds (0 = immediate).
1295    #[serde(default)]
1296    pub avg_recording_delay_seconds: u32,
1297    /// Allow backdating (recording time before valid time).
1298    #[serde(default)]
1299    pub allow_backdating: bool,
1300    /// Probability of backdating if allowed.
1301    #[serde(default = "default_backdating_probability")]
1302    pub backdating_probability: f64,
1303    /// Maximum backdate days.
1304    #[serde(default = "default_max_backdate_days")]
1305    pub max_backdate_days: u32,
1306}
1307
/// Serde default for `TransactionTimeSchemaConfig::backdating_probability`.
fn default_backdating_probability() -> f64 {
    const BACKDATING_PROBABILITY: f64 = 0.01;
    BACKDATING_PROBABILITY
}

/// Serde default for `TransactionTimeSchemaConfig::max_backdate_days`.
fn default_max_backdate_days() -> u32 {
    const MAX_BACKDATE_DAYS: u32 = 30;
    MAX_BACKDATE_DAYS
}
1315
1316impl Default for TransactionTimeSchemaConfig {
1317    fn default() -> Self {
1318        Self {
1319            avg_recording_delay_seconds: 0,
1320            allow_backdating: false,
1321            backdating_probability: 0.01,
1322            max_backdate_days: 30,
1323        }
1324    }
1325}
1326
1327// ============================================================================
1328// Relationship Generation Configuration (Phase 4)
1329// ============================================================================
1330
1331/// Configuration for relationship generation.
1332#[derive(Debug, Clone, Serialize, Deserialize)]
1333pub struct RelationshipSchemaConfig {
1334    /// Relationship type definitions.
1335    #[serde(default)]
1336    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
1337    /// Allow orphan entities (entities with no relationships).
1338    #[serde(default = "default_true")]
1339    pub allow_orphans: bool,
1340    /// Probability of creating an orphan entity.
1341    #[serde(default = "default_orphan_probability")]
1342    pub orphan_probability: f64,
1343    /// Allow circular relationships.
1344    #[serde(default)]
1345    pub allow_circular: bool,
1346    /// Maximum depth for circular relationship detection.
1347    #[serde(default = "default_max_circular_depth")]
1348    pub max_circular_depth: u32,
1349}
1350
/// Serde default for `RelationshipSchemaConfig::orphan_probability`.
fn default_orphan_probability() -> f64 {
    const ORPHAN_PROBABILITY: f64 = 0.01;
    ORPHAN_PROBABILITY
}

/// Serde default for `RelationshipSchemaConfig::max_circular_depth`.
fn default_max_circular_depth() -> u32 {
    const MAX_CIRCULAR_DEPTH: u32 = 3;
    MAX_CIRCULAR_DEPTH
}
1358
1359impl Default for RelationshipSchemaConfig {
1360    fn default() -> Self {
1361        Self {
1362            relationship_types: Vec::new(),
1363            allow_orphans: true,
1364            orphan_probability: 0.01,
1365            allow_circular: false,
1366            max_circular_depth: 3,
1367        }
1368    }
1369}
1370
1371/// Configuration for a specific relationship type.
1372#[derive(Debug, Clone, Serialize, Deserialize)]
1373pub struct RelationshipTypeSchemaConfig {
1374    /// Name of the relationship type (e.g., "debits", "credits", "created").
1375    pub name: String,
1376    /// Source entity type (e.g., "journal_entry").
1377    pub source_type: String,
1378    /// Target entity type (e.g., "account").
1379    pub target_type: String,
1380    /// Cardinality rule for this relationship.
1381    #[serde(default)]
1382    pub cardinality: CardinalitySchemaRule,
1383    /// Weight for this relationship in random selection.
1384    #[serde(default = "default_relationship_weight")]
1385    pub weight: f64,
1386    /// Whether this relationship is required.
1387    #[serde(default)]
1388    pub required: bool,
1389    /// Whether this relationship is directed.
1390    #[serde(default = "default_true")]
1391    pub directed: bool,
1392}
1393
/// Serde default for `RelationshipTypeSchemaConfig::weight`.
fn default_relationship_weight() -> f64 {
    const UNIT_WEIGHT: f64 = 1.0;
    UNIT_WEIGHT
}
1397
1398impl Default for RelationshipTypeSchemaConfig {
1399    fn default() -> Self {
1400        Self {
1401            name: String::new(),
1402            source_type: String::new(),
1403            target_type: String::new(),
1404            cardinality: CardinalitySchemaRule::default(),
1405            weight: 1.0,
1406            required: false,
1407            directed: true,
1408        }
1409    }
1410}
1411
1412/// Cardinality rule for relationships in schema config.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414#[serde(rename_all = "snake_case")]
1415pub enum CardinalitySchemaRule {
1416    /// One source to one target.
1417    OneToOne,
1418    /// One source to many targets.
1419    OneToMany {
1420        /// Minimum number of targets.
1421        min: u32,
1422        /// Maximum number of targets.
1423        max: u32,
1424    },
1425    /// Many sources to one target.
1426    ManyToOne {
1427        /// Minimum number of sources.
1428        min: u32,
1429        /// Maximum number of sources.
1430        max: u32,
1431    },
1432    /// Many sources to many targets.
1433    ManyToMany {
1434        /// Minimum targets per source.
1435        min_per_source: u32,
1436        /// Maximum targets per source.
1437        max_per_source: u32,
1438    },
1439}
1440
1441impl Default for CardinalitySchemaRule {
1442    fn default() -> Self {
1443        Self::OneToMany { min: 1, max: 5 }
1444    }
1445}
1446
1447/// Global configuration settings.
1448#[derive(Debug, Clone, Serialize, Deserialize)]
1449pub struct GlobalConfig {
1450    /// Random seed for reproducibility
1451    pub seed: Option<u64>,
1452    /// Industry sector
1453    pub industry: IndustrySector,
1454    /// Simulation start date (YYYY-MM-DD)
1455    #[serde(alias = "startDate")]
1456    pub start_date: String,
1457    /// Simulation period in months
1458    #[serde(alias = "periodMonths")]
1459    pub period_months: u32,
1460    /// Base currency for group reporting
1461    #[serde(default = "default_currency", alias = "groupCurrency")]
1462    pub group_currency: String,
1463    /// Presentation currency for consolidated financial statements (ISO 4217).
1464    /// If not set, defaults to `group_currency`.
1465    #[serde(default, alias = "presentationCurrency")]
1466    pub presentation_currency: Option<String>,
1467    /// Enable parallel generation
1468    #[serde(default = "default_true")]
1469    pub parallel: bool,
1470    /// Number of worker threads (0 = auto-detect)
1471    #[serde(default, alias = "workerThreads")]
1472    pub worker_threads: usize,
1473    /// Memory limit in MB (0 = unlimited)
1474    #[serde(default, alias = "memoryLimitMb")]
1475    pub memory_limit_mb: usize,
1476    /// Fiscal year length in months (defaults to 12 if not set).
1477    /// Used by session-based generation to split the total period into fiscal years.
1478    #[serde(default, alias = "fiscalYearMonths")]
1479    pub fiscal_year_months: Option<u32>,
1480}
1481
/// Serde default for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}
/// Shared serde default for boolean fields that are on unless switched off.
fn default_true() -> bool {
    true
}
1488
1489/// Configuration for generation session behavior.
1490///
1491/// When enabled, the generation pipeline splits the total period into fiscal years
1492/// and generates data period-by-period, carrying forward balance state.
1493#[derive(Debug, Clone, Serialize, Deserialize)]
1494pub struct SessionSchemaConfig {
1495    /// Whether session-based (period-by-period) generation is enabled.
1496    #[serde(default)]
1497    pub enabled: bool,
1498    /// Optional path for saving/loading session checkpoint files.
1499    #[serde(default)]
1500    pub checkpoint_path: Option<String>,
1501    /// Whether to write output files per fiscal period (e.g., `period_01/`).
1502    #[serde(default = "default_true")]
1503    pub per_period_output: bool,
1504    /// Whether to also produce a single consolidated output across all periods.
1505    #[serde(default = "default_true")]
1506    pub consolidated_output: bool,
1507}
1508
1509impl Default for SessionSchemaConfig {
1510    fn default() -> Self {
1511        Self {
1512            enabled: false,
1513            checkpoint_path: None,
1514            per_period_output: true,
1515            consolidated_output: true,
1516        }
1517    }
1518}
1519
1520/// Company code configuration.
1521#[derive(Debug, Clone, Serialize, Deserialize)]
1522pub struct CompanyConfig {
1523    /// Company code identifier
1524    pub code: String,
1525    /// Company name
1526    pub name: String,
1527    /// Local currency (ISO 4217)
1528    pub currency: String,
1529    /// Functional currency for IAS 21 translation (ISO 4217).
1530    /// If not set, defaults to the `currency` field (i.e. local == functional).
1531    #[serde(default, alias = "functionalCurrency")]
1532    pub functional_currency: Option<String>,
1533    /// Country code (ISO 3166-1 alpha-2)
1534    pub country: String,
1535    /// Fiscal year variant
1536    #[serde(default = "default_fiscal_variant", alias = "fiscalYearVariant")]
1537    pub fiscal_year_variant: String,
1538    /// Transaction volume per year
1539    #[serde(alias = "annualTransactionVolume")]
1540    pub annual_transaction_volume: TransactionVolume,
1541    /// Company-specific transaction weight
1542    #[serde(default = "default_weight", alias = "volumeWeight")]
1543    pub volume_weight: f64,
1544}
1545
/// Serde default for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}
/// Serde default for `CompanyConfig::volume_weight`.
fn default_weight() -> f64 {
    const UNIT_WEIGHT: f64 = 1.0;
    UNIT_WEIGHT
}
1552
1553/// Transaction volume presets.
1554#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1555#[serde(rename_all = "snake_case")]
1556pub enum TransactionVolume {
1557    /// 10,000 transactions per year
1558    TenK,
1559    /// 50,000 transactions per year
1560    FiftyK,
1561    /// 100,000 transactions per year
1562    HundredK,
1563    /// 1,000,000 transactions per year
1564    OneM,
1565    /// 10,000,000 transactions per year
1566    TenM,
1567    /// 100,000,000 transactions per year
1568    HundredM,
1569    /// Custom count
1570    Custom(u64),
1571}
1572
1573impl TransactionVolume {
1574    /// Get the transaction count.
1575    pub fn count(&self) -> u64 {
1576        match self {
1577            Self::TenK => 10_000,
1578            Self::FiftyK => 50_000,
1579            Self::HundredK => 100_000,
1580            Self::OneM => 1_000_000,
1581            Self::TenM => 10_000_000,
1582            Self::HundredM => 100_000_000,
1583            Self::Custom(n) => *n,
1584        }
1585    }
1586}
1587
1588/// Chart of Accounts configuration.
1589#[derive(Debug, Clone, Serialize, Deserialize)]
1590pub struct ChartOfAccountsConfig {
1591    /// CoA complexity level
1592    pub complexity: CoAComplexity,
1593    /// Use industry-specific accounts
1594    #[serde(default = "default_true")]
1595    pub industry_specific: bool,
1596    /// Custom account definitions file
1597    pub custom_accounts: Option<PathBuf>,
1598    /// Minimum hierarchy depth
1599    #[serde(default = "default_min_depth")]
1600    pub min_hierarchy_depth: u8,
1601    /// Maximum hierarchy depth
1602    #[serde(default = "default_max_depth")]
1603    pub max_hierarchy_depth: u8,
1604    /// **v5.7.0** — expand canonical accounts into industry-specific
1605    /// 6-digit sub-accounts using the embedded
1606    /// [`datasynth_core::industry_packs`] (manufacturing, retail,
1607    /// financial_services, healthcare, technology). When `true`:
1608    ///
1609    /// - Each canonical 4-digit account that has an expansion in the
1610    ///   pack becomes a non-postable control account (`is_postable =
1611    ///   false`).
1612    /// - 2–6 6-digit sub-accounts are added per parent, with
1613    ///   suffix-driven names (`"Product Revenue — Steel Products"`),
1614    ///   industry-realistic gaps, and inherited ISO 21378 codes.
1615    /// - Generators that currently target canonical accounts via
1616    ///   constants will pick a sub-account deterministically per
1617    ///   `document_id` (preserving seed-based reproducibility).
1618    ///
1619    /// Default: `false` (preserves v5.6.0 behaviour exactly — same
1620    /// account count, same numbering, same goldens).
1621    #[serde(default, alias = "expandIndustrySubaccounts")]
1622    pub expand_industry_subaccounts: bool,
1623}
1624
/// Serde default for `ChartOfAccountsConfig::min_hierarchy_depth`.
fn default_min_depth() -> u8 {
    const MIN_HIERARCHY_DEPTH: u8 = 2;
    MIN_HIERARCHY_DEPTH
}
/// Serde default for `ChartOfAccountsConfig::max_hierarchy_depth`.
fn default_max_depth() -> u8 {
    const MAX_HIERARCHY_DEPTH: u8 = 5;
    MAX_HIERARCHY_DEPTH
}
1631
1632impl Default for ChartOfAccountsConfig {
1633    fn default() -> Self {
1634        Self {
1635            complexity: CoAComplexity::Small,
1636            industry_specific: true,
1637            custom_accounts: None,
1638            min_hierarchy_depth: default_min_depth(),
1639            max_hierarchy_depth: default_max_depth(),
1640            expand_industry_subaccounts: false,
1641        }
1642    }
1643}
1644
1645/// Transaction generation configuration.
1646#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1647pub struct TransactionConfig {
1648    /// Line item distribution
1649    #[serde(default)]
1650    pub line_item_distribution: LineItemDistributionConfig,
1651    /// P0a: by default the JE total is coupled to its line count (scaled by
1652    /// `(per-side line count)^0.85`) so multi-line JEs don't split a 2-line-sized
1653    /// total into tiny per-line amounts. Set this to disable that coupling and
1654    /// keep amount independent of line count (e.g. to isolate an explicit
1655    /// amount↔line_count copula). Default (false) keeps the realism coupling on.
1656    #[serde(default)]
1657    pub disable_line_count_amount_coupling: bool,
1658    /// Debit/credit balance distribution
1659    #[serde(default)]
1660    pub debit_credit_distribution: DebitCreditDistributionConfig,
1661    /// Even/odd line count distribution
1662    #[serde(default)]
1663    pub even_odd_distribution: EvenOddDistributionConfig,
1664    /// Transaction source distribution
1665    #[serde(default)]
1666    pub source_distribution: SourceDistribution,
1667    /// **T2-D** Source-mix breadth. When unset or `true` (the default), the
1668    /// emitted `source` column is drawn from a generic SAP document-type mix
1669    /// (~25 codes, entropy ~2.7) instead of the coarse `TransactionSource`
1670    /// enum (~4 values, entropy ~0.75), closing the source-mix gap measured
1671    /// in experiments/ml/FINDINGS.md §6. Industry priors, when loaded, take
1672    /// precedence. Set `false` to restore the legacy enum labels. `Option`
1673    /// (not bare `bool`) so the default is genuinely on under both serde and
1674    /// `Default::default()`.
1675    #[serde(default)]
1676    pub synthetic_source_codes: Option<bool>,
1677    /// **SOTA-1** Recurring / standard-journal templating. When unset or `true`
1678    /// (the default), the no-priors generation path reuses a small per-(company,
1679    /// process) library of standard JE account-archetypes with high probability,
1680    /// so standard postings recur (and a hot subset of accounts dominates)
1681    /// instead of every JE drawing fresh uniform accounts. Matches the corpus's
1682    /// heavy templating (FINDINGS.md sec.8: 97% recurring, top-50 cover 65%; vs
1683    /// the engine's 758/1k unique). Reuse overrides only account *choice* (the
1684    /// main RNG + amounts/dates/counts are unchanged). Set `false` for the
1685    /// legacy uniform-per-line account selection.
1686    #[serde(default)]
1687    pub recurring_templates: Option<bool>,
1688    /// **SOTA-5** Fraction of journal entries that are reversals/corrections of
1689    /// a recent JE (swap dr/cr, reference the original) — a process auditors
1690    /// specifically look for, and largely absent from the engine (FINDINGS.md
1691    /// sec.8: corpus reversal-proxy ~10% vs synthetic ~0.2%). Unset → a default
1692    /// of ~0.10 (matching the corpus proxy); `0.0` disables it. Reversals are
1693    /// interspersed without perturbing the normal JEs (separate RNG + derived id).
1694    #[serde(default)]
1695    pub reversal_rate: Option<f64>,
1696    /// **SOTA-2** Concentrate posting activity onto a hot subset of accounts via
1697    /// a Zipf (power-law) override of the per-line account pick, so a few
1698    /// accounts carry most lines like a real GL (FINDINGS.md sec.8: corpus
1699    /// top-10% of accounts ≈ 95% of lines vs the engine's near-uniform ~0.21).
1700    /// The uniform draw is still consumed (amounts/dates/counts unchanged) — only
1701    /// the chosen account moves toward the hot set. Set `false` for the legacy
1702    /// uniform-over-pool selection. Default-on when unset.
1703    #[serde(default)]
1704    pub account_concentration: Option<bool>,
1705    /// **SOTA-6** Fraction of journal entries that are allocation/assessment
1706    /// batches — large 1-to-many postings (one cost pool spread across many
1707    /// cost centers) that drive the corpus lines-per-JE tail (FINDINGS.md
1708    /// sec.8: AB docs ~52 lines vs the engine's ~4.6 mean with no large-batch
1709    /// process). Each batch carries ~30-80 cost-center-spread sub-lines and
1710    /// stays balanced. Unset → a small default (~0.008, ≈8% of lines); `0.0`
1711    /// disables. Interspersed without perturbing the normal JEs (separate RNG +
1712    /// derived id, reusing a recent JE's header).
1713    #[serde(default)]
1714    pub allocation_batch_rate: Option<f64>,
1715    /// **SOTA-3** Populate a line-level `business_unit` dimension — an
1716    /// organisational segment that rolls up the cost center, or the profit
1717    /// center as fallback (the same dimension value always maps to the same BU).
1718    /// The corpus carries a BU dimension (~11 codes) the engine lacked entirely;
1719    /// this fills it wherever a cost or profit center is present (~corpus fill),
1720    /// so BU-level analytics are coherent. Default-on when unset; `false`
1721    /// leaves `business_unit` empty (legacy).
1722    #[serde(default)]
1723    pub business_unit_dimension: Option<bool>,
1724    /// **SOTA-4** Fraction of journal entries that post in a foreign
1725    /// (document) currency — SAP-style: `debit_amount`/`credit_amount`/
1726    /// `local_amount` stay the company-ledger amount (DMBTR; the trial balance
1727    /// is unaffected), and the line's `transaction_amount` (WRBTR) plus
1728    /// `header.currency` (WAERS) / `header.exchange_rate` carry the foreign
1729    /// value. The corpus shows ~3.5% functional≠reporting (FINDINGS §8).
1730    /// Unset/`0.0` → all company-currency (default). Additive — ledger
1731    /// coherence is preserved; enable for corpus-matching / FX realism.
1732    #[serde(default)]
1733    pub foreign_currency_rate: Option<f64>,
1734    /// Seasonality configuration
1735    #[serde(default)]
1736    pub seasonality: SeasonalityConfig,
1737    /// Amount distribution
1738    #[serde(default)]
1739    pub amounts: AmountDistributionConfig,
1740    /// Benford's Law compliance configuration
1741    #[serde(default)]
1742    pub benford: BenfordConfig,
1743    /// SOTA-10 (FINDINGS §14): optional hard cap on total lines per JE. Corpus has
1744    /// p99.9 ~99 lines / max ~924; the synthetic engine occasionally produces
1745    /// 2000+-line monster JEs that degrade the audit packet's signal-to-noise.
1746    /// `None` = no cap (legacy); ~100 is a realism-matching default. Applies after
1747    /// copula adjustment; preserves balance by scaling debit/credit proportionally.
1748    #[serde(default)]
1749    pub lines_per_je_cap: Option<usize>,
1750    /// SOTA-9 (FINDINGS §14): archetype reuse probability for the recurring-templates
1751    /// process (overrides the historical 0.90 default). Corpus recurring share ~0.97;
1752    /// raising this concentrates `edges/je` toward the corpus value (currently 8.75×
1753    /// too diffuse). Range [0.0, 1.0]. None = use legacy 0.90.
1754    #[serde(default)]
1755    pub archetype_reuse_probability: Option<f64>,
1756    /// SOTA-8 (FINDINGS §14): source-conditional Dirichlet account-pair sampler.
1757    /// Models the corpus finding that per-source account usage is *concentrated*
1758    /// (entropy ~0.68 vs synth 0.97) over a *larger* pool (~23 vs 5 accts/source).
1759    /// Default off — opt-in so existing synthetic streams stay byte-identical;
1760    /// enable for audit-realism + tighter inverse-audit normal manifold.
1761    #[serde(default)]
1762    pub source_conditional_account_pair: SourceConditionalAccountPairConfig,
1763}
1764
1765/// SOTA-8 — per-source Dirichlet over account pairs. Concentration α controls
1766/// per-source structure tightness (low α = razor-tight prior, high α = diffuse);
1767/// `accts_per_source_target` controls the per-source account-pool size.
1768#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1769pub struct SourceConditionalAccountPairConfig {
1770    /// Enable the source-conditional account-pair sampler (default off).
1771    #[serde(default)]
1772    pub enabled: bool,
1773    /// Symmetric Dirichlet α — lower = more concentrated PMF per source.
1774    /// α=0.5 + N_s=25 ⇒ expected normalised entropy ≈ 0.65 (corpus median 0.68).
1775    #[serde(default = "default_source_cond_concentration")]
1776    pub concentration: f64,
1777    /// Expected distinct accounts per source (jittered by LogNormal(0, 0.3)).
1778    /// Corpus median 23.5; synth pre-SOTA-8 is ~5.
1779    #[serde(default = "default_accts_per_source_target")]
1780    pub accts_per_source_target: usize,
1781}
1782
/// Fallback Dirichlet concentration (α) for the source-conditional sampler.
fn default_source_cond_concentration() -> f64 {
    const ALPHA: f64 = 0.5;
    ALPHA
}
1786
/// Fallback target for the number of distinct accounts per source.
fn default_accts_per_source_target() -> usize {
    const ACCOUNTS_PER_SOURCE: usize = 25;
    ACCOUNTS_PER_SOURCE
}
1790
1791impl Default for SourceConditionalAccountPairConfig {
1792    fn default() -> Self {
1793        Self {
1794            enabled: false,
1795            concentration: default_source_cond_concentration(),
1796            accts_per_source_target: default_accts_per_source_target(),
1797        }
1798    }
1799}
1800
1801/// Benford's Law compliance configuration.
1802#[derive(Debug, Clone, Serialize, Deserialize)]
1803pub struct BenfordConfig {
1804    /// Enable Benford's Law compliance for amount generation
1805    #[serde(default = "default_true")]
1806    pub enabled: bool,
1807    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
1808    #[serde(default = "default_benford_tolerance")]
1809    pub tolerance: f64,
1810    /// Transaction sources exempt from Benford's Law (fixed amounts)
1811    #[serde(default)]
1812    pub exempt_sources: Vec<BenfordExemption>,
1813}
1814
/// Fallback tolerance for deviation from the ideal Benford distribution.
fn default_benford_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.05;
    TOLERANCE
}
1818
1819impl Default for BenfordConfig {
1820    fn default() -> Self {
1821        Self {
1822            enabled: true,
1823            tolerance: default_benford_tolerance(),
1824            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
1825        }
1826    }
1827}
1828
1829/// Types of transactions exempt from Benford's Law.
1830#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1831#[serde(rename_all = "snake_case")]
1832pub enum BenfordExemption {
1833    /// Recurring fixed amounts (rent, subscriptions)
1834    Recurring,
1835    /// Payroll (standardized salaries)
1836    Payroll,
1837    /// Fixed fees and charges
1838    FixedFees,
1839    /// Round number purchases (often legitimate)
1840    RoundAmounts,
1841}
1842
1843/// Distribution of transaction sources.
1844#[derive(Debug, Clone, Serialize, Deserialize)]
1845pub struct SourceDistribution {
1846    /// Manual entries percentage
1847    pub manual: f64,
1848    /// Automated system entries
1849    pub automated: f64,
1850    /// Recurring entries
1851    pub recurring: f64,
1852    /// Adjustment entries
1853    pub adjustment: f64,
1854}
1855
1856impl Default for SourceDistribution {
1857    fn default() -> Self {
1858        Self {
1859            manual: 0.20,
1860            automated: 0.70,
1861            recurring: 0.07,
1862            adjustment: 0.03,
1863        }
1864    }
1865}
1866
1867/// Output configuration.
1868#[derive(Debug, Clone, Serialize, Deserialize)]
1869pub struct OutputConfig {
1870    /// Output mode
1871    #[serde(default)]
1872    pub mode: OutputMode,
1873    /// Output directory
1874    #[serde(alias = "outputDirectory")]
1875    pub output_directory: PathBuf,
1876    /// File formats to generate. Accepts both `formats: [json, csv]`
1877    /// (canonical YAML) and `exportFormat: "json"` / `exportFormats:
1878    /// ["json", "csv"]` (SDK-style camelCase). The single-string
1879    /// `exportFormat` form is deserialised via `one_or_many_formats`
1880    /// so SDK clients submitting `exportFormat: "json"` hit the right
1881    /// code path instead of silently falling through to the Parquet
1882    /// default — the bug the SDK team flagged in v4.4.0.
1883    #[serde(
1884        default = "default_formats",
1885        alias = "exportFormats",
1886        alias = "exportFormat",
1887        deserialize_with = "one_or_many_formats"
1888    )]
1889    pub formats: Vec<FileFormat>,
1890    /// Compression settings
1891    #[serde(default)]
1892    pub compression: CompressionConfig,
1893    /// Batch size for writes
1894    #[serde(default = "default_batch_size", alias = "batchSize")]
1895    pub batch_size: usize,
1896    /// Include ACDOCA format
1897    #[serde(default = "default_true", alias = "includeAcdoca")]
1898    pub include_acdoca: bool,
1899    /// Include BSEG format
1900    #[serde(default, alias = "includeBseg")]
1901    pub include_bseg: bool,
1902    /// Partition by fiscal period
1903    #[serde(default = "default_true", alias = "partitionByPeriod")]
1904    pub partition_by_period: bool,
1905    /// Partition by company code
1906    #[serde(default, alias = "partitionByCompany")]
1907    pub partition_by_company: bool,
1908    /// Numeric serialization mode for JSON output.
1909    /// "string" (default): decimals as `"1729237.30"` — lossless precision.
1910    /// "native": decimals as `1729237.30` — friendlier for pandas/analytics.
1911    #[serde(default, alias = "numericMode")]
1912    pub numeric_mode: NumericMode,
1913    /// JSON export layout for journal entries and document flows.
1914    /// "nested" (default): `{"header": {...}, "lines": [...]}` — natural ERP structure.
1915    /// "flat": header fields repeated on every line — friendlier for analytics/ML.
1916    ///
1917    /// Accepts both `export_layout` (canonical / YAML) and `exportLayout`
1918    /// (camelCase / SDK JSON) so SDKs that follow camelCase conventions
1919    /// hit the flat path rather than silently getting the Nested default.
1920    /// Before v3.1.1 the missing camelCase alias meant SDK requests with
1921    /// `exportLayout: "flat"` were silently ignored, which SDK operators
1922    /// reported as "flat hangs generation" (the job completed with Nested
1923    /// layout, but manifests didn't match the expected flat shape).
1924    #[serde(default, alias = "exportLayout")]
1925    pub export_layout: ExportLayout,
1926    /// SAP / HANA export settings (only read when the CLI
1927    /// `--export-format sap` flag is passed). Empty by default so
1928    /// existing configs don't change behaviour; dialect defaults to
1929    /// `classic` for backward compatibility.
1930    #[serde(default, alias = "sapExport")]
1931    pub sap: SapExportSettings,
1932    /// SAF-T (Standard Audit File for Tax) export settings. Read when
1933    /// the CLI `--export-format saft` flag is passed. Defaults to
1934    /// Portugal (`pt`) because the PT variant is the most mature and
1935    /// cross-jurisdiction compatible. Override with
1936    /// `jurisdiction: pl|ro|no|lu` for the other supported countries.
1937    #[serde(default, alias = "saftExport")]
1938    pub saft: SaftExportSettings,
1939}
1940
1941/// Configuration for the SAP export writers (BKPF / BSEG / ACDOCA and
1942/// master-data tables).
1943///
1944/// Mirror of `datasynth_output::SapExportConfig` in YAML form — the CLI
1945/// translates this into the runtime struct before invoking the exporter,
1946/// replacing the v3.x hardcoded `SapExportConfig::default()`.
1947#[derive(Debug, Clone, Serialize, Deserialize)]
1948pub struct SapExportSettings {
1949    /// SAP client / MANDT column value on every table.
1950    #[serde(default = "default_sap_client")]
1951    pub client: String,
1952    /// Leading ledger for ACDOCA rows (0L for S/4HANA default).
1953    #[serde(default = "default_sap_ledger")]
1954    pub ledger: String,
1955    /// Source system identifier — written to ACDOCA.AWSYS so downstream
1956    /// consumers can distinguish synthetic rows from production ones.
1957    #[serde(default = "default_sap_source_system")]
1958    pub source_system: String,
1959    /// Local currency (WAERS / RWCUR).
1960    #[serde(default = "default_sap_currency")]
1961    pub local_currency: String,
1962    /// Optional group / consolidation currency (triggers the HSL / RHCUR columns).
1963    #[serde(default, skip_serializing_if = "Option::is_none")]
1964    pub group_currency: Option<String>,
1965    /// Which SAP tables to export. Empty = default set (bkpf, bseg, acdoca).
1966    #[serde(default)]
1967    pub tables: Vec<String>,
1968    /// Include ZSIM_* extension columns on ACDOCA rows.
1969    #[serde(default = "default_true")]
1970    pub include_extension_fields: bool,
1971    /// Export dialect — `classic` (R/3 / BODS) or `hana` (S/4HANA CDS).
1972    #[serde(default)]
1973    pub dialect: SapDialectSetting,
1974    /// Legacy flag, retained for backward compatibility. Has no effect
1975    /// when `dialect = hana`.
1976    #[serde(default = "default_true")]
1977    pub use_sap_date_format: bool,
1978}
1979
1980impl Default for SapExportSettings {
1981    fn default() -> Self {
1982        Self {
1983            client: default_sap_client(),
1984            ledger: default_sap_ledger(),
1985            source_system: default_sap_source_system(),
1986            local_currency: default_sap_currency(),
1987            group_currency: None,
1988            tables: Vec::new(),
1989            include_extension_fields: true,
1990            dialect: SapDialectSetting::default(),
1991            use_sap_date_format: true,
1992        }
1993    }
1994}
1995
/// Fallback SAP client (MANDT) value.
fn default_sap_client() -> String {
    String::from("100")
}
/// Fallback leading-ledger code.
fn default_sap_ledger() -> String {
    String::from("0L")
}
/// Fallback source-system identifier.
fn default_sap_source_system() -> String {
    String::from("SYNTH")
}
/// Fallback local currency code.
fn default_sap_currency() -> String {
    String::from("USD")
}
2008
2009/// SAP export dialect (wire form — `datasynth_output::SapDialect` is the
2010/// runtime form).
2011#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
2012#[serde(rename_all = "snake_case")]
2013pub enum SapDialectSetting {
2014    /// Legacy R/3 / BODS-compatible CSV (default).
2015    #[default]
2016    Classic,
2017    /// S/4HANA CDS dialect (semicolon + UTF-8 BOM + decimal comma + ISO dates).
2018    Hana,
2019}
2020
2021/// SAF-T export settings (v4.3.1).
2022#[derive(Debug, Clone, Serialize, Deserialize)]
2023pub struct SaftExportSettings {
2024    /// ISO-ish two-letter code: `pt` / `pl` / `ro` / `no` / `lu`.
2025    /// Defaults to `pt` (Portugal, most mature variant).
2026    #[serde(default = "default_saft_jurisdiction")]
2027    pub jurisdiction: String,
2028    /// Company tax registration number / VAT ID / TIN used in the
2029    /// `Header.TaxRegistrationNumber` element. Falls back to
2030    /// `"Desconhecido"` (Portuguese for "unknown") when empty.
2031    #[serde(default)]
2032    pub company_tax_id: String,
2033    /// Optional override for the company name used in the Header.
2034    /// When empty, the first configured company's `name` is used.
2035    #[serde(default)]
2036    pub company_name: String,
2037}
2038
2039impl Default for SaftExportSettings {
2040    fn default() -> Self {
2041        Self {
2042            jurisdiction: default_saft_jurisdiction(),
2043            company_tax_id: String::new(),
2044            company_name: String::new(),
2045        }
2046    }
2047}
2048
/// Fallback SAF-T jurisdiction code.
fn default_saft_jurisdiction() -> String {
    String::from("pt")
}
2052
2053fn default_formats() -> Vec<FileFormat> {
2054    vec![FileFormat::Parquet]
2055}
/// Fallback number of records per write batch.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
2059
2060/// Custom deserializer for `formats` that accepts either a single
2061/// `FileFormat` (e.g. `"json"` for SDK `exportFormat: "json"`) or a
2062/// vector (e.g. `["json", "csv"]`). Without this shim an SDK config
2063/// with `exportFormat: "json"` would fail to parse (serde expects a
2064/// sequence for a `Vec` field) and silently fall through to defaults.
2065fn one_or_many_formats<'de, D>(deserializer: D) -> Result<Vec<FileFormat>, D::Error>
2066where
2067    D: serde::Deserializer<'de>,
2068{
2069    #[derive(Deserialize)]
2070    #[serde(untagged)]
2071    enum OneOrMany {
2072        One(FileFormat),
2073        Many(Vec<FileFormat>),
2074    }
2075    match OneOrMany::deserialize(deserializer)? {
2076        OneOrMany::One(f) => Ok(vec![f]),
2077        OneOrMany::Many(v) => Ok(v),
2078    }
2079}
2080
2081impl Default for OutputConfig {
2082    fn default() -> Self {
2083        Self {
2084            mode: OutputMode::FlatFile,
2085            output_directory: PathBuf::from("./output"),
2086            formats: default_formats(),
2087            compression: CompressionConfig::default(),
2088            batch_size: default_batch_size(),
2089            include_acdoca: true,
2090            include_bseg: false,
2091            partition_by_period: true,
2092            partition_by_company: false,
2093            numeric_mode: NumericMode::default(),
2094            export_layout: ExportLayout::default(),
2095            sap: SapExportSettings::default(),
2096            saft: SaftExportSettings::default(),
2097        }
2098    }
2099}
2100
2101/// Numeric serialization mode for JSON decimal fields.
2102#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2103#[serde(rename_all = "snake_case")]
2104pub enum NumericMode {
2105    /// Decimals as JSON strings (e.g. `"1729237.30"`). Preserves full precision.
2106    #[default]
2107    String,
2108    /// Decimals as JSON numbers (e.g. `1729237.30`). Friendlier for pandas/analytics.
2109    Native,
2110}
2111
2112/// JSON export layout for nested structures (journal entries, document flows).
2113#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2114#[serde(rename_all = "snake_case")]
2115pub enum ExportLayout {
2116    /// Nested structure: `{"header": {...}, "lines": [...]}`. Natural ERP format.
2117    #[default]
2118    Nested,
2119    /// Flat structure: header fields repeated on every line. Analytics-friendly.
2120    Flat,
2121}
2122
2123/// Output mode.
2124#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2125#[serde(rename_all = "snake_case")]
2126pub enum OutputMode {
2127    /// Stream records as generated
2128    Streaming,
2129    /// Write to flat files
2130    #[default]
2131    FlatFile,
2132    /// Both streaming and flat file
2133    Both,
2134}
2135
2136/// Supported file formats.
2137#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
2138#[serde(rename_all = "snake_case")]
2139pub enum FileFormat {
2140    Csv,
2141    Parquet,
2142    Json,
2143    JsonLines,
2144}
2145
2146/// Compression configuration.
2147#[derive(Debug, Clone, Serialize, Deserialize)]
2148pub struct CompressionConfig {
2149    /// Enable compression
2150    #[serde(default = "default_true")]
2151    pub enabled: bool,
2152    /// Compression algorithm
2153    #[serde(default)]
2154    pub algorithm: CompressionAlgorithm,
2155    /// Compression level (1-9)
2156    #[serde(default = "default_compression_level")]
2157    pub level: u8,
2158}
2159
/// Fallback compression level.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
2163
2164impl Default for CompressionConfig {
2165    fn default() -> Self {
2166        Self {
2167            enabled: true,
2168            algorithm: CompressionAlgorithm::default(),
2169            level: default_compression_level(),
2170        }
2171    }
2172}
2173
2174/// Compression algorithms.
2175#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2176#[serde(rename_all = "snake_case")]
2177pub enum CompressionAlgorithm {
2178    Gzip,
2179    #[default]
2180    Zstd,
2181    Lz4,
2182    Snappy,
2183}
2184
2185/// Fraud simulation configuration.
2186///
2187/// ## Document-level vs. line-level fraud
2188///
2189/// `fraud_rate` applies to individual journal-entry lines (line-level).
2190/// `document_fraud_rate` (optional) applies to source documents
2191/// (purchase orders, vendor invoices, customer invoices, payments), and when
2192/// `propagate_to_lines` is true, every JE derived from a fraudulent document
2193/// also gets `is_fraud = true`. This lets users express either:
2194///
2195///  * pure line-level fraud (`document_fraud_rate = None`): legacy behaviour;
2196///  * pure document-level fraud (`fraud_rate ≈ 0` and `document_fraud_rate` set):
2197///    fraud rings expressed at document granularity — realistic for PO/invoice
2198///    fraud schemes where one fraudulent document spawns multiple derived JEs;
2199///  * hybrid (both set): document-level scheme fraud plus unrelated line-level
2200///    slip-ups.
2201///
2202/// `propagate_to_document` does the inverse: when a JE is tagged as fraud by
2203/// the anomaly injector, its source document is also marked fraudulent.
2204#[derive(Debug, Clone, Serialize, Deserialize)]
2205pub struct FraudConfig {
2206    /// Enable fraud scenario generation
2207    #[serde(default)]
2208    pub enabled: bool,
2209    /// Line-level fraud rate: fraction of individual JE lines flagged as fraud (0.0 to 1.0).
2210    ///
2211    /// # Effective line-level prevalence
2212    ///
2213    /// If `document_fraud_rate = Some(d)` and `propagate_to_lines = true`,
2214    /// the observed line-level fraud prevalence is roughly:
2215    ///
2216    /// > `P(line is_fraud) ≈ fraud_rate + d × avg_lines_per_fraud_doc / total_lines`
2217    ///
2218    /// For a typical retail job (avg 3 lines per document, ~30 % of lines
2219    /// come from doc-flow-derived JEs) the combined rate lands near:
2220    ///
2221    /// > `fraud_rate + 0.3 × d`
2222    ///
2223    /// so setting `fraud_rate=0.02, document_fraud_rate=0.05, propagate_to_lines=true`
2224    /// produces ~3.5 % line-level fraud, not 2 %. To target a specific
2225    /// line-level prevalence X, choose `fraud_rate = X - 0.3 × d`.
2226    #[serde(default = "default_fraud_rate", alias = "fraudRate")]
2227    pub fraud_rate: f64,
2228    /// Document-level fraud rate: fraction of source documents (PO, vendor
2229    /// invoice, customer invoice, payment) flagged as fraud. `None` disables
2230    /// document-level injection; `Some(r)` marks ~r × document-count as fraud
2231    /// independently of the line-level rate.
2232    ///
2233    /// v4.4.2+ default: `Some(0.01)` — the SDK team reported
2234    /// `is_fraud_propagated: 0/72` regressed from `12/33` in 3.1.1 because
2235    /// the default had silently become None. A 1% document-fraud default
2236    /// restores the propagation signal (~0.3% of JE headers carry
2237    /// `is_fraud_propagated = true`) without meaningfully changing the
2238    /// line-level fraud prevalence. Set to `Some(0.0)` or `null` in your
2239    /// YAML to explicitly disable document-level injection.
2240    #[serde(default = "default_document_fraud_rate", alias = "documentFraudRate")]
2241    pub document_fraud_rate: Option<f64>,
2242    /// When true, flagging a document as fraudulent cascades `is_fraud = true`
2243    /// and `fraud_type` to every journal entry derived from that document,
2244    /// and records `fraud_source_document_id` on the JE header.
2245    /// Default: `true`.
2246    #[serde(default = "default_true", alias = "propagateToLines")]
2247    pub propagate_to_lines: bool,
2248    /// When true, tagging a JE as fraud via line-level anomaly injection also
2249    /// marks the JE's source document as fraudulent (if it can be resolved).
2250    /// Default: `true`.
2251    #[serde(default = "default_true", alias = "propagateToDocument")]
2252    pub propagate_to_document: bool,
2253    /// Fraud type distribution
2254    #[serde(default)]
2255    pub fraud_type_distribution: FraudTypeDistribution,
2256    /// Enable fraud clustering
2257    #[serde(default)]
2258    pub clustering_enabled: bool,
2259    /// Clustering factor
2260    #[serde(default = "default_clustering_factor")]
2261    pub clustering_factor: f64,
2262    /// Approval thresholds for threshold-adjacent fraud pattern
2263    #[serde(default = "default_approval_thresholds")]
2264    pub approval_thresholds: Vec<f64>,
2265    /// v5.30 B3 (#153) — per-business-process fraud rate overrides.
2266    ///
2267    /// Keys are business-process slugs (`"P2P"`, `"O2C"`, `"R2R"`, `"H2R"`,
2268    /// `"A2R"`); values are line-level fraud rates that **override** the
2269    /// global `fraud_rate` when a JE's selected business process matches a
2270    /// key. Unmatched processes fall back to `fraud_rate`.
2271    ///
2272    /// When empty (the default), per-process rates are disabled and every
2273    /// JE uses the global `fraud_rate` — preserving v5.29 byte-identical
2274    /// output for configs that don't opt in.
2275    ///
2276    /// # Why
2277    ///
2278    /// Real audit data shows process-specific fraud signatures (R2R
2279    /// manual-close and period-end accruals carry higher fraud
2280    /// concentration than P2P invoice-processing). The v5.29 global
2281    /// `fraud_rate` flattens this signal, leaving the GNN fraud detector
2282    /// at a uniform per-process AUC band (0.914-0.925 in the v5.29 retrain).
2283    ///
2284    /// # Example
2285    ///
2286    /// ```yaml
2287    /// fraud:
2288    ///   fraud_rate: 0.02         # baseline for unmapped processes
2289    ///   per_process_rates:
2290    ///     R2R: 0.06              # 3× baseline — period-close hot spot
2291    ///     P2P: 0.04              # 2× baseline — invoice fraud
2292    ///     O2C: 0.025             # 1.25× baseline — revenue manipulation
2293    ///     H2R: 0.015             # below baseline — payroll
2294    ///     A2R: 0.020             # baseline — asset accounting
2295    /// ```
2296    ///
2297    /// Aggregate effective line-level prevalence depends on the
2298    /// `business_processes` weights mix; calibrate to a target X by
2299    /// solving for the weighted average. For default v5.29 weights
2300    /// (P2P 0.35, O2C 0.35, R2R 0.20, H2R 0.05, A2R 0.05) the
2301    /// example above yields ~0.0335 line-level fraud.
2302    #[serde(default, alias = "perProcessRates")]
2303    pub per_process_rates: std::collections::HashMap<String, f64>,
2304    /// Behavioral-bias signatures stamped on fraud-labelled entries (weekend / round-dollar /
2305    /// off-hours / post-close). These are the canonical forensic signals a per-JE detector keys
2306    /// on; lowering them yields *subtler* fraud, raising them yields more obviously-fraudulent
2307    /// entries. Previously hardcoded — exposing them lets generators/experiments tune fraud
2308    /// detectability (the adversary's lever in co-training). See [`FraudBiasConfig`].
2309    #[serde(default, alias = "behavioralBias")]
2310    pub bias: FraudBiasConfig,
2311    /// Persistent fraud *campaigns* — counterparty-pinned, relocation-structured fraud that recurs
2312    /// across periods: a beneficiary (counterparty) account stays fixed while the booking leg
2313    /// rotates period-to-period. Turns the default i.i.d.-in-time fraud DGP into a campaign
2314    /// simulator so cross-period / relational / memory detectors can be benchmarked (FINDINGS
2315    /// §33/§36/§40). Off by default → byte-identical output. See [`FraudCampaignConfig`].
2316    #[serde(default, alias = "fraudCampaigns")]
2317    pub campaigns: FraudCampaignConfig,
2318    /// Fraud *difficulty* preset — a single knob spanning loud-forensic → residual-faint that
2319    /// co-sets the behavioral-bias signatures (the validated subtlety lever, FINDINGS §43/§44).
2320    /// `Standard` (default) uses the explicit `bias` field, preserving byte-identical output; the
2321    /// other levels override it. Gives benchmark builders a controllable hardness axis. See
2322    /// [`FraudDifficulty`] and [`FraudConfig::effective_bias`].
2323    #[serde(default)]
2324    pub difficulty: FraudDifficulty,
2325}
2326
2327/// Fraud difficulty preset — a single knob over fraud detectability, resolved to a
2328/// [`FraudBiasConfig`] by [`FraudConfig::effective_bias`]. Spans the co-training subtlety axis
2329/// (FINDINGS §43/§44): loud forensic signatures at one end, residual-faint (bias-off) at the other.
2330#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2331#[serde(rename_all = "snake_case")]
2332pub enum FraudDifficulty {
2333    /// Use the explicit `bias` config as-is (back-compat; byte-identical default).
2334    #[default]
2335    Standard,
2336    /// Loud forensic signatures — the easiest fraud to detect.
2337    Forensic,
2338    /// Faint signatures — harder, fewer forensic tells.
2339    Subtle,
2340    /// Behavioral bias off entirely — residual-faint fraud; the hardest, label-free-defeating case
2341    /// that motivates the supervised / co-training arm (FINDINGS §44).
2342    Adversarial,
2343}
2344
2345impl FraudConfig {
2346    /// The behavioral-bias config actually applied, after resolving [`FraudConfig::difficulty`].
2347    /// `Standard` returns the explicit `bias` field unchanged (byte-identical); the other presets
2348    /// override it, spanning loud-forensic → residual-faint.
2349    pub fn effective_bias(&self) -> FraudBiasConfig {
2350        match self.difficulty {
2351            FraudDifficulty::Standard => self.bias,
2352            FraudDifficulty::Forensic => FraudBiasConfig {
2353                enabled: true,
2354                weekend_bias: 0.55,
2355                round_dollar_bias: 0.65,
2356                off_hours_bias: 0.55,
2357                post_close_bias: 0.45,
2358            },
2359            FraudDifficulty::Subtle => FraudBiasConfig {
2360                enabled: true,
2361                weekend_bias: 0.10,
2362                round_dollar_bias: 0.10,
2363                off_hours_bias: 0.10,
2364                post_close_bias: 0.05,
2365            },
2366            FraudDifficulty::Adversarial => FraudBiasConfig {
2367                enabled: false,
2368                weekend_bias: 0.0,
2369                round_dollar_bias: 0.0,
2370                off_hours_bias: 0.0,
2371                post_close_bias: 0.0,
2372            },
2373        }
2374    }
2375}
2376
2377/// Persistent fraud-campaign configuration. A campaign restructures a handful of journal entries
2378/// per period into a counterparty-pinned, relocation-structured scheme: the beneficiary account
2379/// stays fixed across the campaign (the relocation-invariant handle, FINDINGS §36/§40) while the
2380/// booking leg rotates from a pool every `rotate_every_periods`. Off by default.
2381#[derive(Debug, Clone, Serialize, Deserialize)]
2382pub struct FraudCampaignConfig {
2383    /// Master switch — when `false` (default), no campaigns are planned and output is byte-identical.
2384    #[serde(default)]
2385    pub enabled: bool,
2386    /// Number of distinct persistent campaigns to plant.
2387    #[serde(default = "default_campaign_count")]
2388    pub count: u32,
2389    /// Fraud journal entries restructured into each campaign per period.
2390    #[serde(default = "default_campaign_per_period", alias = "perPeriodCount")]
2391    pub per_period_count: u32,
2392    /// Size of the rotating booking-leg account pool (the relocating leg).
2393    #[serde(default = "default_campaign_booking_pool", alias = "bookingLegPool")]
2394    pub booking_leg_pool: u32,
2395    /// Relocate the booking leg every N periods (1 = relocate every period).
2396    #[serde(
2397        default = "default_campaign_rotate_every",
2398        alias = "rotateEveryPeriods"
2399    )]
2400    pub rotate_every_periods: u32,
2401    /// Length of a campaign period in days (the JE timeline is bucketed by this).
2402    #[serde(default = "default_campaign_period_days", alias = "periodDays")]
2403    pub period_days: u32,
2404    /// Synthetic prior-year carry-forward register (the confirmation channel, FINDINGS §40/§59).
2405    /// When enabled, the engine emits a partial/noisy register of confirmed prior-period campaign
2406    /// findings — DataSynth knows the planted truth, so it can produce the `PRIOR_YEAR` confirmed
2407    /// findings an audit team would carry forward. Off by default. See [`CarryForwardConfig`].
2408    #[serde(default, alias = "carryForward")]
2409    pub carry_forward: CarryForwardConfig,
2410}
2411
2412/// Synthetic carry-forward (prior-year confirmed-findings) register config. The register confirms a
2413/// `confirmation_rate` fraction of the true campaign counterparties (as a real audit catches only
2414/// some prior fraud) and adds a `false_positive_rate` of legitimate counterparties (auditor errors).
2415/// The memory arm consumes the register; recall scales ~linearly with the confirmation rate (§59).
2416#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
2417pub struct CarryForwardConfig {
2418    /// Master switch — off by default (no register emitted; byte-identical).
2419    #[serde(default)]
2420    pub enabled: bool,
2421    /// Fraction of true campaign counterparties confirmed in the prior period (0..1).
2422    #[serde(default = "default_confirmation_rate", alias = "confirmationRate")]
2423    pub confirmation_rate: f64,
2424    /// Fraction (of the true-finding count) of legitimate counterparties wrongly confirmed (0..1).
2425    #[serde(default = "default_false_positive_rate", alias = "falsePositiveRate")]
2426    pub false_positive_rate: f64,
2427}
2428
/// Serde default for `CarryForwardConfig::confirmation_rate`.
fn default_confirmation_rate() -> f64 {
    0.6
}
/// Serde default for `CarryForwardConfig::false_positive_rate`.
fn default_false_positive_rate() -> f64 {
    0.05
}
2435
2436impl Default for CarryForwardConfig {
2437    fn default() -> Self {
2438        Self {
2439            enabled: false,
2440            confirmation_rate: default_confirmation_rate(),
2441            false_positive_rate: default_false_positive_rate(),
2442        }
2443    }
2444}
2445
/// Serde default for `FraudCampaignConfig::count`.
fn default_campaign_count() -> u32 {
    1
}
/// Serde default for `FraudCampaignConfig::per_period_count`.
fn default_campaign_per_period() -> u32 {
    2
}
/// Serde default for `FraudCampaignConfig::booking_leg_pool`.
fn default_campaign_booking_pool() -> u32 {
    6
}
/// Serde default for `FraudCampaignConfig::rotate_every_periods`.
fn default_campaign_rotate_every() -> u32 {
    1
}
/// Serde default for `FraudCampaignConfig::period_days`.
fn default_campaign_period_days() -> u32 {
    30
}
2461
2462impl Default for FraudCampaignConfig {
2463    fn default() -> Self {
2464        Self {
2465            enabled: false,
2466            count: default_campaign_count(),
2467            per_period_count: default_campaign_per_period(),
2468            booking_leg_pool: default_campaign_booking_pool(),
2469            rotate_every_periods: default_campaign_rotate_every(),
2470            period_days: default_campaign_period_days(),
2471            carry_forward: CarryForwardConfig::default(),
2472        }
2473    }
2474}
2475
2476impl FraudCampaignConfig {
2477    /// Whether campaigns should actually be planned (enabled with a sane, non-degenerate spec).
2478    pub fn is_active(&self) -> bool {
2479        self.enabled
2480            && self.count >= 1
2481            && self.per_period_count >= 1
2482            && self.booking_leg_pool >= 1
2483            && self.rotate_every_periods >= 1
2484            && self.period_days >= 1
2485    }
2486}
2487
2488/// Probabilities for the four canonical fraud behavioral-bias signatures. Defaults match the
2489/// engine's historical hardcoded values (`datasynth_core::fraud_bias`), so output is byte-identical
2490/// unless a config overrides them.
2491#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
2492pub struct FraudBiasConfig {
2493    /// Master switch — when `false`, no behavioral bias is applied to fraud entries.
2494    #[serde(default = "default_true")]
2495    pub enabled: bool,
2496    /// P(fraud entry's posting_date shifted to a weekend). Default 0.30.
2497    #[serde(default = "default_weekend_bias", alias = "weekendBias")]
2498    pub weekend_bias: f64,
2499    /// P(fraud entry's amount snapped to a round target $1K/$5K/…/$100K, balance preserved). Default 0.40.
2500    #[serde(default = "default_round_dollar_bias", alias = "roundDollarBias")]
2501    pub round_dollar_bias: f64,
2502    /// P(fraud entry's created_at shifted to off-hours 22:00–05:59 UTC). Default 0.35.
2503    #[serde(default = "default_off_hours_bias", alias = "offHoursBias")]
2504    pub off_hours_bias: f64,
2505    /// P(fraud entry marked is_post_close). Default 0.25.
2506    #[serde(default = "default_post_close_bias", alias = "postCloseBias")]
2507    pub post_close_bias: f64,
2508}
2509
/// Serde default for `FraudBiasConfig::weekend_bias`.
fn default_weekend_bias() -> f64 {
    0.30
}
/// Serde default for `FraudBiasConfig::round_dollar_bias`.
fn default_round_dollar_bias() -> f64 {
    0.40
}
/// Serde default for `FraudBiasConfig::off_hours_bias`.
fn default_off_hours_bias() -> f64 {
    0.35
}
/// Serde default for `FraudBiasConfig::post_close_bias`.
fn default_post_close_bias() -> f64 {
    0.25
}
2522
2523impl Default for FraudBiasConfig {
2524    fn default() -> Self {
2525        Self {
2526            enabled: true,
2527            weekend_bias: default_weekend_bias(),
2528            round_dollar_bias: default_round_dollar_bias(),
2529            off_hours_bias: default_off_hours_bias(),
2530            post_close_bias: default_post_close_bias(),
2531        }
2532    }
2533}
2534
2535impl FraudBiasConfig {
2536    /// Map the YAML-facing config to the core engine's bias config.
2537    pub fn to_core(&self) -> datasynth_core::fraud_bias::FraudBehavioralBiasConfig {
2538        datasynth_core::fraud_bias::FraudBehavioralBiasConfig {
2539            enabled: self.enabled,
2540            weekend_bias: self.weekend_bias,
2541            round_dollar_bias: self.round_dollar_bias,
2542            off_hours_bias: self.off_hours_bias,
2543            post_close_bias: self.post_close_bias,
2544        }
2545    }
2546}
2547
/// Default approval-threshold amounts used by `FraudConfig::default()`, in ascending order.
fn default_approval_thresholds() -> Vec<f64> {
    vec![1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0]
}
2551
/// Default line-level fraud rate used by `FraudConfig::default()`.
fn default_fraud_rate() -> f64 {
    0.005
}
/// Default document-level (scheme) fraud rate used by `FraudConfig::default()`.
fn default_document_fraud_rate() -> Option<f64> {
    // v5.0.1: bumped 0.01 → 0.05 to deliver meaningful scheme-level
    // fraud propagation at typical line-level rates. The 1 % default
    // (set in v4.4.2 to restore `is_fraud_propagated > 0`) was too
    // conservative — at `fraud_rate = 0.08` it produced ~3.6 % observed
    // propagation against a 26.7 % target. The new 5 % default + the
    // additive formula `P(line is_fraud) ≈ fraud_rate + 0.3 × d` yields
    // ~9.5 % combined at fraud_rate=0.08 (closer to the spec target).
    // Set explicitly to `Some(0.0)` or `null` in YAML to disable, or to
    // a higher value (e.g. 0.20) for scheme-heavy fraud workloads.
    Some(0.05)
}
/// Default clustering factor used by `FraudConfig::default()`.
fn default_clustering_factor() -> f64 {
    3.0
}
2570
2571impl Default for FraudConfig {
2572    fn default() -> Self {
2573        Self {
2574            enabled: false,
2575            fraud_rate: default_fraud_rate(),
2576            document_fraud_rate: default_document_fraud_rate(),
2577            propagate_to_lines: true,
2578            propagate_to_document: true,
2579            fraud_type_distribution: FraudTypeDistribution::default(),
2580            clustering_enabled: false,
2581            clustering_factor: default_clustering_factor(),
2582            approval_thresholds: default_approval_thresholds(),
2583            per_process_rates: std::collections::HashMap::new(),
2584            bias: FraudBiasConfig::default(),
2585            campaigns: FraudCampaignConfig::default(),
2586            difficulty: FraudDifficulty::default(),
2587        }
2588    }
2589}
2590
2591/// Distribution of fraud types.
2592///
2593/// All fields default to `0.0` if absent from the YAML, so partial
2594/// distributions are accepted; the validator (`validate_sum_to_one`)
2595/// then enforces that the populated weights sum to `1.0 ± 0.01`.
2596#[derive(Debug, Clone, Serialize, Deserialize)]
2597#[serde(deny_unknown_fields)]
2598pub struct FraudTypeDistribution {
2599    #[serde(default)]
2600    pub suspense_account_abuse: f64,
2601    #[serde(default)]
2602    pub fictitious_transaction: f64,
2603    #[serde(default)]
2604    pub revenue_manipulation: f64,
2605    #[serde(default)]
2606    pub expense_capitalization: f64,
2607    #[serde(default)]
2608    pub split_transaction: f64,
2609    #[serde(default)]
2610    pub timing_anomaly: f64,
2611    #[serde(default)]
2612    pub unauthorized_access: f64,
2613    #[serde(default)]
2614    pub duplicate_payment: f64,
2615    /// Vendor kickback scheme.
2616    #[serde(default)]
2617    pub kickback_scheme: f64,
2618    /// Round-tripping funds through multiple entities or accounts.
2619    #[serde(default)]
2620    pub round_tripping: f64,
2621    /// Unauthorized customer/vendor discounts (sweethearting, side deals).
2622    #[serde(default)]
2623    pub unauthorized_discount: f64,
2624}
2625
2626impl Default for FraudTypeDistribution {
2627    fn default() -> Self {
2628        // Preserves the pre-extension default sum=1.0 over the original
2629        // eight fields.  The three additional fields (kickback_scheme,
2630        // round_tripping, unauthorized_discount) default to 0.0 so that
2631        // existing fraud packs / templates that explicitly enumerate the
2632        // original eight fields continue to merge to a 1.0 sum without
2633        // modification.  Users who want those fraud types must set them
2634        // explicitly (and rebalance the others).
2635        Self {
2636            suspense_account_abuse: 0.25,
2637            fictitious_transaction: 0.15,
2638            revenue_manipulation: 0.10,
2639            expense_capitalization: 0.10,
2640            split_transaction: 0.15,
2641            timing_anomaly: 0.10,
2642            unauthorized_access: 0.10,
2643            duplicate_payment: 0.05,
2644            kickback_scheme: 0.0,
2645            round_tripping: 0.0,
2646            unauthorized_discount: 0.0,
2647        }
2648    }
2649}
2650
2651/// Internal Controls System (ICS) configuration.
2652#[derive(Debug, Clone, Serialize, Deserialize)]
2653pub struct InternalControlsConfig {
2654    /// Enable internal controls system
2655    #[serde(default)]
2656    pub enabled: bool,
2657    /// Rate at which controls result in exceptions (0.0 - 1.0)
2658    #[serde(default = "default_exception_rate")]
2659    pub exception_rate: f64,
2660    /// Rate at which SoD violations occur (0.0 - 1.0)
2661    #[serde(default = "default_sod_violation_rate")]
2662    pub sod_violation_rate: f64,
2663    /// Export control master data to separate files
2664    #[serde(default = "default_true")]
2665    pub export_control_master_data: bool,
2666    /// SOX materiality threshold for marking transactions as SOX-relevant
2667    #[serde(default = "default_sox_materiality_threshold")]
2668    pub sox_materiality_threshold: f64,
2669    /// Enable COSO 2013 framework integration
2670    #[serde(default = "default_true")]
2671    pub coso_enabled: bool,
2672    /// Include entity-level controls in generation
2673    #[serde(default)]
2674    pub include_entity_level_controls: bool,
2675    /// Target maturity level for controls
2676    /// Valid values: "ad_hoc", "repeatable", "defined", "managed", "optimized", "mixed"
2677    #[serde(default = "default_target_maturity_level")]
2678    pub target_maturity_level: String,
2679}
2680
/// Serde default for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    0.02
}

/// Serde default for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    0.01
}

/// Serde default for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    10000.0
}

/// Serde default for `InternalControlsConfig::target_maturity_level`.
fn default_target_maturity_level() -> String {
    "mixed".to_string()
}
2696
2697impl Default for InternalControlsConfig {
2698    fn default() -> Self {
2699        Self {
2700            enabled: false,
2701            exception_rate: default_exception_rate(),
2702            sod_violation_rate: default_sod_violation_rate(),
2703            export_control_master_data: true,
2704            sox_materiality_threshold: default_sox_materiality_threshold(),
2705            coso_enabled: true,
2706            include_entity_level_controls: false,
2707            target_maturity_level: default_target_maturity_level(),
2708        }
2709    }
2710}
2711
2712/// Business process configuration.
2713#[derive(Debug, Clone, Serialize, Deserialize)]
2714pub struct BusinessProcessConfig {
2715    /// Order-to-Cash weight
2716    #[serde(default = "default_o2c")]
2717    pub o2c_weight: f64,
2718    /// Procure-to-Pay weight
2719    #[serde(default = "default_p2p")]
2720    pub p2p_weight: f64,
2721    /// Record-to-Report weight
2722    #[serde(default = "default_r2r")]
2723    pub r2r_weight: f64,
2724    /// Hire-to-Retire weight
2725    #[serde(default = "default_h2r")]
2726    pub h2r_weight: f64,
2727    /// Acquire-to-Retire weight
2728    #[serde(default = "default_a2r")]
2729    pub a2r_weight: f64,
2730}
2731
/// Serde default for `BusinessProcessConfig::o2c_weight`.
fn default_o2c() -> f64 {
    0.35
}
/// Serde default for `BusinessProcessConfig::p2p_weight`.
fn default_p2p() -> f64 {
    0.30
}
/// Serde default for `BusinessProcessConfig::r2r_weight`.
fn default_r2r() -> f64 {
    0.20
}
/// Serde default for `BusinessProcessConfig::h2r_weight`.
fn default_h2r() -> f64 {
    0.10
}
/// Serde default for `BusinessProcessConfig::a2r_weight`.
fn default_a2r() -> f64 {
    0.05
}
2747
2748impl Default for BusinessProcessConfig {
2749    fn default() -> Self {
2750        Self {
2751            o2c_weight: default_o2c(),
2752            p2p_weight: default_p2p(),
2753            r2r_weight: default_r2r(),
2754            h2r_weight: default_h2r(),
2755            a2r_weight: default_a2r(),
2756        }
2757    }
2758}
2759
2760/// User persona configuration.
2761#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2762pub struct UserPersonaConfig {
2763    /// Distribution of user personas
2764    #[serde(default)]
2765    pub persona_distribution: PersonaDistribution,
2766    /// Users per persona type
2767    #[serde(default)]
2768    pub users_per_persona: UsersPerPersona,
2769}
2770
2771/// Distribution of user personas for transaction generation.
2772#[derive(Debug, Clone, Serialize, Deserialize)]
2773pub struct PersonaDistribution {
2774    pub junior_accountant: f64,
2775    pub senior_accountant: f64,
2776    pub controller: f64,
2777    pub manager: f64,
2778    pub automated_system: f64,
2779}
2780
2781impl Default for PersonaDistribution {
2782    fn default() -> Self {
2783        Self {
2784            junior_accountant: 0.15,
2785            senior_accountant: 0.15,
2786            controller: 0.05,
2787            manager: 0.05,
2788            automated_system: 0.60,
2789        }
2790    }
2791}
2792
2793/// Number of users per persona type.
2794#[derive(Debug, Clone, Serialize, Deserialize)]
2795pub struct UsersPerPersona {
2796    pub junior_accountant: usize,
2797    pub senior_accountant: usize,
2798    pub controller: usize,
2799    pub manager: usize,
2800    pub automated_system: usize,
2801}
2802
2803impl Default for UsersPerPersona {
2804    fn default() -> Self {
2805        Self {
2806            junior_accountant: 10,
2807            senior_accountant: 5,
2808            controller: 2,
2809            manager: 3,
2810            automated_system: 20,
2811        }
2812    }
2813}
2814
2815/// Template configuration for realistic data generation.
2816///
2817/// # User-supplied template packs (v3.2.0+)
2818///
2819/// Set `path` to a directory (or single YAML/JSON file) to override or
2820/// extend the embedded default pools for vendor names, customer names,
2821/// material/asset descriptions, audit findings, bank names, and
2822/// department names. When `path` is `None` (the default), generators
2823/// use the compiled-in pools and output is byte-identical to v3.1.2.
2824///
2825/// See `crates/datasynth-core/src/templates/loader.rs::TemplateData`
2826/// for the full YAML schema. Use `datasynth-data templates export` to
2827/// dump the defaults as a starter pack.
2828#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2829pub struct TemplateConfig {
2830    /// Name generation settings
2831    #[serde(default)]
2832    pub names: NameTemplateConfig,
2833    /// Description generation settings
2834    #[serde(default)]
2835    pub descriptions: DescriptionTemplateConfig,
2836    /// Reference number settings
2837    #[serde(default)]
2838    pub references: ReferenceTemplateConfig,
2839    /// Optional path to a user-supplied template file or directory.
2840    /// When set, entries from the file(s) augment or replace the
2841    /// embedded defaults according to `merge_strategy`.
2842    ///
2843    /// `None` (default) = use embedded pools only (byte-identical to v3.1.2).
2844    #[serde(default, alias = "templatesPath")]
2845    pub path: Option<std::path::PathBuf>,
2846    /// How file-based entries combine with embedded defaults.
2847    ///
2848    /// - `extend` (default): append file entries to embedded pools,
2849    ///   de-duplicating. Safe for incremental overlays.
2850    /// - `replace`: discard embedded pools entirely and use only file
2851    ///   entries. Requires a fully-populated template file.
2852    /// - `merge_prefer_file`: replace individual categories when present
2853    ///   in the file; keep embedded for absent categories.
2854    #[serde(default, alias = "mergeStrategy")]
2855    pub merge_strategy: TemplateMergeStrategy,
2856}
2857
2858/// Strategy for combining user-supplied template files with embedded defaults.
2859#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2860#[serde(rename_all = "snake_case")]
2861pub enum TemplateMergeStrategy {
2862    /// Append file entries to embedded pools (default).
2863    #[default]
2864    Extend,
2865    /// Replace embedded pools entirely with file entries.
2866    Replace,
2867    /// Replace individual categories when present in file; keep embedded for absent ones.
2868    MergePreferFile,
2869}
2870
2871/// Name template configuration.
2872#[derive(Debug, Clone, Serialize, Deserialize)]
2873pub struct NameTemplateConfig {
2874    /// Distribution of name cultures
2875    #[serde(default)]
2876    pub culture_distribution: CultureDistribution,
2877    /// Email domain for generated users
2878    #[serde(default = "default_email_domain")]
2879    pub email_domain: String,
2880    /// Generate realistic display names
2881    #[serde(default = "default_true")]
2882    pub generate_realistic_names: bool,
2883}
2884
/// Serde default for `NameTemplateConfig::email_domain`.
fn default_email_domain() -> String {
    "company.com".to_string()
}
2888
2889impl Default for NameTemplateConfig {
2890    fn default() -> Self {
2891        Self {
2892            culture_distribution: CultureDistribution::default(),
2893            email_domain: default_email_domain(),
2894            generate_realistic_names: true,
2895        }
2896    }
2897}
2898
2899/// Distribution of name cultures for generation.
2900#[derive(Debug, Clone, Serialize, Deserialize)]
2901pub struct CultureDistribution {
2902    pub western_us: f64,
2903    pub hispanic: f64,
2904    pub german: f64,
2905    pub french: f64,
2906    pub chinese: f64,
2907    pub japanese: f64,
2908    pub indian: f64,
2909}
2910
2911impl Default for CultureDistribution {
2912    fn default() -> Self {
2913        Self {
2914            western_us: 0.40,
2915            hispanic: 0.20,
2916            german: 0.10,
2917            french: 0.05,
2918            chinese: 0.10,
2919            japanese: 0.05,
2920            indian: 0.10,
2921        }
2922    }
2923}
2924
2925/// Description template configuration.
2926#[derive(Debug, Clone, Serialize, Deserialize)]
2927pub struct DescriptionTemplateConfig {
2928    /// Generate header text for journal entries
2929    #[serde(default = "default_true")]
2930    pub generate_header_text: bool,
2931    /// Generate line text for journal entry lines
2932    #[serde(default = "default_true")]
2933    pub generate_line_text: bool,
2934}
2935
2936impl Default for DescriptionTemplateConfig {
2937    fn default() -> Self {
2938        Self {
2939            generate_header_text: true,
2940            generate_line_text: true,
2941        }
2942    }
2943}
2944
2945/// Reference number template configuration.
2946#[derive(Debug, Clone, Serialize, Deserialize)]
2947pub struct ReferenceTemplateConfig {
2948    /// Generate reference numbers
2949    #[serde(default = "default_true")]
2950    pub generate_references: bool,
2951    /// Invoice prefix
2952    #[serde(default = "default_invoice_prefix")]
2953    pub invoice_prefix: String,
2954    /// Purchase order prefix
2955    #[serde(default = "default_po_prefix")]
2956    pub po_prefix: String,
2957    /// Sales order prefix
2958    #[serde(default = "default_so_prefix")]
2959    pub so_prefix: String,
2960}
2961
/// Serde default for `ReferenceTemplateConfig::invoice_prefix`.
fn default_invoice_prefix() -> String {
    "INV".to_string()
}
/// Serde default for `ReferenceTemplateConfig::po_prefix`.
fn default_po_prefix() -> String {
    "PO".to_string()
}
/// Serde default for `ReferenceTemplateConfig::so_prefix`.
fn default_so_prefix() -> String {
    "SO".to_string()
}
2971
2972impl Default for ReferenceTemplateConfig {
2973    fn default() -> Self {
2974        Self {
2975            generate_references: true,
2976            invoice_prefix: default_invoice_prefix(),
2977            po_prefix: default_po_prefix(),
2978            so_prefix: default_so_prefix(),
2979        }
2980    }
2981}
2982
2983/// Approval workflow configuration.
2984#[derive(Debug, Clone, Serialize, Deserialize)]
2985pub struct ApprovalConfig {
2986    /// Enable approval workflow generation
2987    #[serde(default)]
2988    pub enabled: bool,
2989    /// Threshold below which transactions are auto-approved
2990    #[serde(default = "default_auto_approve_threshold")]
2991    pub auto_approve_threshold: f64,
2992    /// Rate at which approvals are rejected (0.0 to 1.0)
2993    #[serde(default = "default_rejection_rate")]
2994    pub rejection_rate: f64,
2995    /// Rate at which approvals require revision (0.0 to 1.0)
2996    #[serde(default = "default_revision_rate")]
2997    pub revision_rate: f64,
2998    /// Average delay in hours for approval processing
2999    #[serde(default = "default_approval_delay_hours")]
3000    pub average_approval_delay_hours: f64,
3001    /// Approval chain thresholds
3002    #[serde(default)]
3003    pub thresholds: Vec<ApprovalThresholdConfig>,
3004}
3005
/// Serde default for `ApprovalConfig::auto_approve_threshold`.
fn default_auto_approve_threshold() -> f64 {
    1000.0
}
/// Serde default for `ApprovalConfig::rejection_rate`.
fn default_rejection_rate() -> f64 {
    0.02
}
/// Serde default for `ApprovalConfig::revision_rate`.
fn default_revision_rate() -> f64 {
    0.05
}
/// Serde default for `ApprovalConfig::average_approval_delay_hours`.
fn default_approval_delay_hours() -> f64 {
    4.0
}
3018
3019impl Default for ApprovalConfig {
3020    fn default() -> Self {
3021        Self {
3022            enabled: false,
3023            auto_approve_threshold: default_auto_approve_threshold(),
3024            rejection_rate: default_rejection_rate(),
3025            revision_rate: default_revision_rate(),
3026            average_approval_delay_hours: default_approval_delay_hours(),
3027            thresholds: vec![
3028                ApprovalThresholdConfig {
3029                    amount: 1000.0,
3030                    level: 1,
3031                    roles: vec!["senior_accountant".to_string()],
3032                },
3033                ApprovalThresholdConfig {
3034                    amount: 10000.0,
3035                    level: 2,
3036                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
3037                },
3038                ApprovalThresholdConfig {
3039                    amount: 100000.0,
3040                    level: 3,
3041                    roles: vec![
3042                        "senior_accountant".to_string(),
3043                        "controller".to_string(),
3044                        "manager".to_string(),
3045                    ],
3046                },
3047                ApprovalThresholdConfig {
3048                    amount: 500000.0,
3049                    level: 4,
3050                    roles: vec![
3051                        "senior_accountant".to_string(),
3052                        "controller".to_string(),
3053                        "manager".to_string(),
3054                        "executive".to_string(),
3055                    ],
3056                },
3057            ],
3058        }
3059    }
3060}
3061
3062/// Configuration for a single approval threshold.
3063#[derive(Debug, Clone, Serialize, Deserialize)]
3064pub struct ApprovalThresholdConfig {
3065    /// Amount threshold
3066    pub amount: f64,
3067    /// Approval level required
3068    pub level: u8,
3069    /// Roles that can approve at this level
3070    pub roles: Vec<String>,
3071}
3072
3073/// Department configuration.
3074#[derive(Debug, Clone, Serialize, Deserialize)]
3075pub struct DepartmentConfig {
3076    /// Enable department assignment
3077    #[serde(default)]
3078    pub enabled: bool,
3079    /// Multiplier for department headcounts
3080    #[serde(default = "default_headcount_multiplier")]
3081    pub headcount_multiplier: f64,
3082    /// Custom department definitions (optional)
3083    #[serde(default)]
3084    pub custom_departments: Vec<CustomDepartmentConfig>,
3085}
3086
/// Serde default for `DepartmentConfig::headcount_multiplier`.
fn default_headcount_multiplier() -> f64 {
    1.0
}
3090
3091impl Default for DepartmentConfig {
3092    fn default() -> Self {
3093        Self {
3094            enabled: false,
3095            headcount_multiplier: default_headcount_multiplier(),
3096            custom_departments: Vec::new(),
3097        }
3098    }
3099}
3100
3101/// Custom department definition.
3102#[derive(Debug, Clone, Serialize, Deserialize)]
3103pub struct CustomDepartmentConfig {
3104    /// Department code
3105    pub code: String,
3106    /// Department name
3107    pub name: String,
3108    /// Associated cost center
3109    #[serde(default)]
3110    pub cost_center: Option<String>,
3111    /// Primary business processes
3112    #[serde(default)]
3113    pub primary_processes: Vec<String>,
3114    /// Parent department code
3115    #[serde(default)]
3116    pub parent_code: Option<String>,
3117}
3118
// ============================================================================
// Master Data Configuration
// ============================================================================
3122
3123/// Master data generation configuration.
3124#[derive(Debug, Clone, Default, Serialize, Deserialize)]
3125pub struct MasterDataConfig {
3126    /// Vendor master data settings
3127    #[serde(default)]
3128    pub vendors: VendorMasterConfig,
3129    /// Customer master data settings
3130    #[serde(default)]
3131    pub customers: CustomerMasterConfig,
3132    /// Material master data settings
3133    #[serde(default)]
3134    pub materials: MaterialMasterConfig,
3135    /// Fixed asset master data settings
3136    #[serde(default)]
3137    pub fixed_assets: FixedAssetMasterConfig,
3138    /// Employee master data settings
3139    #[serde(default)]
3140    pub employees: EmployeeMasterConfig,
3141    /// Cost center master data settings
3142    #[serde(default)]
3143    pub cost_centers: CostCenterMasterConfig,
3144}
3145
3146/// Vendor master data configuration.
3147#[derive(Debug, Clone, Serialize, Deserialize)]
3148pub struct VendorMasterConfig {
3149    /// Number of vendors to generate
3150    #[serde(default = "default_vendor_count")]
3151    pub count: usize,
3152    /// Percentage of vendors that are intercompany (0.0 to 1.0)
3153    #[serde(default = "default_intercompany_percent")]
3154    pub intercompany_percent: f64,
3155    /// Payment terms distribution
3156    #[serde(default)]
3157    pub payment_terms_distribution: PaymentTermsDistribution,
3158    /// Vendor behavior distribution
3159    #[serde(default)]
3160    pub behavior_distribution: VendorBehaviorDistribution,
3161    /// Generate bank account details
3162    #[serde(default = "default_true")]
3163    pub generate_bank_accounts: bool,
3164    /// Generate tax IDs
3165    #[serde(default = "default_true")]
3166    pub generate_tax_ids: bool,
3167}
3168
/// Serde default for `VendorMasterConfig::count`.
fn default_vendor_count() -> usize {
    500
}

/// Serde default for `intercompany_percent`; shared by the vendor and customer master configs.
fn default_intercompany_percent() -> f64 {
    0.05
}
3176
3177impl Default for VendorMasterConfig {
3178    fn default() -> Self {
3179        Self {
3180            count: default_vendor_count(),
3181            intercompany_percent: default_intercompany_percent(),
3182            payment_terms_distribution: PaymentTermsDistribution::default(),
3183            behavior_distribution: VendorBehaviorDistribution::default(),
3184            generate_bank_accounts: true,
3185            generate_tax_ids: true,
3186        }
3187    }
3188}
3189
3190/// Payment terms distribution for vendors.
3191#[derive(Debug, Clone, Serialize, Deserialize)]
3192pub struct PaymentTermsDistribution {
3193    /// Net 30 days
3194    pub net_30: f64,
3195    /// Net 60 days
3196    pub net_60: f64,
3197    /// Net 90 days
3198    pub net_90: f64,
3199    /// 2% 10 Net 30 (early payment discount)
3200    pub two_ten_net_30: f64,
3201    /// Due on receipt
3202    pub due_on_receipt: f64,
3203    /// End of month
3204    pub end_of_month: f64,
3205}
3206
3207impl Default for PaymentTermsDistribution {
3208    fn default() -> Self {
3209        Self {
3210            net_30: 0.40,
3211            net_60: 0.20,
3212            net_90: 0.10,
3213            two_ten_net_30: 0.15,
3214            due_on_receipt: 0.05,
3215            end_of_month: 0.10,
3216        }
3217    }
3218}
3219
3220/// Vendor behavior distribution.
3221///
3222/// All fields default to `0.0` if absent from the YAML, so partial
3223/// distributions are accepted; the validator (`validate_sum_to_one`)
3224/// then enforces that the populated weights sum to `1.0 ± 0.01`.
3225#[derive(Debug, Clone, Serialize, Deserialize)]
3226#[serde(deny_unknown_fields)]
3227pub struct VendorBehaviorDistribution {
3228    /// Reliable vendors (consistent delivery, quality)
3229    #[serde(default)]
3230    pub reliable: f64,
3231    /// Sometimes late vendors
3232    #[serde(default)]
3233    pub sometimes_late: f64,
3234    /// Inconsistent quality vendors
3235    #[serde(default)]
3236    pub inconsistent_quality: f64,
3237    /// Premium vendors (high quality, premium pricing)
3238    #[serde(default)]
3239    pub premium: f64,
3240    /// Budget vendors (lower quality, lower pricing)
3241    #[serde(default)]
3242    pub budget: f64,
3243    /// Erratic vendors (variable behavior, unpredictable performance)
3244    #[serde(default)]
3245    pub erratic: f64,
3246    /// Problematic vendors (frequent issues, high risk for fraud scenarios)
3247    #[serde(default)]
3248    pub problematic: f64,
3249}
3250
3251impl Default for VendorBehaviorDistribution {
3252    fn default() -> Self {
3253        // Preserves the pre-extension default sum=1.0 over the original
3254        // five fields.  `erratic` and `problematic` default to 0.0 so
3255        // that existing configs/packs continue to merge to a 1.0 sum
3256        // without modification.
3257        Self {
3258            reliable: 0.50,
3259            sometimes_late: 0.20,
3260            inconsistent_quality: 0.10,
3261            premium: 0.10,
3262            budget: 0.10,
3263            erratic: 0.0,
3264            problematic: 0.0,
3265        }
3266    }
3267}
3268
3269/// Customer master data configuration.
3270#[derive(Debug, Clone, Serialize, Deserialize)]
3271pub struct CustomerMasterConfig {
3272    /// Number of customers to generate
3273    #[serde(default = "default_customer_count")]
3274    pub count: usize,
3275    /// Percentage of customers that are intercompany (0.0 to 1.0)
3276    #[serde(default = "default_intercompany_percent")]
3277    pub intercompany_percent: f64,
3278    /// Credit rating distribution
3279    #[serde(default)]
3280    pub credit_rating_distribution: CreditRatingDistribution,
3281    /// Payment behavior distribution
3282    #[serde(default)]
3283    pub payment_behavior_distribution: PaymentBehaviorDistribution,
3284    /// Generate credit limits based on rating
3285    #[serde(default = "default_true")]
3286    pub generate_credit_limits: bool,
3287}
3288
/// Serde fallback for `CustomerMasterConfig::count`: 2000 customers.
fn default_customer_count() -> usize {
    2000_usize
}
3292
3293impl Default for CustomerMasterConfig {
3294    fn default() -> Self {
3295        Self {
3296            count: default_customer_count(),
3297            intercompany_percent: default_intercompany_percent(),
3298            credit_rating_distribution: CreditRatingDistribution::default(),
3299            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
3300            generate_credit_limits: true,
3301        }
3302    }
3303}
3304
3305/// Credit rating distribution for customers.
3306///
3307/// Two parallel vocabularies are accepted:
3308///   * Bond-grade tiers: `aaa`, `aa`, `a`, `bbb`, `bb`, `b`, `below_b`
3309///   * Plain-English tiers: `excellent`, `good`, `fair`, `poor`
3310///
3311/// All fields default to `0.0` if absent; mix and match as needed.
3312/// The validator enforces that the populated weights sum to `1.0`.
3313#[derive(Debug, Clone, Serialize, Deserialize)]
3314#[serde(deny_unknown_fields)]
3315pub struct CreditRatingDistribution {
3316    /// AAA rating
3317    #[serde(default)]
3318    pub aaa: f64,
3319    /// AA rating
3320    #[serde(default)]
3321    pub aa: f64,
3322    /// A rating
3323    #[serde(default)]
3324    pub a: f64,
3325    /// BBB rating
3326    #[serde(default)]
3327    pub bbb: f64,
3328    /// BB rating
3329    #[serde(default)]
3330    pub bb: f64,
3331    /// B rating
3332    #[serde(default)]
3333    pub b: f64,
3334    /// Below B rating
3335    #[serde(default)]
3336    pub below_b: f64,
3337    /// Plain-English: excellent credit (≈ AAA/AA tier)
3338    #[serde(default)]
3339    pub excellent: f64,
3340    /// Plain-English: good credit (≈ A tier)
3341    #[serde(default)]
3342    pub good: f64,
3343    /// Plain-English: fair credit (≈ BBB/BB tier)
3344    #[serde(default)]
3345    pub fair: f64,
3346    /// Plain-English: poor credit (≈ B/below tier)
3347    #[serde(default)]
3348    pub poor: f64,
3349}
3350
3351impl Default for CreditRatingDistribution {
3352    fn default() -> Self {
3353        Self {
3354            aaa: 0.05,
3355            aa: 0.10,
3356            a: 0.20,
3357            bbb: 0.30,
3358            bb: 0.20,
3359            b: 0.10,
3360            below_b: 0.05,
3361            excellent: 0.0,
3362            good: 0.0,
3363            fair: 0.0,
3364            poor: 0.0,
3365        }
3366    }
3367}
3368
3369/// Payment behavior distribution for customers.
3370///
3371/// All fields default to `0.0` if absent from the YAML.  Validator
3372/// enforces that populated weights sum to `1.0 ± 0.01`.
3373#[derive(Debug, Clone, Serialize, Deserialize)]
3374#[serde(deny_unknown_fields)]
3375pub struct PaymentBehaviorDistribution {
3376    /// Always pays early
3377    #[serde(default)]
3378    pub early_payer: f64,
3379    /// Pays on time
3380    #[serde(default)]
3381    pub on_time: f64,
3382    /// Occasionally late
3383    #[serde(default)]
3384    pub occasional_late: f64,
3385    /// Frequently late
3386    #[serde(default)]
3387    pub frequent_late: f64,
3388    /// Takes early payment discounts
3389    #[serde(default)]
3390    pub discount_taker: f64,
3391}
3392
3393impl Default for PaymentBehaviorDistribution {
3394    fn default() -> Self {
3395        Self {
3396            early_payer: 0.10,
3397            on_time: 0.50,
3398            occasional_late: 0.25,
3399            frequent_late: 0.10,
3400            discount_taker: 0.05,
3401        }
3402    }
3403}
3404
3405/// Material master data configuration.
3406#[derive(Debug, Clone, Serialize, Deserialize)]
3407pub struct MaterialMasterConfig {
3408    /// Number of materials to generate
3409    #[serde(default = "default_material_count")]
3410    pub count: usize,
3411    /// Material type distribution
3412    #[serde(default)]
3413    pub type_distribution: MaterialTypeDistribution,
3414    /// Valuation method distribution
3415    #[serde(default)]
3416    pub valuation_distribution: ValuationMethodDistribution,
3417    /// Percentage of materials with BOM (bill of materials)
3418    #[serde(default = "default_bom_percent")]
3419    pub bom_percent: f64,
3420    /// Maximum BOM depth
3421    #[serde(default = "default_max_bom_depth")]
3422    pub max_bom_depth: u8,
3423}
3424
/// Serde fallback for `MaterialMasterConfig::count`: 5000 materials.
fn default_material_count() -> usize {
    5000_usize
}
3428
/// Serde fallback for `MaterialMasterConfig::bom_percent`: `0.20`.
fn default_bom_percent() -> f64 {
    0.20_f64
}
3432
/// Serde fallback for `MaterialMasterConfig::max_bom_depth`: 3 levels.
fn default_max_bom_depth() -> u8 {
    3_u8
}
3436
3437impl Default for MaterialMasterConfig {
3438    fn default() -> Self {
3439        Self {
3440            count: default_material_count(),
3441            type_distribution: MaterialTypeDistribution::default(),
3442            valuation_distribution: ValuationMethodDistribution::default(),
3443            bom_percent: default_bom_percent(),
3444            max_bom_depth: default_max_bom_depth(),
3445        }
3446    }
3447}
3448
3449/// Material type distribution.
3450#[derive(Debug, Clone, Serialize, Deserialize)]
3451pub struct MaterialTypeDistribution {
3452    /// Raw materials
3453    pub raw_material: f64,
3454    /// Semi-finished goods
3455    pub semi_finished: f64,
3456    /// Finished goods
3457    pub finished_good: f64,
3458    /// Trading goods (purchased for resale)
3459    pub trading_good: f64,
3460    /// Operating supplies
3461    pub operating_supply: f64,
3462    /// Services
3463    pub service: f64,
3464}
3465
3466impl Default for MaterialTypeDistribution {
3467    fn default() -> Self {
3468        Self {
3469            raw_material: 0.30,
3470            semi_finished: 0.15,
3471            finished_good: 0.25,
3472            trading_good: 0.15,
3473            operating_supply: 0.10,
3474            service: 0.05,
3475        }
3476    }
3477}
3478
3479/// Valuation method distribution for materials.
3480#[derive(Debug, Clone, Serialize, Deserialize)]
3481pub struct ValuationMethodDistribution {
3482    /// Standard cost
3483    pub standard_cost: f64,
3484    /// Moving average
3485    pub moving_average: f64,
3486    /// FIFO (First In, First Out)
3487    pub fifo: f64,
3488    /// LIFO (Last In, First Out)
3489    pub lifo: f64,
3490}
3491
3492impl Default for ValuationMethodDistribution {
3493    fn default() -> Self {
3494        Self {
3495            standard_cost: 0.50,
3496            moving_average: 0.30,
3497            fifo: 0.15,
3498            lifo: 0.05,
3499        }
3500    }
3501}
3502
3503/// Fixed asset master data configuration.
3504#[derive(Debug, Clone, Serialize, Deserialize)]
3505pub struct FixedAssetMasterConfig {
3506    /// Number of fixed assets to generate
3507    #[serde(default = "default_asset_count")]
3508    pub count: usize,
3509    /// Asset class distribution
3510    #[serde(default)]
3511    pub class_distribution: AssetClassDistribution,
3512    /// Depreciation method distribution
3513    #[serde(default)]
3514    pub depreciation_distribution: DepreciationMethodDistribution,
3515    /// Percentage of assets that are fully depreciated
3516    #[serde(default = "default_fully_depreciated_percent")]
3517    pub fully_depreciated_percent: f64,
3518    /// Generate acquisition history
3519    #[serde(default = "default_true")]
3520    pub generate_acquisition_history: bool,
3521}
3522
/// Serde fallback for `FixedAssetMasterConfig::count`: 800 assets.
fn default_asset_count() -> usize {
    800_usize
}
3526
/// Serde fallback for `FixedAssetMasterConfig::fully_depreciated_percent`: `0.15`.
fn default_fully_depreciated_percent() -> f64 {
    0.15_f64
}
3530
3531impl Default for FixedAssetMasterConfig {
3532    fn default() -> Self {
3533        Self {
3534            count: default_asset_count(),
3535            class_distribution: AssetClassDistribution::default(),
3536            depreciation_distribution: DepreciationMethodDistribution::default(),
3537            fully_depreciated_percent: default_fully_depreciated_percent(),
3538            generate_acquisition_history: true,
3539        }
3540    }
3541}
3542
3543/// Asset class distribution.
3544#[derive(Debug, Clone, Serialize, Deserialize)]
3545pub struct AssetClassDistribution {
3546    /// Buildings and structures
3547    pub buildings: f64,
3548    /// Machinery and equipment
3549    pub machinery: f64,
3550    /// Vehicles
3551    pub vehicles: f64,
3552    /// IT equipment
3553    pub it_equipment: f64,
3554    /// Furniture and fixtures
3555    pub furniture: f64,
3556    /// Land (non-depreciable)
3557    pub land: f64,
3558    /// Leasehold improvements
3559    pub leasehold: f64,
3560}
3561
3562impl Default for AssetClassDistribution {
3563    fn default() -> Self {
3564        Self {
3565            buildings: 0.15,
3566            machinery: 0.30,
3567            vehicles: 0.15,
3568            it_equipment: 0.20,
3569            furniture: 0.10,
3570            land: 0.05,
3571            leasehold: 0.05,
3572        }
3573    }
3574}
3575
3576/// Depreciation method distribution.
3577#[derive(Debug, Clone, Serialize, Deserialize)]
3578pub struct DepreciationMethodDistribution {
3579    /// Straight line
3580    pub straight_line: f64,
3581    /// Declining balance
3582    pub declining_balance: f64,
3583    /// Double declining balance
3584    pub double_declining: f64,
3585    /// Sum of years' digits
3586    pub sum_of_years: f64,
3587    /// Units of production
3588    pub units_of_production: f64,
3589}
3590
3591impl Default for DepreciationMethodDistribution {
3592    fn default() -> Self {
3593        Self {
3594            straight_line: 0.60,
3595            declining_balance: 0.20,
3596            double_declining: 0.10,
3597            sum_of_years: 0.05,
3598            units_of_production: 0.05,
3599        }
3600    }
3601}
3602
3603/// Employee master data configuration.
3604#[derive(Debug, Clone, Serialize, Deserialize)]
3605pub struct EmployeeMasterConfig {
3606    /// Number of employees to generate
3607    #[serde(default = "default_employee_count")]
3608    pub count: usize,
3609    /// Generate organizational hierarchy
3610    #[serde(default = "default_true")]
3611    pub generate_hierarchy: bool,
3612    /// Maximum hierarchy depth
3613    #[serde(default = "default_hierarchy_depth")]
3614    pub max_hierarchy_depth: u8,
3615    /// Average span of control (direct reports per manager)
3616    #[serde(default = "default_span_of_control")]
3617    pub average_span_of_control: f64,
3618    /// Approval limit distribution by job level
3619    #[serde(default)]
3620    pub approval_limits: ApprovalLimitDistribution,
3621    /// Department distribution
3622    #[serde(default)]
3623    pub department_distribution: EmployeeDepartmentDistribution,
3624}
3625
/// Serde fallback for `EmployeeMasterConfig::count`: 1500 employees.
fn default_employee_count() -> usize {
    1500_usize
}
3629
/// Serde fallback for `EmployeeMasterConfig::max_hierarchy_depth`: 6 levels.
fn default_hierarchy_depth() -> u8 {
    6_u8
}
3633
/// Serde fallback for `EmployeeMasterConfig::average_span_of_control`: `5.0`.
fn default_span_of_control() -> f64 {
    5.0_f64
}
3637
3638impl Default for EmployeeMasterConfig {
3639    fn default() -> Self {
3640        Self {
3641            count: default_employee_count(),
3642            generate_hierarchy: true,
3643            max_hierarchy_depth: default_hierarchy_depth(),
3644            average_span_of_control: default_span_of_control(),
3645            approval_limits: ApprovalLimitDistribution::default(),
3646            department_distribution: EmployeeDepartmentDistribution::default(),
3647        }
3648    }
3649}
3650
3651/// Approval limit distribution by job level.
3652#[derive(Debug, Clone, Serialize, Deserialize)]
3653pub struct ApprovalLimitDistribution {
3654    /// Staff level approval limit
3655    #[serde(default = "default_staff_limit")]
3656    pub staff: f64,
3657    /// Senior staff approval limit
3658    #[serde(default = "default_senior_limit")]
3659    pub senior: f64,
3660    /// Manager approval limit
3661    #[serde(default = "default_manager_limit")]
3662    pub manager: f64,
3663    /// Director approval limit
3664    #[serde(default = "default_director_limit")]
3665    pub director: f64,
3666    /// VP approval limit
3667    #[serde(default = "default_vp_limit")]
3668    pub vp: f64,
3669    /// Executive approval limit
3670    #[serde(default = "default_executive_limit")]
3671    pub executive: f64,
3672}
3673
/// Serde fallback for `ApprovalLimitDistribution::staff`: `1000.0`.
fn default_staff_limit() -> f64 {
    1_000.0_f64
}
/// Serde fallback for `ApprovalLimitDistribution::senior`: `5000.0`.
fn default_senior_limit() -> f64 {
    5_000.0_f64
}
/// Serde fallback for `ApprovalLimitDistribution::manager`: `25000.0`.
fn default_manager_limit() -> f64 {
    25_000.0_f64
}
/// Serde fallback for `ApprovalLimitDistribution::director`: `100000.0`.
fn default_director_limit() -> f64 {
    100_000.0_f64
}
/// Serde fallback for `ApprovalLimitDistribution::vp`: `500000.0`.
fn default_vp_limit() -> f64 {
    500_000.0_f64
}
/// Serde fallback for `ApprovalLimitDistribution::executive`: positive
/// infinity, i.e. no finite cap.
///
/// NOTE(review): non-finite floats are commonly written as `null` by JSON
/// serializers (serde_json does this), and `null` does not read back as an
/// `f64` — confirm whether this default needs to round-trip through JSON.
fn default_executive_limit() -> f64 {
    let unlimited = f64::INFINITY;
    unlimited
}
3692
3693impl Default for ApprovalLimitDistribution {
3694    fn default() -> Self {
3695        Self {
3696            staff: default_staff_limit(),
3697            senior: default_senior_limit(),
3698            manager: default_manager_limit(),
3699            director: default_director_limit(),
3700            vp: default_vp_limit(),
3701            executive: default_executive_limit(),
3702        }
3703    }
3704}
3705
3706/// Employee distribution across departments.
3707#[derive(Debug, Clone, Serialize, Deserialize)]
3708pub struct EmployeeDepartmentDistribution {
3709    /// Finance and Accounting
3710    pub finance: f64,
3711    /// Procurement
3712    pub procurement: f64,
3713    /// Sales
3714    pub sales: f64,
3715    /// Warehouse and Logistics
3716    pub warehouse: f64,
3717    /// IT
3718    pub it: f64,
3719    /// Human Resources
3720    pub hr: f64,
3721    /// Operations
3722    pub operations: f64,
3723    /// Executive
3724    pub executive: f64,
3725}
3726
3727impl Default for EmployeeDepartmentDistribution {
3728    fn default() -> Self {
3729        Self {
3730            finance: 0.12,
3731            procurement: 0.10,
3732            sales: 0.25,
3733            warehouse: 0.15,
3734            it: 0.10,
3735            hr: 0.05,
3736            operations: 0.20,
3737            executive: 0.03,
3738        }
3739    }
3740}
3741
3742/// Cost center master data configuration.
3743#[derive(Debug, Clone, Serialize, Deserialize)]
3744pub struct CostCenterMasterConfig {
3745    /// Number of cost centers to generate
3746    #[serde(default = "default_cost_center_count")]
3747    pub count: usize,
3748    /// Generate cost center hierarchy
3749    #[serde(default = "default_true")]
3750    pub generate_hierarchy: bool,
3751    /// Maximum hierarchy depth
3752    #[serde(default = "default_cc_hierarchy_depth")]
3753    pub max_hierarchy_depth: u8,
3754}
3755
/// Serde fallback for `CostCenterMasterConfig::count`: 50 cost centers.
fn default_cost_center_count() -> usize {
    50_usize
}
3759
/// Serde fallback for `CostCenterMasterConfig::max_hierarchy_depth`: 3 levels.
fn default_cc_hierarchy_depth() -> u8 {
    3_u8
}
3763
3764impl Default for CostCenterMasterConfig {
3765    fn default() -> Self {
3766        Self {
3767            count: default_cost_center_count(),
3768            generate_hierarchy: true,
3769            max_hierarchy_depth: default_cc_hierarchy_depth(),
3770        }
3771    }
3772}
3773
3774// ============================================================================
3775// Document Flow Configuration
3776// ============================================================================
3777
3778/// Document flow generation configuration.
3779#[derive(Debug, Clone, Serialize, Deserialize)]
3780pub struct DocumentFlowConfig {
3781    /// P2P (Procure-to-Pay) flow configuration
3782    #[serde(default)]
3783    pub p2p: P2PFlowConfig,
3784    /// O2C (Order-to-Cash) flow configuration
3785    #[serde(default)]
3786    pub o2c: O2CFlowConfig,
3787    /// Generate document reference chains
3788    #[serde(default = "default_true")]
3789    pub generate_document_references: bool,
3790    /// Export document flow graph
3791    #[serde(default)]
3792    pub export_flow_graph: bool,
3793}
3794
3795impl Default for DocumentFlowConfig {
3796    fn default() -> Self {
3797        Self {
3798            p2p: P2PFlowConfig::default(),
3799            o2c: O2CFlowConfig::default(),
3800            generate_document_references: true,
3801            export_flow_graph: false,
3802        }
3803    }
3804}
3805
3806/// P2P (Procure-to-Pay) flow configuration.
3807#[derive(Debug, Clone, Serialize, Deserialize)]
3808pub struct P2PFlowConfig {
3809    /// Enable P2P document flow generation
3810    #[serde(default = "default_true")]
3811    pub enabled: bool,
3812    /// Three-way match success rate (PO-GR-Invoice)
3813    #[serde(default = "default_three_way_match_rate")]
3814    pub three_way_match_rate: f64,
3815    /// Rate of partial deliveries
3816    #[serde(default = "default_partial_delivery_rate")]
3817    pub partial_delivery_rate: f64,
3818    /// Rate of price variances between PO and Invoice
3819    #[serde(default = "default_price_variance_rate")]
3820    pub price_variance_rate: f64,
3821    /// Maximum price variance percentage
3822    #[serde(default = "default_max_price_variance")]
3823    pub max_price_variance_percent: f64,
3824    /// Rate of quantity variances between PO/GR and Invoice
3825    #[serde(default = "default_quantity_variance_rate")]
3826    pub quantity_variance_rate: f64,
3827    /// Average days from PO to goods receipt
3828    #[serde(default = "default_po_to_gr_days")]
3829    pub average_po_to_gr_days: u32,
3830    /// Average days from GR to invoice
3831    #[serde(default = "default_gr_to_invoice_days")]
3832    pub average_gr_to_invoice_days: u32,
3833    /// Average days from invoice to payment
3834    #[serde(default = "default_invoice_to_payment_days")]
3835    pub average_invoice_to_payment_days: u32,
3836    /// PO line count distribution
3837    #[serde(default)]
3838    pub line_count_distribution: DocumentLineCountDistribution,
3839    /// Payment behavior configuration
3840    #[serde(default)]
3841    pub payment_behavior: P2PPaymentBehaviorConfig,
3842    /// Rate of over-deliveries (quantity received exceeds PO quantity)
3843    #[serde(default)]
3844    pub over_delivery_rate: Option<f64>,
3845    /// Rate of early payment discounts being taken
3846    #[serde(default)]
3847    pub early_payment_discount_rate: Option<f64>,
3848}
3849
/// Serde fallback for `P2PFlowConfig::three_way_match_rate`: `0.95`.
fn default_three_way_match_rate() -> f64 {
    0.95_f64
}
3853
/// Serde fallback for `P2PFlowConfig::partial_delivery_rate`: `0.15`.
fn default_partial_delivery_rate() -> f64 {
    0.15_f64
}
3857
/// Serde fallback for `P2PFlowConfig::price_variance_rate`: `0.08`.
fn default_price_variance_rate() -> f64 {
    0.08_f64
}
3861
/// Serde fallback for `P2PFlowConfig::max_price_variance_percent`: `0.05`.
fn default_max_price_variance() -> f64 {
    0.05_f64
}
3865
/// Serde fallback for `P2PFlowConfig::quantity_variance_rate`: `0.05`.
fn default_quantity_variance_rate() -> f64 {
    0.05_f64
}
3869
/// Serde fallback for `P2PFlowConfig::average_po_to_gr_days`: 14 days.
fn default_po_to_gr_days() -> u32 {
    14_u32
}
3873
/// Serde fallback for `P2PFlowConfig::average_gr_to_invoice_days`: 5 days.
fn default_gr_to_invoice_days() -> u32 {
    5_u32
}
3877
/// Serde fallback for `P2PFlowConfig::average_invoice_to_payment_days`: 30 days.
fn default_invoice_to_payment_days() -> u32 {
    30_u32
}
3881
3882impl Default for P2PFlowConfig {
3883    fn default() -> Self {
3884        Self {
3885            enabled: true,
3886            three_way_match_rate: default_three_way_match_rate(),
3887            partial_delivery_rate: default_partial_delivery_rate(),
3888            price_variance_rate: default_price_variance_rate(),
3889            max_price_variance_percent: default_max_price_variance(),
3890            quantity_variance_rate: default_quantity_variance_rate(),
3891            average_po_to_gr_days: default_po_to_gr_days(),
3892            average_gr_to_invoice_days: default_gr_to_invoice_days(),
3893            average_invoice_to_payment_days: default_invoice_to_payment_days(),
3894            line_count_distribution: DocumentLineCountDistribution::default(),
3895            payment_behavior: P2PPaymentBehaviorConfig::default(),
3896            over_delivery_rate: None,
3897            early_payment_discount_rate: None,
3898        }
3899    }
3900}
3901
3902// ============================================================================
3903// P2P Payment Behavior Configuration
3904// ============================================================================
3905
3906/// P2P payment behavior configuration.
3907#[derive(Debug, Clone, Serialize, Deserialize)]
3908pub struct P2PPaymentBehaviorConfig {
3909    /// Rate of late payments (beyond due date)
3910    #[serde(default = "default_p2p_late_payment_rate")]
3911    pub late_payment_rate: f64,
3912    /// Distribution of late payment days
3913    #[serde(default)]
3914    pub late_payment_days_distribution: LatePaymentDaysDistribution,
3915    /// Rate of partial payments
3916    #[serde(default = "default_p2p_partial_payment_rate")]
3917    pub partial_payment_rate: f64,
3918    /// Rate of payment corrections (NSF, chargebacks, reversals)
3919    #[serde(default = "default_p2p_payment_correction_rate")]
3920    pub payment_correction_rate: f64,
3921    /// Average days until partial payment remainder is paid
3922    #[serde(default = "default_p2p_avg_days_until_remainder")]
3923    pub avg_days_until_remainder: u32,
3924}
3925
/// Serde fallback for `P2PPaymentBehaviorConfig::late_payment_rate`: `0.15`.
fn default_p2p_late_payment_rate() -> f64 {
    0.15_f64
}
3929
/// Serde fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`: `0.05`.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05_f64
}
3933
/// Serde fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`: `0.02`.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02_f64
}
3937
/// Serde fallback for `P2PPaymentBehaviorConfig::avg_days_until_remainder`: 30 days.
fn default_p2p_avg_days_until_remainder() -> u32 {
    30_u32
}
3941
3942impl Default for P2PPaymentBehaviorConfig {
3943    fn default() -> Self {
3944        Self {
3945            late_payment_rate: default_p2p_late_payment_rate(),
3946            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
3947            partial_payment_rate: default_p2p_partial_payment_rate(),
3948            payment_correction_rate: default_p2p_payment_correction_rate(),
3949            avg_days_until_remainder: default_p2p_avg_days_until_remainder(),
3950        }
3951    }
3952}
3953
3954/// Distribution of late payment days for P2P.
3955#[derive(Debug, Clone, Serialize, Deserialize)]
3956pub struct LatePaymentDaysDistribution {
3957    /// 1-7 days late (slightly late)
3958    #[serde(default = "default_slightly_late")]
3959    pub slightly_late_1_to_7: f64,
3960    /// 8-14 days late
3961    #[serde(default = "default_late_8_14")]
3962    pub late_8_to_14: f64,
3963    /// 15-30 days late (very late)
3964    #[serde(default = "default_very_late")]
3965    pub very_late_15_to_30: f64,
3966    /// 31-60 days late (severely late)
3967    #[serde(default = "default_severely_late")]
3968    pub severely_late_31_to_60: f64,
3969    /// Over 60 days late (extremely late)
3970    #[serde(default = "default_extremely_late")]
3971    pub extremely_late_over_60: f64,
3972}
3973
/// Serde fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`: `0.50`.
fn default_slightly_late() -> f64 {
    0.50_f64
}
3977
/// Serde fallback for `LatePaymentDaysDistribution::late_8_to_14`: `0.25`.
fn default_late_8_14() -> f64 {
    0.25_f64
}
3981
/// Serde fallback for `LatePaymentDaysDistribution::very_late_15_to_30`: `0.15`.
fn default_very_late() -> f64 {
    0.15_f64
}
3985
/// Serde fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`: `0.07`.
fn default_severely_late() -> f64 {
    0.07_f64
}
3989
/// Serde fallback for `LatePaymentDaysDistribution::extremely_late_over_60`: `0.03`.
fn default_extremely_late() -> f64 {
    0.03_f64
}
3993
3994impl Default for LatePaymentDaysDistribution {
3995    fn default() -> Self {
3996        Self {
3997            slightly_late_1_to_7: default_slightly_late(),
3998            late_8_to_14: default_late_8_14(),
3999            very_late_15_to_30: default_very_late(),
4000            severely_late_31_to_60: default_severely_late(),
4001            extremely_late_over_60: default_extremely_late(),
4002        }
4003    }
4004}
4005
4006/// O2C (Order-to-Cash) flow configuration.
4007#[derive(Debug, Clone, Serialize, Deserialize)]
4008pub struct O2CFlowConfig {
4009    /// Enable O2C document flow generation
4010    #[serde(default = "default_true")]
4011    pub enabled: bool,
4012    /// Credit check failure rate
4013    #[serde(default = "default_credit_check_failure_rate")]
4014    pub credit_check_failure_rate: f64,
4015    /// Rate of partial shipments
4016    #[serde(default = "default_partial_shipment_rate")]
4017    pub partial_shipment_rate: f64,
4018    /// Rate of returns
4019    #[serde(default = "default_return_rate")]
4020    pub return_rate: f64,
4021    /// Bad debt write-off rate
4022    #[serde(default = "default_bad_debt_rate")]
4023    pub bad_debt_rate: f64,
4024    /// Average days from SO to delivery
4025    #[serde(default = "default_so_to_delivery_days")]
4026    pub average_so_to_delivery_days: u32,
4027    /// Average days from delivery to invoice
4028    #[serde(default = "default_delivery_to_invoice_days")]
4029    pub average_delivery_to_invoice_days: u32,
4030    /// Average days from invoice to receipt
4031    #[serde(default = "default_invoice_to_receipt_days")]
4032    pub average_invoice_to_receipt_days: u32,
4033    /// SO line count distribution
4034    #[serde(default)]
4035    pub line_count_distribution: DocumentLineCountDistribution,
4036    /// Cash discount configuration
4037    #[serde(default)]
4038    pub cash_discount: CashDiscountConfig,
4039    /// Payment behavior configuration
4040    #[serde(default)]
4041    pub payment_behavior: O2CPaymentBehaviorConfig,
4042    /// Rate of late payments
4043    #[serde(default)]
4044    pub late_payment_rate: Option<f64>,
4045}
4046
/// Serde fallback for `O2CFlowConfig::credit_check_failure_rate`: `0.02`.
fn default_credit_check_failure_rate() -> f64 {
    0.02_f64
}
4050
/// Serde fallback for `O2CFlowConfig::partial_shipment_rate`: `0.10`.
fn default_partial_shipment_rate() -> f64 {
    0.10_f64
}
4054
/// Serde fallback for `O2CFlowConfig::return_rate`: `0.03`.
fn default_return_rate() -> f64 {
    0.03_f64
}
4058
/// Serde fallback for `O2CFlowConfig::bad_debt_rate`: `0.01`.
fn default_bad_debt_rate() -> f64 {
    0.01_f64
}
4062
/// Serde fallback for `O2CFlowConfig::average_so_to_delivery_days`: 7 days.
fn default_so_to_delivery_days() -> u32 {
    7_u32
}
4066
/// Serde fallback for `O2CFlowConfig::average_delivery_to_invoice_days`: 1 day.
fn default_delivery_to_invoice_days() -> u32 {
    1_u32
}
4070
/// Serde fallback for `O2CFlowConfig::average_invoice_to_receipt_days`: 45 days.
fn default_invoice_to_receipt_days() -> u32 {
    45_u32
}
4074
4075impl Default for O2CFlowConfig {
4076    fn default() -> Self {
4077        Self {
4078            enabled: true,
4079            credit_check_failure_rate: default_credit_check_failure_rate(),
4080            partial_shipment_rate: default_partial_shipment_rate(),
4081            return_rate: default_return_rate(),
4082            bad_debt_rate: default_bad_debt_rate(),
4083            average_so_to_delivery_days: default_so_to_delivery_days(),
4084            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
4085            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
4086            line_count_distribution: DocumentLineCountDistribution::default(),
4087            cash_discount: CashDiscountConfig::default(),
4088            payment_behavior: O2CPaymentBehaviorConfig::default(),
4089            late_payment_rate: None,
4090        }
4091    }
4092}
4093
4094// ============================================================================
4095// O2C Payment Behavior Configuration
4096// ============================================================================
4097
4098/// O2C payment behavior configuration.
4099#[derive(Debug, Clone, Serialize, Deserialize, Default)]
4100pub struct O2CPaymentBehaviorConfig {
4101    /// Dunning (Mahnung) configuration
4102    #[serde(default)]
4103    pub dunning: DunningConfig,
4104    /// Partial payment configuration
4105    #[serde(default)]
4106    pub partial_payments: PartialPaymentConfig,
4107    /// Short payment configuration (unauthorized deductions)
4108    #[serde(default)]
4109    pub short_payments: ShortPaymentConfig,
4110    /// On-account payment configuration (unapplied payments)
4111    #[serde(default)]
4112    pub on_account_payments: OnAccountPaymentConfig,
4113    /// Payment correction configuration (NSF, chargebacks)
4114    #[serde(default)]
4115    pub payment_corrections: PaymentCorrectionConfig,
4116}
4117
4118/// Dunning (Mahnungen) configuration for AR collections.
4119#[derive(Debug, Clone, Serialize, Deserialize)]
4120pub struct DunningConfig {
4121    /// Enable dunning process
4122    #[serde(default)]
4123    pub enabled: bool,
4124    /// Days overdue for level 1 dunning (1st reminder)
4125    #[serde(default = "default_dunning_level_1_days")]
4126    pub level_1_days_overdue: u32,
4127    /// Days overdue for level 2 dunning (2nd reminder)
4128    #[serde(default = "default_dunning_level_2_days")]
4129    pub level_2_days_overdue: u32,
4130    /// Days overdue for level 3 dunning (final notice)
4131    #[serde(default = "default_dunning_level_3_days")]
4132    pub level_3_days_overdue: u32,
4133    /// Days overdue for collection handover
4134    #[serde(default = "default_collection_days")]
4135    pub collection_days_overdue: u32,
4136    /// Payment rates after each dunning level
4137    #[serde(default)]
4138    pub payment_after_dunning_rates: DunningPaymentRates,
4139    /// Rate of invoices blocked from dunning (disputes)
4140    #[serde(default = "default_dunning_block_rate")]
4141    pub dunning_block_rate: f64,
4142    /// Interest rate per year for overdue amounts
4143    #[serde(default = "default_dunning_interest_rate")]
4144    pub interest_rate_per_year: f64,
4145    /// Fixed dunning charge per letter
4146    #[serde(default = "default_dunning_charge")]
4147    pub dunning_charge: f64,
4148}
4149
/// Serde fallback for `DunningConfig::level_1_days_overdue`: 14 days.
fn default_dunning_level_1_days() -> u32 {
    14_u32
}
4153
/// Serde fallback for `DunningConfig::level_2_days_overdue`: 28 days.
fn default_dunning_level_2_days() -> u32 {
    28_u32
}
4157
/// Serde fallback for `DunningConfig::level_3_days_overdue`: 42 days.
fn default_dunning_level_3_days() -> u32 {
    42_u32
}
4161
/// Serde fallback for `DunningConfig::collection_days_overdue`: 60 days.
fn default_collection_days() -> u32 {
    60_u32
}
4165
/// Serde fallback for `DunningConfig::dunning_block_rate`: `0.05`.
fn default_dunning_block_rate() -> f64 {
    0.05_f64
}
4169
/// Serde fallback for `DunningConfig::interest_rate_per_year`: `0.09`.
fn default_dunning_interest_rate() -> f64 {
    0.09_f64
}
4173
/// Serde fallback for `DunningConfig::dunning_charge`: `25.0`.
fn default_dunning_charge() -> f64 {
    25.0_f64
}
4177
4178impl Default for DunningConfig {
4179    fn default() -> Self {
4180        Self {
4181            enabled: false,
4182            level_1_days_overdue: default_dunning_level_1_days(),
4183            level_2_days_overdue: default_dunning_level_2_days(),
4184            level_3_days_overdue: default_dunning_level_3_days(),
4185            collection_days_overdue: default_collection_days(),
4186            payment_after_dunning_rates: DunningPaymentRates::default(),
4187            dunning_block_rate: default_dunning_block_rate(),
4188            interest_rate_per_year: default_dunning_interest_rate(),
4189            dunning_charge: default_dunning_charge(),
4190        }
4191    }
4192}
4193
/// Payment rates after each dunning level.
///
/// Every field is optional in the input; a missing field falls back to its
/// `default_*` helper. The built-in defaults (0.40 / 0.30 / 0.15 / 0.05 /
/// 0.10) add up to 1.0.
///
/// NOTE(review): nothing in this definition enforces that custom values sum
/// to 1.0 — confirm whether validation or normalization happens elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DunningPaymentRates {
    /// Rate that pays after level 1 reminder (default 0.40)
    #[serde(default = "default_after_level_1")]
    pub after_level_1: f64,
    /// Rate that pays after level 2 reminder (default 0.30)
    #[serde(default = "default_after_level_2")]
    pub after_level_2: f64,
    /// Rate that pays after level 3 final notice (default 0.15)
    #[serde(default = "default_after_level_3")]
    pub after_level_3: f64,
    /// Rate that pays during collection (default 0.05)
    #[serde(default = "default_during_collection")]
    pub during_collection: f64,
    /// Rate that never pays (becomes bad debt) (default 0.10)
    #[serde(default = "default_never_pay")]
    pub never_pay: f64,
}
4213
/// Fallback for `DunningPaymentRates::after_level_1`.
fn default_after_level_1() -> f64 {
    const PAID_AFTER_LEVEL_1: f64 = 0.40;
    PAID_AFTER_LEVEL_1
}
4217
/// Fallback for `DunningPaymentRates::after_level_2`.
fn default_after_level_2() -> f64 {
    const PAID_AFTER_LEVEL_2: f64 = 0.30;
    PAID_AFTER_LEVEL_2
}
4221
/// Fallback for `DunningPaymentRates::after_level_3`.
fn default_after_level_3() -> f64 {
    const PAID_AFTER_LEVEL_3: f64 = 0.15;
    PAID_AFTER_LEVEL_3
}
4225
/// Fallback for `DunningPaymentRates::during_collection`.
fn default_during_collection() -> f64 {
    const PAID_DURING_COLLECTION: f64 = 0.05;
    PAID_DURING_COLLECTION
}
4229
/// Fallback for `DunningPaymentRates::never_pay`.
fn default_never_pay() -> f64 {
    const NEVER_PAID: f64 = 0.10;
    NEVER_PAID
}
4233
4234impl Default for DunningPaymentRates {
4235    fn default() -> Self {
4236        Self {
4237            after_level_1: default_after_level_1(),
4238            after_level_2: default_after_level_2(),
4239            after_level_3: default_after_level_3(),
4240            during_collection: default_during_collection(),
4241            never_pay: default_never_pay(),
4242        }
4243    }
4244}
4245
/// Partial payment configuration.
///
/// All fields are optional in the input: `rate` and
/// `avg_days_until_remainder` fall back to their `default_*` helpers and
/// `percentage_distribution` to its own `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentConfig {
    /// Rate of invoices paid partially (default 0.08)
    #[serde(default = "default_partial_payment_rate")]
    pub rate: f64,
    /// Distribution of partial payment percentages
    #[serde(default)]
    pub percentage_distribution: PartialPaymentPercentageDistribution,
    /// Average days until remainder is paid (default 30)
    #[serde(default = "default_avg_days_until_remainder")]
    pub avg_days_until_remainder: u32,
}
4259
/// Fallback for `PartialPaymentConfig::rate`.
fn default_partial_payment_rate() -> f64 {
    const PARTIAL_RATE: f64 = 0.08;
    PARTIAL_RATE
}
4263
/// Fallback for `PartialPaymentConfig::avg_days_until_remainder`.
fn default_avg_days_until_remainder() -> u32 {
    const REMAINDER_DAYS: u32 = 30;
    REMAINDER_DAYS
}
4267
4268impl Default for PartialPaymentConfig {
4269    fn default() -> Self {
4270        Self {
4271            rate: default_partial_payment_rate(),
4272            percentage_distribution: PartialPaymentPercentageDistribution::default(),
4273            avg_days_until_remainder: default_avg_days_until_remainder(),
4274        }
4275    }
4276}
4277
/// Distribution of partial payment percentages.
///
/// Each field is the share of partial payments that use the given payment
/// size. The built-in defaults (0.15 / 0.50 / 0.25 / 0.10) add up to 1.0.
///
/// NOTE(review): this definition does not enforce that custom shares sum to
/// 1.0 — confirm where (or whether) that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPaymentPercentageDistribution {
    /// Pay 25% of invoice (default share 0.15)
    #[serde(default = "default_partial_25")]
    pub pay_25_percent: f64,
    /// Pay 50% of invoice (default share 0.50)
    #[serde(default = "default_partial_50")]
    pub pay_50_percent: f64,
    /// Pay 75% of invoice (default share 0.25)
    #[serde(default = "default_partial_75")]
    pub pay_75_percent: f64,
    /// Pay random percentage (default share 0.10)
    #[serde(default = "default_partial_random")]
    pub pay_random_percent: f64,
}
4294
/// Fallback for `PartialPaymentPercentageDistribution::pay_25_percent`.
fn default_partial_25() -> f64 {
    const SHARE_PAYING_25: f64 = 0.15;
    SHARE_PAYING_25
}
4298
/// Fallback for `PartialPaymentPercentageDistribution::pay_50_percent`.
fn default_partial_50() -> f64 {
    const SHARE_PAYING_50: f64 = 0.50;
    SHARE_PAYING_50
}
4302
/// Fallback for `PartialPaymentPercentageDistribution::pay_75_percent`.
fn default_partial_75() -> f64 {
    const SHARE_PAYING_75: f64 = 0.25;
    SHARE_PAYING_75
}
4306
/// Fallback for `PartialPaymentPercentageDistribution::pay_random_percent`.
fn default_partial_random() -> f64 {
    const SHARE_PAYING_RANDOM: f64 = 0.10;
    SHARE_PAYING_RANDOM
}
4310
4311impl Default for PartialPaymentPercentageDistribution {
4312    fn default() -> Self {
4313        Self {
4314            pay_25_percent: default_partial_25(),
4315            pay_50_percent: default_partial_50(),
4316            pay_75_percent: default_partial_75(),
4317            pay_random_percent: default_partial_random(),
4318        }
4319    }
4320}
4321
/// Short payment configuration (unauthorized deductions).
///
/// All fields are optional in the input: `rate` and `max_short_percent`
/// fall back to their `default_*` helpers and `reason_distribution` to its
/// own `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentConfig {
    /// Rate of payments that are short (default 0.03)
    #[serde(default = "default_short_payment_rate")]
    pub rate: f64,
    /// Distribution of short payment reasons
    #[serde(default)]
    pub reason_distribution: ShortPaymentReasonDistribution,
    /// Maximum percentage that can be short (default 0.10)
    // NOTE(review): the default suggests a fraction (0.10 = 10%) rather than
    // a 0-100 percentage — confirm against the consumer.
    #[serde(default = "default_max_short_percent")]
    pub max_short_percent: f64,
}
4335
/// Fallback for `ShortPaymentConfig::rate`.
fn default_short_payment_rate() -> f64 {
    const SHORT_RATE: f64 = 0.03;
    SHORT_RATE
}
4339
/// Fallback for `ShortPaymentConfig::max_short_percent`.
fn default_max_short_percent() -> f64 {
    const MAX_SHORT: f64 = 0.10;
    MAX_SHORT
}
4343
4344impl Default for ShortPaymentConfig {
4345    fn default() -> Self {
4346        Self {
4347            rate: default_short_payment_rate(),
4348            reason_distribution: ShortPaymentReasonDistribution::default(),
4349            max_short_percent: default_max_short_percent(),
4350        }
4351    }
4352}
4353
/// Distribution of short payment reasons.
///
/// Each field is the share of short payments attributed to that reason. The
/// built-in defaults (0.30 / 0.20 / 0.20 / 0.15 / 0.15) add up to 1.0.
///
/// NOTE(review): this definition does not enforce that custom shares sum to
/// 1.0 — confirm where (or whether) that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShortPaymentReasonDistribution {
    /// Pricing dispute (default share 0.30)
    #[serde(default = "default_pricing_dispute")]
    pub pricing_dispute: f64,
    /// Quality issue (default share 0.20)
    #[serde(default = "default_quality_issue")]
    pub quality_issue: f64,
    /// Quantity discrepancy (default share 0.20)
    #[serde(default = "default_quantity_discrepancy")]
    pub quantity_discrepancy: f64,
    /// Unauthorized deduction (default share 0.15)
    #[serde(default = "default_unauthorized_deduction")]
    pub unauthorized_deduction: f64,
    /// Early payment discount taken incorrectly (default share 0.15)
    #[serde(default = "default_incorrect_discount")]
    pub incorrect_discount: f64,
}
4373
/// Fallback for `ShortPaymentReasonDistribution::pricing_dispute`.
fn default_pricing_dispute() -> f64 {
    const PRICING_DISPUTE_SHARE: f64 = 0.30;
    PRICING_DISPUTE_SHARE
}
4377
/// Fallback for `ShortPaymentReasonDistribution::quality_issue`.
fn default_quality_issue() -> f64 {
    const QUALITY_ISSUE_SHARE: f64 = 0.20;
    QUALITY_ISSUE_SHARE
}
4381
/// Fallback for `ShortPaymentReasonDistribution::quantity_discrepancy`.
fn default_quantity_discrepancy() -> f64 {
    const QUANTITY_DISCREPANCY_SHARE: f64 = 0.20;
    QUANTITY_DISCREPANCY_SHARE
}
4385
/// Fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`.
fn default_unauthorized_deduction() -> f64 {
    const UNAUTHORIZED_DEDUCTION_SHARE: f64 = 0.15;
    UNAUTHORIZED_DEDUCTION_SHARE
}
4389
/// Fallback for `ShortPaymentReasonDistribution::incorrect_discount`.
fn default_incorrect_discount() -> f64 {
    const INCORRECT_DISCOUNT_SHARE: f64 = 0.15;
    INCORRECT_DISCOUNT_SHARE
}
4393
4394impl Default for ShortPaymentReasonDistribution {
4395    fn default() -> Self {
4396        Self {
4397            pricing_dispute: default_pricing_dispute(),
4398            quality_issue: default_quality_issue(),
4399            quantity_discrepancy: default_quantity_discrepancy(),
4400            unauthorized_deduction: default_unauthorized_deduction(),
4401            incorrect_discount: default_incorrect_discount(),
4402        }
4403    }
4404}
4405
/// On-account payment configuration (unapplied payments).
///
/// Both fields are optional in the input and fall back to their `default_*`
/// helpers when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OnAccountPaymentConfig {
    /// Rate of payments that are on-account (unapplied) (default 0.02)
    #[serde(default = "default_on_account_rate")]
    pub rate: f64,
    /// Average days until on-account payments are applied (default 14)
    #[serde(default = "default_avg_days_until_applied")]
    pub avg_days_until_applied: u32,
}
4416
/// Fallback for `OnAccountPaymentConfig::rate`.
fn default_on_account_rate() -> f64 {
    const ON_ACCOUNT_RATE: f64 = 0.02;
    ON_ACCOUNT_RATE
}
4420
/// Fallback for `OnAccountPaymentConfig::avg_days_until_applied`.
fn default_avg_days_until_applied() -> u32 {
    const DAYS_UNTIL_APPLIED: u32 = 14;
    DAYS_UNTIL_APPLIED
}
4424
4425impl Default for OnAccountPaymentConfig {
4426    fn default() -> Self {
4427        Self {
4428            rate: default_on_account_rate(),
4429            avg_days_until_applied: default_avg_days_until_applied(),
4430        }
4431    }
4432}
4433
/// Payment correction configuration.
///
/// Both fields are optional in the input: `rate` falls back to
/// `default_payment_correction_rate` and `type_distribution` to its own
/// `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionConfig {
    /// Rate of payments requiring correction (default 0.02)
    #[serde(default = "default_payment_correction_rate")]
    pub rate: f64,
    /// Distribution of correction types
    #[serde(default)]
    pub type_distribution: PaymentCorrectionTypeDistribution,
}
4444
/// Fallback for `PaymentCorrectionConfig::rate`.
fn default_payment_correction_rate() -> f64 {
    const CORRECTION_RATE: f64 = 0.02;
    CORRECTION_RATE
}
4448
4449impl Default for PaymentCorrectionConfig {
4450    fn default() -> Self {
4451        Self {
4452            rate: default_payment_correction_rate(),
4453            type_distribution: PaymentCorrectionTypeDistribution::default(),
4454        }
4455    }
4456}
4457
/// Distribution of payment correction types.
///
/// Each field is the share of corrections of that type. The built-in
/// defaults (0.30 / 0.20 / 0.20 / 0.15 / 0.15) add up to 1.0.
///
/// NOTE(review): this definition does not enforce that custom shares sum to
/// 1.0 — confirm where (or whether) that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentCorrectionTypeDistribution {
    /// NSF (Non-sufficient funds) / bounced check (default share 0.30)
    #[serde(default = "default_nsf_rate")]
    pub nsf: f64,
    /// Chargeback (default share 0.20)
    #[serde(default = "default_chargeback_rate")]
    pub chargeback: f64,
    /// Wrong amount applied (default share 0.20)
    #[serde(default = "default_wrong_amount_rate")]
    pub wrong_amount: f64,
    /// Wrong customer applied (default share 0.15)
    #[serde(default = "default_wrong_customer_rate")]
    pub wrong_customer: f64,
    /// Duplicate payment (default share 0.15)
    #[serde(default = "default_duplicate_payment_rate")]
    pub duplicate_payment: f64,
}
4477
/// Fallback for `PaymentCorrectionTypeDistribution::nsf`.
fn default_nsf_rate() -> f64 {
    const NSF_SHARE: f64 = 0.30;
    NSF_SHARE
}
4481
/// Fallback for `PaymentCorrectionTypeDistribution::chargeback`.
fn default_chargeback_rate() -> f64 {
    const CHARGEBACK_SHARE: f64 = 0.20;
    CHARGEBACK_SHARE
}
4485
/// Fallback for `PaymentCorrectionTypeDistribution::wrong_amount`.
fn default_wrong_amount_rate() -> f64 {
    const WRONG_AMOUNT_SHARE: f64 = 0.20;
    WRONG_AMOUNT_SHARE
}
4489
/// Fallback for `PaymentCorrectionTypeDistribution::wrong_customer`.
fn default_wrong_customer_rate() -> f64 {
    const WRONG_CUSTOMER_SHARE: f64 = 0.15;
    WRONG_CUSTOMER_SHARE
}
4493
/// Fallback for `PaymentCorrectionTypeDistribution::duplicate_payment`.
fn default_duplicate_payment_rate() -> f64 {
    const DUPLICATE_PAYMENT_SHARE: f64 = 0.15;
    DUPLICATE_PAYMENT_SHARE
}
4497
4498impl Default for PaymentCorrectionTypeDistribution {
4499    fn default() -> Self {
4500        Self {
4501            nsf: default_nsf_rate(),
4502            chargeback: default_chargeback_rate(),
4503            wrong_amount: default_wrong_amount_rate(),
4504            wrong_customer: default_wrong_customer_rate(),
4505            duplicate_payment: default_duplicate_payment_rate(),
4506        }
4507    }
4508}
4509
/// Document line count distribution.
///
/// Described by a minimum, a maximum and a mode (most common count). All
/// three are optional in the input and fall back to 1, 20 and 3.
///
/// NOTE(review): nothing here enforces `min_lines <= mode_lines <= max_lines`
/// — confirm whether the consumer validates the ordering.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentLineCountDistribution {
    /// Minimum number of lines (default 1)
    #[serde(default = "default_min_lines")]
    pub min_lines: u32,
    /// Maximum number of lines (default 20)
    #[serde(default = "default_max_lines")]
    pub max_lines: u32,
    /// Most common line count (mode) (default 3)
    #[serde(default = "default_mode_lines")]
    pub mode_lines: u32,
}
4523
/// Fallback for `DocumentLineCountDistribution::min_lines`.
fn default_min_lines() -> u32 {
    const MIN_LINES: u32 = 1;
    MIN_LINES
}
4527
/// Fallback for `DocumentLineCountDistribution::max_lines`.
fn default_max_lines() -> u32 {
    const MAX_LINES: u32 = 20;
    MAX_LINES
}
4531
/// Fallback for `DocumentLineCountDistribution::mode_lines`.
fn default_mode_lines() -> u32 {
    const MODE_LINES: u32 = 3;
    MODE_LINES
}
4535
4536impl Default for DocumentLineCountDistribution {
4537    fn default() -> Self {
4538        Self {
4539            min_lines: default_min_lines(),
4540            max_lines: default_max_lines(),
4541            mode_lines: default_mode_lines(),
4542        }
4543    }
4544}
4545
/// Cash discount configuration.
///
/// All fields are optional in the input and fall back to their `default_*`
/// helpers when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CashDiscountConfig {
    /// Percentage of invoices eligible for cash discount (default 0.30)
    #[serde(default = "default_discount_eligible_rate")]
    pub eligible_rate: f64,
    /// Rate at which customers take the discount (default 0.60)
    #[serde(default = "default_discount_taken_rate")]
    pub taken_rate: f64,
    /// Standard discount percentage (default 0.02)
    // NOTE(review): the default suggests a fraction (0.02 = 2%) rather than
    // a 0-100 percentage — confirm against the consumer.
    #[serde(default = "default_discount_percent")]
    pub discount_percent: f64,
    /// Days within which discount must be taken (default 10)
    #[serde(default = "default_discount_days")]
    pub discount_days: u32,
}
4562
/// Fallback for `CashDiscountConfig::eligible_rate`.
fn default_discount_eligible_rate() -> f64 {
    const ELIGIBLE_RATE: f64 = 0.30;
    ELIGIBLE_RATE
}
4566
/// Fallback for `CashDiscountConfig::taken_rate`.
fn default_discount_taken_rate() -> f64 {
    const TAKEN_RATE: f64 = 0.60;
    TAKEN_RATE
}
4570
/// Fallback for `CashDiscountConfig::discount_percent`.
fn default_discount_percent() -> f64 {
    const DISCOUNT: f64 = 0.02;
    DISCOUNT
}
4574
/// Fallback for `CashDiscountConfig::discount_days`.
fn default_discount_days() -> u32 {
    const DISCOUNT_WINDOW_DAYS: u32 = 10;
    DISCOUNT_WINDOW_DAYS
}
4578
4579impl Default for CashDiscountConfig {
4580    fn default() -> Self {
4581        Self {
4582            eligible_rate: default_discount_eligible_rate(),
4583            taken_rate: default_discount_taken_rate(),
4584            discount_percent: default_discount_percent(),
4585            discount_days: default_discount_days(),
4586        }
4587    }
4588}
4589
4590// ============================================================================
4591// Intercompany Configuration
4592// ============================================================================
4593
/// Intercompany transaction configuration.
///
/// Every field is optional in the input. Per the `Default` implementation,
/// the feature is disabled, matched pairs are generated and elimination
/// entries are not.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntercompanyConfig {
    /// Enable intercompany transaction generation
    #[serde(default)]
    pub enabled: bool,
    /// Rate of transactions that are intercompany (default 0.15)
    #[serde(default = "default_ic_transaction_rate")]
    pub ic_transaction_rate: f64,
    /// Transfer pricing method (defaults to `TransferPricingMethod::CostPlus`)
    #[serde(default)]
    pub transfer_pricing_method: TransferPricingMethod,
    /// Transfer pricing markup percentage (for cost-plus) (default 0.05)
    // NOTE(review): the default suggests a fraction (0.05 = 5%) rather than
    // a 0-100 percentage — confirm against the consumer.
    #[serde(default = "default_markup_percent")]
    pub markup_percent: f64,
    /// Generate matched IC pairs (offsetting entries)
    #[serde(default = "default_true")]
    pub generate_matched_pairs: bool,
    /// IC transaction type distribution
    #[serde(default)]
    pub transaction_type_distribution: ICTransactionTypeDistribution,
    /// Generate elimination entries for consolidation
    #[serde(default)]
    pub generate_eliminations: bool,
}
4619
/// Fallback for `IntercompanyConfig::ic_transaction_rate`.
fn default_ic_transaction_rate() -> f64 {
    const IC_RATE: f64 = 0.15;
    IC_RATE
}
4623
/// Fallback for `IntercompanyConfig::markup_percent`.
fn default_markup_percent() -> f64 {
    const MARKUP: f64 = 0.05;
    MARKUP
}
4627
4628impl Default for IntercompanyConfig {
4629    fn default() -> Self {
4630        Self {
4631            enabled: false,
4632            ic_transaction_rate: default_ic_transaction_rate(),
4633            transfer_pricing_method: TransferPricingMethod::default(),
4634            markup_percent: default_markup_percent(),
4635            generate_matched_pairs: true,
4636            transaction_type_distribution: ICTransactionTypeDistribution::default(),
4637            generate_eliminations: false,
4638        }
4639    }
4640}
4641
/// Transfer pricing method.
///
/// Variants are (de)serialized in snake_case (`cost_plus`,
/// `comparable_uncontrolled`, `resale_price`, `transactional_net_margin`,
/// `profit_split`). `CostPlus` is the default.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TransferPricingMethod {
    /// Cost plus a markup
    #[default]
    CostPlus,
    /// Comparable uncontrolled price
    ComparableUncontrolled,
    /// Resale price method
    ResalePrice,
    /// Transactional net margin method
    TransactionalNetMargin,
    /// Profit split method
    ProfitSplit,
}
4658
4659/// IC transaction type distribution.
4660#[derive(Debug, Clone, Serialize, Deserialize)]
4661pub struct ICTransactionTypeDistribution {
4662    /// Goods sales between entities
4663    pub goods_sale: f64,
4664    /// Services provided
4665    pub service_provided: f64,
4666    /// Intercompany loans
4667    pub loan: f64,
4668    /// Dividends
4669    pub dividend: f64,
4670    /// Management fees
4671    pub management_fee: f64,
4672    /// Royalties
4673    pub royalty: f64,
4674    /// Cost sharing
4675    pub cost_sharing: f64,
4676}
4677
4678impl Default for ICTransactionTypeDistribution {
4679    fn default() -> Self {
4680        Self {
4681            goods_sale: 0.35,
4682            service_provided: 0.20,
4683            loan: 0.10,
4684            dividend: 0.05,
4685            management_fee: 0.15,
4686            royalty: 0.10,
4687            cost_sharing: 0.05,
4688        }
4689    }
4690}
4691
4692// ============================================================================
4693// Balance Configuration
4694// ============================================================================
4695
/// Balance and trial balance configuration.
///
/// Every field is optional in the input. Per the `Default` implementation,
/// opening balances are off while trial balances, balance-equation
/// validation and subledger reconciliation are on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BalanceConfig {
    /// Generate opening balances
    #[serde(default)]
    pub generate_opening_balances: bool,
    /// Generate trial balances
    #[serde(default = "default_true")]
    pub generate_trial_balances: bool,
    /// Target gross margin (for revenue/COGS coherence) (default 0.35)
    #[serde(default = "default_gross_margin")]
    pub target_gross_margin: f64,
    /// Target DSO (Days Sales Outstanding) (default 45)
    #[serde(default = "default_dso")]
    pub target_dso_days: u32,
    /// Target DPO (Days Payable Outstanding) (default 30)
    #[serde(default = "default_dpo")]
    pub target_dpo_days: u32,
    /// Target current ratio (default 1.5)
    #[serde(default = "default_current_ratio")]
    pub target_current_ratio: f64,
    /// Target debt-to-equity ratio (default 0.5)
    #[serde(default = "default_debt_equity")]
    pub target_debt_to_equity: f64,
    /// Validate balance sheet equation (A = L + E)
    #[serde(default = "default_true")]
    pub validate_balance_equation: bool,
    /// Reconcile subledgers to GL control accounts
    #[serde(default = "default_true")]
    pub reconcile_subledgers: bool,
}
4727
/// Fallback for `BalanceConfig::target_gross_margin`.
fn default_gross_margin() -> f64 {
    const GROSS_MARGIN: f64 = 0.35;
    GROSS_MARGIN
}
4731
/// Fallback for `BalanceConfig::target_dso_days`.
fn default_dso() -> u32 {
    const DSO_DAYS: u32 = 45;
    DSO_DAYS
}
4735
/// Fallback for `BalanceConfig::target_dpo_days`.
fn default_dpo() -> u32 {
    const DPO_DAYS: u32 = 30;
    DPO_DAYS
}
4739
/// Fallback for `BalanceConfig::target_current_ratio`.
fn default_current_ratio() -> f64 {
    const CURRENT_RATIO: f64 = 1.5;
    CURRENT_RATIO
}
4743
/// Fallback for `BalanceConfig::target_debt_to_equity`.
fn default_debt_equity() -> f64 {
    const DEBT_TO_EQUITY: f64 = 0.5;
    DEBT_TO_EQUITY
}
4747
4748impl Default for BalanceConfig {
4749    fn default() -> Self {
4750        Self {
4751            generate_opening_balances: false,
4752            generate_trial_balances: true,
4753            target_gross_margin: default_gross_margin(),
4754            target_dso_days: default_dso(),
4755            target_dpo_days: default_dpo(),
4756            target_current_ratio: default_current_ratio(),
4757            target_debt_to_equity: default_debt_equity(),
4758            validate_balance_equation: true,
4759            reconcile_subledgers: true,
4760        }
4761    }
4762}
4763
4764// ==========================================================================
4765// OCPM (Object-Centric Process Mining) Configuration
4766// ==========================================================================
4767
/// OCPM (Object-Centric Process Mining) configuration.
///
/// Controls generation of OCEL 2.0 compatible event logs with
/// many-to-many event-to-object relationships.
///
/// Every field is optional in the input. Per the `Default` implementation,
/// generation is disabled, the three feature switches are on, and
/// `max_variants` is 0 (unlimited).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmConfig {
    /// Enable OCPM event log generation
    #[serde(default)]
    pub enabled: bool,

    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
    #[serde(default = "default_true")]
    pub generate_lifecycle_events: bool,

    /// Include object-to-object relationships in output
    #[serde(default = "default_true")]
    pub include_object_relationships: bool,

    /// Compute and export process variants
    #[serde(default = "default_true")]
    pub compute_variants: bool,

    /// Maximum variants to track (0 = unlimited)
    #[serde(default)]
    pub max_variants: usize,

    /// P2P process configuration
    #[serde(default)]
    pub p2p_process: OcpmProcessConfig,

    /// O2C process configuration
    #[serde(default)]
    pub o2c_process: OcpmProcessConfig,

    /// Output format configuration
    #[serde(default)]
    pub output: OcpmOutputConfig,
}
4806
4807impl Default for OcpmConfig {
4808    fn default() -> Self {
4809        Self {
4810            enabled: false,
4811            generate_lifecycle_events: true,
4812            include_object_relationships: true,
4813            compute_variants: true,
4814            max_variants: 0,
4815            p2p_process: OcpmProcessConfig::default(),
4816            o2c_process: OcpmProcessConfig::default(),
4817            output: OcpmOutputConfig::default(),
4818        }
4819    }
4820}
4821
/// Process-specific OCPM configuration.
///
/// Three deviation probabilities, each documented as lying in 0.0-1.0. A
/// field missing from the input falls back to its `default_*` helper
/// (0.15, 0.10 and 0.08 respectively).
///
/// NOTE(review): the 0.0-1.0 range is not enforced by this definition —
/// confirm whether it is validated elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmProcessConfig {
    /// Rework probability (0.0-1.0)
    #[serde(default = "default_rework_probability")]
    pub rework_probability: f64,

    /// Skip step probability (0.0-1.0)
    #[serde(default = "default_skip_probability")]
    pub skip_step_probability: f64,

    /// Out-of-order step probability (0.0-1.0)
    #[serde(default = "default_out_of_order_probability")]
    pub out_of_order_probability: f64,
}
4837
4838// Defaults deliberately produce variant counts and Inductive-Miner fitness
4839// in the range seen in real ERP data (dozens of variants, ~0.7–0.9 fitness).
4840// Lowering them all to 0 yields a single-variant happy-path log.
/// Fallback for `OcpmProcessConfig::rework_probability`.
fn default_rework_probability() -> f64 {
    const REWORK: f64 = 0.15;
    REWORK
}
4844
/// Fallback for `OcpmProcessConfig::skip_step_probability`.
fn default_skip_probability() -> f64 {
    const SKIP_STEP: f64 = 0.10;
    SKIP_STEP
}
4848
/// Fallback for `OcpmProcessConfig::out_of_order_probability`.
fn default_out_of_order_probability() -> f64 {
    const OUT_OF_ORDER: f64 = 0.08;
    OUT_OF_ORDER
}
4852
4853impl Default for OcpmProcessConfig {
4854    fn default() -> Self {
4855        Self {
4856            rework_probability: default_rework_probability(),
4857            skip_step_probability: default_skip_probability(),
4858            out_of_order_probability: default_out_of_order_probability(),
4859        }
4860    }
4861}
4862
/// OCPM output format configuration.
///
/// One boolean switch per export artifact; every field is optional in the
/// input. Per the `Default` implementation, `ocel_xml`, `xes` and
/// `export_reference_models` are off and all other switches are on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcpmOutputConfig {
    /// Export OCEL 2.0 JSON format
    #[serde(default = "default_true")]
    pub ocel_json: bool,

    /// Export OCEL 2.0 XML format
    #[serde(default)]
    pub ocel_xml: bool,

    /// Export XES 2.0 XML format (IEEE standard for process mining tools)
    #[serde(default)]
    pub xes: bool,

    /// Include lifecycle transitions in XES output (start/complete pairs)
    #[serde(default = "default_true")]
    pub xes_include_lifecycle: bool,

    /// Include resource attributes in XES output
    #[serde(default = "default_true")]
    pub xes_include_resources: bool,

    /// Export flattened CSV for each object type
    #[serde(default = "default_true")]
    pub flattened_csv: bool,

    /// Export event-object relationship table
    #[serde(default = "default_true")]
    pub event_object_csv: bool,

    /// Export object-object relationship table
    #[serde(default = "default_true")]
    pub object_relationship_csv: bool,

    /// Export process variants summary
    #[serde(default = "default_true")]
    pub variants_csv: bool,

    /// Export reference process models (canonical P2P, O2C, R2R)
    #[serde(default)]
    pub export_reference_models: bool,
}
4906
4907impl Default for OcpmOutputConfig {
4908    fn default() -> Self {
4909        Self {
4910            ocel_json: true,
4911            ocel_xml: false,
4912            xes: false,
4913            xes_include_lifecycle: true,
4914            xes_include_resources: true,
4915            flattened_csv: true,
4916            event_object_csv: true,
4917            object_relationship_csv: true,
4918            variants_csv: true,
4919            export_reference_models: false,
4920        }
4921    }
4922}
4923
/// Audit engagement and workpaper generation configuration.
///
/// Every field is optional in the input. Per the `Default` implementation,
/// audit generation is disabled, workpapers are generated, `fsm` is `None`
/// and each nested section uses its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditGenerationConfig {
    /// Enable audit engagement generation
    #[serde(default)]
    pub enabled: bool,

    /// Gate for workpaper generation (v3.3.2+).
    /// When `false`, workpapers and dependent evidence are skipped
    /// while engagements / risk assessments / findings still generate.
    #[serde(default = "default_true")]
    pub generate_workpapers: bool,

    /// Engagement type distribution (v3.3.2+). Drives per-engagement
    /// type draw via `AuditEngagementGenerator::draw_engagement_type`.
    #[serde(default)]
    pub engagement_types: AuditEngagementTypesConfig,

    /// Workpaper configuration (v3.3.2+). `average_per_phase` maps onto
    /// `WorkpaperGenerator.workpapers_per_section` as a ±50% band
    /// around the average. Sampling / ISA / cross-reference flags are
    /// surfaced for downstream formatting overlays.
    #[serde(default)]
    pub workpapers: WorkpaperConfig,

    /// Audit team configuration (v3.3.2+). `min_team_size` /
    /// `max_team_size` map directly onto
    /// `AuditEngagementGenerator.team_size_range`.
    /// `specialist_probability` is reserved for v3.4 (explicit
    /// specialist-role support).
    #[serde(default)]
    pub team: AuditTeamConfig,

    /// Review workflow configuration (v3.3.2+).
    /// `average_review_delay_days` drives both
    /// `first_review_delay_range` and `second_review_delay_range` as
    /// a ±1-day band around the average. `rework_probability` and
    /// `require_partner_signoff` are reserved for v3.4 workflow
    /// modeling.
    #[serde(default)]
    pub review: ReviewWorkflowConfig,

    /// FSM-driven audit generation configuration.
    /// `None` when the section is absent from the input.
    #[serde(default)]
    pub fsm: Option<AuditFsmConfig>,

    /// v3.3.0: IT general controls (access logs, change management
    /// records) emitted alongside audit engagements. Requires both
    /// `audit.enabled = true` and `audit.it_controls.enabled = true`
    /// to take effect — the latter defaults to `false` so current
    /// archives are byte-identical to v3.2.1.
    #[serde(default)]
    pub it_controls: ItControlsConfig,
}
4978
/// IT general controls config (v3.3.0+).
///
/// Every field is optional in the input; the counts fall back to 500 access
/// logs and 50 change records per engagement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItControlsConfig {
    /// Master switch — when `false`, no access logs or change records
    /// are generated.
    #[serde(default)]
    pub enabled: bool,
    /// Number of access-log entries per engagement (approximate — the
    /// generator may round or scale based on company size). Default 500.
    #[serde(default = "default_access_log_count")]
    pub access_logs_per_engagement: usize,
    /// Number of change-management records per engagement. Default 50.
    #[serde(default = "default_change_record_count")]
    pub change_records_per_engagement: usize,
}
4994
/// Fallback for `ItControlsConfig::access_logs_per_engagement`.
fn default_access_log_count() -> usize {
    const ACCESS_LOGS: usize = 500;
    ACCESS_LOGS
}
/// Fallback for `ItControlsConfig::change_records_per_engagement`.
fn default_change_record_count() -> usize {
    const CHANGE_RECORDS: usize = 50;
    CHANGE_RECORDS
}
5001
5002impl Default for ItControlsConfig {
5003    fn default() -> Self {
5004        Self {
5005            enabled: false,
5006            access_logs_per_engagement: default_access_log_count(),
5007            change_records_per_engagement: default_change_record_count(),
5008        }
5009    }
5010}
5011
5012impl Default for AuditGenerationConfig {
5013    fn default() -> Self {
5014        Self {
5015            enabled: false,
5016            generate_workpapers: true,
5017            engagement_types: AuditEngagementTypesConfig::default(),
5018            workpapers: WorkpaperConfig::default(),
5019            team: AuditTeamConfig::default(),
5020            review: ReviewWorkflowConfig::default(),
5021            fsm: None,
5022            it_controls: ItControlsConfig::default(),
5023        }
5024    }
5025}
5026
/// FSM-driven audit generation configuration.
///
/// Every field is optional in the input. Per the `Default` implementation
/// and helpers, the FSM is disabled, `blueprint` is `"builtin:fsa"`,
/// `overlay` is `"builtin:default"`, and `depth`, `discriminators` and
/// `seed` are empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditFsmConfig {
    /// Enable FSM-driven audit generation.
    #[serde(default)]
    pub enabled: bool,

    /// Blueprint source: "builtin:fsa", "builtin:ia", or a file path.
    #[serde(default = "default_audit_fsm_blueprint")]
    pub blueprint: String,

    /// Overlay source: "builtin:default", "builtin:thorough", "builtin:rushed", or a file path.
    #[serde(default = "default_audit_fsm_overlay")]
    pub overlay: String,

    /// Depth level override.
    /// `None` means no override is supplied.
    #[serde(default)]
    pub depth: Option<String>,

    /// Discriminator filter.
    // NOTE(review): key/value semantics (name -> accepted values?) are not
    // visible here — confirm against the FSM engine.
    #[serde(default)]
    pub discriminators: std::collections::HashMap<String, Vec<String>>,

    /// Event trail output config.
    #[serde(default)]
    pub event_trail: AuditEventTrailConfig,

    /// RNG seed override.
    /// `None` means no override is supplied.
    #[serde(default)]
    pub seed: Option<u64>,
}
5058
5059impl Default for AuditFsmConfig {
5060    fn default() -> Self {
5061        Self {
5062            enabled: false,
5063            blueprint: default_audit_fsm_blueprint(),
5064            overlay: default_audit_fsm_overlay(),
5065            depth: None,
5066            discriminators: std::collections::HashMap::new(),
5067            event_trail: AuditEventTrailConfig::default(),
5068            seed: None,
5069        }
5070    }
5071}
5072
/// Fallback for `AuditFsmConfig::blueprint`.
fn default_audit_fsm_blueprint() -> String {
    String::from("builtin:fsa")
}
5076
/// Fallback for `AuditFsmConfig::overlay`.
fn default_audit_fsm_overlay() -> String {
    String::from("builtin:default")
}
5080
/// Event trail output configuration for FSM-driven audit generation.
///
/// Both switches are optional in the input. Per the `Default`
/// implementation, the flat log is on and the OCEL projection is off.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEventTrailConfig {
    /// Emit a flat event log.
    #[serde(default = "default_true")]
    pub flat_log: bool,
    /// Project events to OCEL 2.0 format.
    #[serde(default)]
    pub ocel_projection: bool,
}
5091
5092impl Default for AuditEventTrailConfig {
5093    fn default() -> Self {
5094        Self {
5095            flat_log: true,
5096            ocel_projection: false,
5097        }
5098    }
5099}
5100
/// Engagement type distribution configuration.
///
/// Each field is the probability of drawing that engagement type. The
/// built-in defaults (0.40 / 0.20 / 0.25 / 0.10 / 0.05) add up to 1.0.
///
/// NOTE(review): this definition does not enforce that custom probabilities
/// sum to 1.0 — confirm where (or whether) that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEngagementTypesConfig {
    /// Financial statement audit probability (default 0.40)
    #[serde(default = "default_financial_audit_prob")]
    pub financial_statement: f64,
    /// SOX/ICFR audit probability (default 0.20)
    #[serde(default = "default_sox_audit_prob")]
    pub sox_icfr: f64,
    /// Integrated audit probability (default 0.25)
    #[serde(default = "default_integrated_audit_prob")]
    pub integrated: f64,
    /// Review engagement probability (default 0.10)
    #[serde(default = "default_review_prob")]
    pub review: f64,
    /// Agreed-upon procedures probability (default 0.05)
    #[serde(default = "default_aup_prob")]
    pub agreed_upon_procedures: f64,
}
5120
/// Fallback for `AuditEngagementTypesConfig::financial_statement`.
fn default_financial_audit_prob() -> f64 {
    const FINANCIAL_STATEMENT: f64 = 0.40;
    FINANCIAL_STATEMENT
}
/// Fallback for `AuditEngagementTypesConfig::sox_icfr`.
fn default_sox_audit_prob() -> f64 {
    const SOX_ICFR: f64 = 0.20;
    SOX_ICFR
}
/// Fallback for `AuditEngagementTypesConfig::integrated`.
fn default_integrated_audit_prob() -> f64 {
    const INTEGRATED: f64 = 0.25;
    INTEGRATED
}
/// Fallback for `AuditEngagementTypesConfig::review`.
fn default_review_prob() -> f64 {
    const REVIEW: f64 = 0.10;
    REVIEW
}
/// Fallback for `AuditEngagementTypesConfig::agreed_upon_procedures`.
fn default_aup_prob() -> f64 {
    const AGREED_UPON_PROCEDURES: f64 = 0.05;
    AGREED_UPON_PROCEDURES
}
5136
5137impl Default for AuditEngagementTypesConfig {
5138    fn default() -> Self {
5139        Self {
5140            financial_statement: default_financial_audit_prob(),
5141            sox_icfr: default_sox_audit_prob(),
5142            integrated: default_integrated_audit_prob(),
5143            review: default_review_prob(),
5144            agreed_upon_procedures: default_aup_prob(),
5145        }
5146    }
5147}
5148
/// Workpaper generation configuration.
///
/// Every field is optional in the input. Per the `Default` implementation,
/// `average_per_phase` is 5, the three `include_*` switches are on, and
/// `sampling` uses its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkpaperConfig {
    /// Average workpapers per engagement phase (default 5)
    #[serde(default = "default_workpapers_per_phase")]
    pub average_per_phase: usize,

    /// Include ISA compliance references
    #[serde(default = "default_true")]
    pub include_isa_references: bool,

    /// Generate sample details
    #[serde(default = "default_true")]
    pub include_sample_details: bool,

    /// Include cross-references between workpapers
    #[serde(default = "default_true")]
    pub include_cross_references: bool,

    /// Sampling configuration
    #[serde(default)]
    pub sampling: SamplingConfig,
}
5172
/// Default average number of workpapers per engagement phase (5).
fn default_workpapers_per_phase() -> usize {
    const WORKPAPERS_PER_PHASE: usize = 5;
    WORKPAPERS_PER_PHASE
}
5176
5177impl Default for WorkpaperConfig {
5178    fn default() -> Self {
5179        Self {
5180            average_per_phase: default_workpapers_per_phase(),
5181            include_isa_references: true,
5182            include_sample_details: true,
5183            include_cross_references: true,
5184            sampling: SamplingConfig::default(),
5185        }
5186    }
5187}
5188
5189/// Sampling method configuration.
5190#[derive(Debug, Clone, Serialize, Deserialize)]
5191pub struct SamplingConfig {
5192    /// Statistical sampling rate (0.0-1.0)
5193    #[serde(default = "default_statistical_rate")]
5194    pub statistical_rate: f64,
5195    /// Judgmental sampling rate (0.0-1.0)
5196    #[serde(default = "default_judgmental_rate")]
5197    pub judgmental_rate: f64,
5198    /// Haphazard sampling rate (0.0-1.0)
5199    #[serde(default = "default_haphazard_rate")]
5200    pub haphazard_rate: f64,
5201    /// 100% examination rate (0.0-1.0)
5202    #[serde(default = "default_complete_examination_rate")]
5203    pub complete_examination_rate: f64,
5204}
5205
/// Default statistical sampling rate (0.40).
fn default_statistical_rate() -> f64 {
    const STATISTICAL_RATE: f64 = 0.40;
    STATISTICAL_RATE
}
/// Default judgmental sampling rate (0.30).
fn default_judgmental_rate() -> f64 {
    const JUDGMENTAL_RATE: f64 = 0.30;
    JUDGMENTAL_RATE
}
/// Default haphazard sampling rate (0.20).
fn default_haphazard_rate() -> f64 {
    const HAPHAZARD_RATE: f64 = 0.20;
    HAPHAZARD_RATE
}
/// Default 100% examination rate (0.10).
fn default_complete_examination_rate() -> f64 {
    const COMPLETE_EXAMINATION_RATE: f64 = 0.10;
    COMPLETE_EXAMINATION_RATE
}
5218
5219impl Default for SamplingConfig {
5220    fn default() -> Self {
5221        Self {
5222            statistical_rate: default_statistical_rate(),
5223            judgmental_rate: default_judgmental_rate(),
5224            haphazard_rate: default_haphazard_rate(),
5225            complete_examination_rate: default_complete_examination_rate(),
5226        }
5227    }
5228}
5229
5230/// Audit team configuration.
5231#[derive(Debug, Clone, Serialize, Deserialize)]
5232pub struct AuditTeamConfig {
5233    /// Minimum team size
5234    #[serde(default = "default_min_team_size")]
5235    pub min_team_size: usize,
5236    /// Maximum team size
5237    #[serde(default = "default_max_team_size")]
5238    pub max_team_size: usize,
5239    /// Probability of having a specialist on the team
5240    #[serde(default = "default_specialist_probability")]
5241    pub specialist_probability: f64,
5242}
5243
/// Default minimum audit team size (3).
fn default_min_team_size() -> usize {
    const MIN_TEAM_SIZE: usize = 3;
    MIN_TEAM_SIZE
}
/// Default maximum audit team size (8).
fn default_max_team_size() -> usize {
    const MAX_TEAM_SIZE: usize = 8;
    MAX_TEAM_SIZE
}
/// Default probability of having a specialist on the team (0.30).
fn default_specialist_probability() -> f64 {
    const SPECIALIST_PROBABILITY: f64 = 0.30;
    SPECIALIST_PROBABILITY
}
5253
5254impl Default for AuditTeamConfig {
5255    fn default() -> Self {
5256        Self {
5257            min_team_size: default_min_team_size(),
5258            max_team_size: default_max_team_size(),
5259            specialist_probability: default_specialist_probability(),
5260        }
5261    }
5262}
5263
5264/// Review workflow configuration.
5265#[derive(Debug, Clone, Serialize, Deserialize)]
5266pub struct ReviewWorkflowConfig {
5267    /// Average days between preparer completion and first review
5268    #[serde(default = "default_review_delay_days")]
5269    pub average_review_delay_days: u32,
5270    /// Probability of review notes requiring rework
5271    #[serde(default = "default_rework_probability_review")]
5272    pub rework_probability: f64,
5273    /// Require partner sign-off for all workpapers
5274    #[serde(default = "default_true")]
5275    pub require_partner_signoff: bool,
5276}
5277
/// Default average review delay in days (2).
fn default_review_delay_days() -> u32 {
    const REVIEW_DELAY_DAYS: u32 = 2;
    REVIEW_DELAY_DAYS
}
/// Default probability that review notes require rework (0.15).
fn default_rework_probability_review() -> f64 {
    const REWORK_PROBABILITY: f64 = 0.15;
    REWORK_PROBABILITY
}
5284
5285impl Default for ReviewWorkflowConfig {
5286    fn default() -> Self {
5287        Self {
5288            average_review_delay_days: default_review_delay_days(),
5289            rework_probability: default_rework_probability_review(),
5290            require_partner_signoff: true,
5291        }
5292    }
5293}
5294
5295// =============================================================================
5296// Data Quality Configuration
5297// =============================================================================
5298
5299/// Data quality variation settings for realistic flakiness injection.
5300#[derive(Debug, Clone, Serialize, Deserialize)]
5301pub struct DataQualitySchemaConfig {
5302    /// Enable data quality variations
5303    #[serde(default)]
5304    pub enabled: bool,
5305    /// Preset to use (overrides individual settings if set)
5306    #[serde(default)]
5307    pub preset: DataQualityPreset,
5308    /// Missing value injection settings
5309    #[serde(default)]
5310    pub missing_values: MissingValuesSchemaConfig,
5311    /// Typo injection settings
5312    #[serde(default)]
5313    pub typos: TypoSchemaConfig,
5314    /// Format variation settings
5315    #[serde(default)]
5316    pub format_variations: FormatVariationSchemaConfig,
5317    /// Duplicate injection settings
5318    #[serde(default)]
5319    pub duplicates: DuplicateSchemaConfig,
5320    /// Encoding issue settings
5321    #[serde(default)]
5322    pub encoding_issues: EncodingIssueSchemaConfig,
5323    /// Generate quality issue labels for ML training
5324    #[serde(default)]
5325    pub generate_labels: bool,
5326    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
5327    #[serde(default)]
5328    pub sink_profiles: SinkQualityProfiles,
5329}
5330
5331impl Default for DataQualitySchemaConfig {
5332    fn default() -> Self {
5333        Self {
5334            enabled: false,
5335            preset: DataQualityPreset::None,
5336            missing_values: MissingValuesSchemaConfig::default(),
5337            typos: TypoSchemaConfig::default(),
5338            format_variations: FormatVariationSchemaConfig::default(),
5339            duplicates: DuplicateSchemaConfig::default(),
5340            encoding_issues: EncodingIssueSchemaConfig::default(),
5341            generate_labels: true,
5342            sink_profiles: SinkQualityProfiles::default(),
5343        }
5344    }
5345}
5346
5347impl DataQualitySchemaConfig {
5348    /// Creates a config for a specific preset profile.
5349    pub fn with_preset(preset: DataQualityPreset) -> Self {
5350        let mut config = Self {
5351            preset,
5352            ..Default::default()
5353        };
5354        config.apply_preset();
5355        config
5356    }
5357
5358    /// Applies the preset settings to the individual configuration fields.
5359    /// Call this after deserializing if preset is not Custom or None.
5360    pub fn apply_preset(&mut self) {
5361        if !self.preset.overrides_settings() {
5362            return;
5363        }
5364
5365        self.enabled = true;
5366
5367        // Missing values
5368        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
5369        self.missing_values.rate = self.preset.missing_rate();
5370
5371        // Typos
5372        self.typos.enabled = self.preset.typo_rate() > 0.0;
5373        self.typos.char_error_rate = self.preset.typo_rate();
5374
5375        // Duplicates
5376        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
5377        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5378        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5379        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
5380
5381        // Format variations
5382        self.format_variations.enabled = self.preset.format_variations_enabled();
5383
5384        // Encoding issues
5385        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
5386        self.encoding_issues.rate = self.preset.encoding_issue_rate();
5387
5388        // OCR errors for typos in legacy preset
5389        if self.preset.ocr_errors_enabled() {
5390            self.typos.type_weights.ocr_errors = 0.3;
5391        }
5392    }
5393
5394    /// Returns the effective missing value rate (considering preset).
5395    pub fn effective_missing_rate(&self) -> f64 {
5396        if self.preset.overrides_settings() {
5397            self.preset.missing_rate()
5398        } else {
5399            self.missing_values.rate
5400        }
5401    }
5402
5403    /// Returns the effective typo rate (considering preset).
5404    pub fn effective_typo_rate(&self) -> f64 {
5405        if self.preset.overrides_settings() {
5406            self.preset.typo_rate()
5407        } else {
5408            self.typos.char_error_rate
5409        }
5410    }
5411
5412    /// Returns the effective duplicate rate (considering preset).
5413    pub fn effective_duplicate_rate(&self) -> f64 {
5414        if self.preset.overrides_settings() {
5415            self.preset.duplicate_rate()
5416        } else {
5417            self.duplicates.exact_duplicate_ratio
5418                + self.duplicates.near_duplicate_ratio
5419                + self.duplicates.fuzzy_duplicate_ratio
5420        }
5421    }
5422
5423    /// Creates a clean profile config.
5424    pub fn clean() -> Self {
5425        Self::with_preset(DataQualityPreset::Clean)
5426    }
5427
5428    /// Creates a noisy profile config.
5429    pub fn noisy() -> Self {
5430        Self::with_preset(DataQualityPreset::Noisy)
5431    }
5432
5433    /// Creates a legacy profile config.
5434    pub fn legacy() -> Self {
5435        Self::with_preset(DataQualityPreset::Legacy)
5436    }
5437}
5438
5439/// Preset configurations for common data quality scenarios.
5440#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5441#[serde(rename_all = "snake_case")]
5442pub enum DataQualityPreset {
5443    /// No data quality variations (clean data)
5444    #[default]
5445    None,
5446    /// Minimal variations (very clean data with rare issues)
5447    Minimal,
5448    /// Normal variations (realistic enterprise data quality)
5449    Normal,
5450    /// High variations (messy data for stress testing)
5451    High,
5452    /// Custom (use individual settings)
5453    Custom,
5454
5455    // ========================================
5456    // ML-Oriented Profiles (Phase 2.1)
5457    // ========================================
5458    /// Clean profile for ML training - minimal data quality issues
5459    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
5460    Clean,
5461    /// Noisy profile simulating typical production data issues
5462    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
5463    Noisy,
5464    /// Legacy profile simulating migrated/OCR'd historical data
5465    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
5466    Legacy,
5467}
5468
5469impl DataQualityPreset {
5470    /// Returns the missing value rate for this preset.
5471    pub fn missing_rate(&self) -> f64 {
5472        match self {
5473            DataQualityPreset::None => 0.0,
5474            DataQualityPreset::Minimal => 0.005,
5475            DataQualityPreset::Normal => 0.02,
5476            DataQualityPreset::High => 0.08,
5477            DataQualityPreset::Custom => 0.01, // Use config value
5478            DataQualityPreset::Clean => 0.001,
5479            DataQualityPreset::Noisy => 0.05,
5480            DataQualityPreset::Legacy => 0.10,
5481        }
5482    }
5483
5484    /// Returns the typo rate for this preset.
5485    pub fn typo_rate(&self) -> f64 {
5486        match self {
5487            DataQualityPreset::None => 0.0,
5488            DataQualityPreset::Minimal => 0.0005,
5489            DataQualityPreset::Normal => 0.002,
5490            DataQualityPreset::High => 0.01,
5491            DataQualityPreset::Custom => 0.001, // Use config value
5492            DataQualityPreset::Clean => 0.0005,
5493            DataQualityPreset::Noisy => 0.02,
5494            DataQualityPreset::Legacy => 0.05,
5495        }
5496    }
5497
5498    /// Returns the duplicate rate for this preset.
5499    pub fn duplicate_rate(&self) -> f64 {
5500        match self {
5501            DataQualityPreset::None => 0.0,
5502            DataQualityPreset::Minimal => 0.001,
5503            DataQualityPreset::Normal => 0.005,
5504            DataQualityPreset::High => 0.02,
5505            DataQualityPreset::Custom => 0.0, // Use config value
5506            DataQualityPreset::Clean => 0.0,
5507            DataQualityPreset::Noisy => 0.01,
5508            DataQualityPreset::Legacy => 0.03,
5509        }
5510    }
5511
5512    /// Returns whether format variations are enabled for this preset.
5513    pub fn format_variations_enabled(&self) -> bool {
5514        match self {
5515            DataQualityPreset::None | DataQualityPreset::Clean => false,
5516            DataQualityPreset::Minimal => true,
5517            DataQualityPreset::Normal => true,
5518            DataQualityPreset::High => true,
5519            DataQualityPreset::Custom => true,
5520            DataQualityPreset::Noisy => true,
5521            DataQualityPreset::Legacy => true,
5522        }
5523    }
5524
5525    /// Returns whether OCR-style errors are enabled for this preset.
5526    pub fn ocr_errors_enabled(&self) -> bool {
5527        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
5528    }
5529
5530    /// Returns whether encoding issues are enabled for this preset.
5531    pub fn encoding_issues_enabled(&self) -> bool {
5532        matches!(
5533            self,
5534            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
5535        )
5536    }
5537
5538    /// Returns the encoding issue rate for this preset.
5539    pub fn encoding_issue_rate(&self) -> f64 {
5540        match self {
5541            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
5542            DataQualityPreset::Normal => 0.002,
5543            DataQualityPreset::High => 0.01,
5544            DataQualityPreset::Custom => 0.0,
5545            DataQualityPreset::Noisy => 0.005,
5546            DataQualityPreset::Legacy => 0.02,
5547        }
5548    }
5549
5550    /// Returns true if this preset overrides individual settings.
5551    pub fn overrides_settings(&self) -> bool {
5552        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
5553    }
5554
5555    /// Returns a human-readable description of this preset.
5556    pub fn description(&self) -> &'static str {
5557        match self {
5558            DataQualityPreset::None => "No data quality issues (pristine data)",
5559            DataQualityPreset::Minimal => "Very rare data quality issues",
5560            DataQualityPreset::Normal => "Realistic enterprise data quality",
5561            DataQualityPreset::High => "Messy data for stress testing",
5562            DataQualityPreset::Custom => "Custom settings from configuration",
5563            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
5564            DataQualityPreset::Noisy => "Typical production data with moderate issues",
5565            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
5566        }
5567    }
5568}
5569
5570/// Missing value injection configuration.
5571#[derive(Debug, Clone, Serialize, Deserialize)]
5572pub struct MissingValuesSchemaConfig {
5573    /// Enable missing value injection
5574    #[serde(default)]
5575    pub enabled: bool,
5576    /// Global missing rate (0.0 to 1.0)
5577    #[serde(default = "default_missing_rate")]
5578    pub rate: f64,
5579    /// Missing value strategy
5580    #[serde(default)]
5581    pub strategy: MissingValueStrategy,
5582    /// Field-specific rates (field name -> rate)
5583    #[serde(default)]
5584    pub field_rates: std::collections::HashMap<String, f64>,
5585    /// Fields that should never have missing values
5586    #[serde(default)]
5587    pub protected_fields: Vec<String>,
5588}
5589
5590fn default_missing_rate() -> f64 {
5591    0.01
5592}
5593
5594impl Default for MissingValuesSchemaConfig {
5595    fn default() -> Self {
5596        Self {
5597            enabled: false,
5598            rate: default_missing_rate(),
5599            strategy: MissingValueStrategy::Mcar,
5600            field_rates: std::collections::HashMap::new(),
5601            protected_fields: vec![
5602                "document_id".to_string(),
5603                "company_code".to_string(),
5604                "posting_date".to_string(),
5605            ],
5606        }
5607    }
5608}
5609
5610/// Missing value strategy types.
5611#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5612#[serde(rename_all = "snake_case")]
5613pub enum MissingValueStrategy {
5614    /// Missing Completely At Random - equal probability for all values
5615    #[default]
5616    Mcar,
5617    /// Missing At Random - depends on other observed values
5618    Mar,
5619    /// Missing Not At Random - depends on the value itself
5620    Mnar,
5621    /// Systematic - entire field groups missing together
5622    Systematic,
5623}
5624
5625/// Typo injection configuration.
5626#[derive(Debug, Clone, Serialize, Deserialize)]
5627pub struct TypoSchemaConfig {
5628    /// Enable typo injection
5629    #[serde(default)]
5630    pub enabled: bool,
5631    /// Character error rate (per character, not per field)
5632    #[serde(default = "default_typo_rate")]
5633    pub char_error_rate: f64,
5634    /// Typo type weights
5635    #[serde(default)]
5636    pub type_weights: TypoTypeWeights,
5637    /// Fields that should never have typos
5638    #[serde(default)]
5639    pub protected_fields: Vec<String>,
5640}
5641
5642fn default_typo_rate() -> f64 {
5643    0.001
5644}
5645
5646impl Default for TypoSchemaConfig {
5647    fn default() -> Self {
5648        Self {
5649            enabled: false,
5650            char_error_rate: default_typo_rate(),
5651            type_weights: TypoTypeWeights::default(),
5652            protected_fields: vec![
5653                "document_id".to_string(),
5654                "gl_account".to_string(),
5655                "company_code".to_string(),
5656            ],
5657        }
5658    }
5659}
5660
5661/// Weights for different typo types.
5662#[derive(Debug, Clone, Serialize, Deserialize)]
5663pub struct TypoTypeWeights {
5664    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
5665    #[serde(default = "default_substitution_weight")]
5666    pub substitution: f64,
5667    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
5668    #[serde(default = "default_transposition_weight")]
5669    pub transposition: f64,
5670    /// Character insertion
5671    #[serde(default = "default_insertion_weight")]
5672    pub insertion: f64,
5673    /// Character deletion
5674    #[serde(default = "default_deletion_weight")]
5675    pub deletion: f64,
5676    /// OCR-style errors (e.g., '0' -> 'O')
5677    #[serde(default = "default_ocr_weight")]
5678    pub ocr_errors: f64,
5679    /// Homophone substitution (e.g., 'their' -> 'there')
5680    #[serde(default = "default_homophone_weight")]
5681    pub homophones: f64,
5682}
5683
/// Default weight for keyboard-adjacent substitution typos (0.35).
fn default_substitution_weight() -> f64 {
    const SUBSTITUTION_WEIGHT: f64 = 0.35;
    SUBSTITUTION_WEIGHT
}
/// Default weight for adjacent-character transposition typos (0.25).
fn default_transposition_weight() -> f64 {
    const TRANSPOSITION_WEIGHT: f64 = 0.25;
    TRANSPOSITION_WEIGHT
}
/// Default weight for character insertion typos (0.10).
fn default_insertion_weight() -> f64 {
    const INSERTION_WEIGHT: f64 = 0.10;
    INSERTION_WEIGHT
}
/// Default weight for character deletion typos (0.15).
fn default_deletion_weight() -> f64 {
    const DELETION_WEIGHT: f64 = 0.15;
    DELETION_WEIGHT
}
/// Default weight for OCR-style errors (0.10).
fn default_ocr_weight() -> f64 {
    const OCR_WEIGHT: f64 = 0.10;
    OCR_WEIGHT
}
/// Default weight for homophone substitution (0.05).
fn default_homophone_weight() -> f64 {
    const HOMOPHONE_WEIGHT: f64 = 0.05;
    HOMOPHONE_WEIGHT
}
5702
5703impl Default for TypoTypeWeights {
5704    fn default() -> Self {
5705        Self {
5706            substitution: default_substitution_weight(),
5707            transposition: default_transposition_weight(),
5708            insertion: default_insertion_weight(),
5709            deletion: default_deletion_weight(),
5710            ocr_errors: default_ocr_weight(),
5711            homophones: default_homophone_weight(),
5712        }
5713    }
5714}
5715
5716/// Format variation configuration.
5717#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5718pub struct FormatVariationSchemaConfig {
5719    /// Enable format variations
5720    #[serde(default)]
5721    pub enabled: bool,
5722    /// Date format variation settings
5723    #[serde(default)]
5724    pub dates: DateFormatVariationConfig,
5725    /// Amount format variation settings
5726    #[serde(default)]
5727    pub amounts: AmountFormatVariationConfig,
5728    /// Identifier format variation settings
5729    #[serde(default)]
5730    pub identifiers: IdentifierFormatVariationConfig,
5731}
5732
5733/// Date format variation configuration.
5734#[derive(Debug, Clone, Serialize, Deserialize)]
5735pub struct DateFormatVariationConfig {
5736    /// Enable date format variations
5737    #[serde(default)]
5738    pub enabled: bool,
5739    /// Overall variation rate
5740    #[serde(default = "default_date_variation_rate")]
5741    pub rate: f64,
5742    /// Include ISO format (2024-01-15)
5743    #[serde(default = "default_true")]
5744    pub iso_format: bool,
5745    /// Include US format (01/15/2024)
5746    #[serde(default)]
5747    pub us_format: bool,
5748    /// Include EU format (15.01.2024)
5749    #[serde(default)]
5750    pub eu_format: bool,
5751    /// Include long format (January 15, 2024)
5752    #[serde(default)]
5753    pub long_format: bool,
5754}
5755
/// Default date format variation rate (0.05).
fn default_date_variation_rate() -> f64 {
    const DATE_VARIATION_RATE: f64 = 0.05;
    DATE_VARIATION_RATE
}
5759
5760impl Default for DateFormatVariationConfig {
5761    fn default() -> Self {
5762        Self {
5763            enabled: false,
5764            rate: default_date_variation_rate(),
5765            iso_format: true,
5766            us_format: false,
5767            eu_format: false,
5768            long_format: false,
5769        }
5770    }
5771}
5772
5773/// Amount format variation configuration.
5774#[derive(Debug, Clone, Serialize, Deserialize)]
5775pub struct AmountFormatVariationConfig {
5776    /// Enable amount format variations
5777    #[serde(default)]
5778    pub enabled: bool,
5779    /// Overall variation rate
5780    #[serde(default = "default_amount_variation_rate")]
5781    pub rate: f64,
5782    /// Include US comma format (1,234.56)
5783    #[serde(default)]
5784    pub us_comma_format: bool,
5785    /// Include EU format (1.234,56)
5786    #[serde(default)]
5787    pub eu_format: bool,
5788    /// Include currency prefix ($1,234.56)
5789    #[serde(default)]
5790    pub currency_prefix: bool,
5791    /// Include accounting format with parentheses for negatives
5792    #[serde(default)]
5793    pub accounting_format: bool,
5794}
5795
/// Default amount format variation rate (0.02).
fn default_amount_variation_rate() -> f64 {
    const AMOUNT_VARIATION_RATE: f64 = 0.02;
    AMOUNT_VARIATION_RATE
}
5799
5800impl Default for AmountFormatVariationConfig {
5801    fn default() -> Self {
5802        Self {
5803            enabled: false,
5804            rate: default_amount_variation_rate(),
5805            us_comma_format: false,
5806            eu_format: false,
5807            currency_prefix: false,
5808            accounting_format: false,
5809        }
5810    }
5811}
5812
5813/// Identifier format variation configuration.
5814#[derive(Debug, Clone, Serialize, Deserialize)]
5815pub struct IdentifierFormatVariationConfig {
5816    /// Enable identifier format variations
5817    #[serde(default)]
5818    pub enabled: bool,
5819    /// Overall variation rate
5820    #[serde(default = "default_identifier_variation_rate")]
5821    pub rate: f64,
5822    /// Case variations (uppercase, lowercase, mixed)
5823    #[serde(default)]
5824    pub case_variations: bool,
5825    /// Padding variations (leading zeros)
5826    #[serde(default)]
5827    pub padding_variations: bool,
5828    /// Separator variations (dash vs underscore)
5829    #[serde(default)]
5830    pub separator_variations: bool,
5831}
5832
/// Default identifier format variation rate (0.02).
fn default_identifier_variation_rate() -> f64 {
    const IDENTIFIER_VARIATION_RATE: f64 = 0.02;
    IDENTIFIER_VARIATION_RATE
}
5836
5837impl Default for IdentifierFormatVariationConfig {
5838    fn default() -> Self {
5839        Self {
5840            enabled: false,
5841            rate: default_identifier_variation_rate(),
5842            case_variations: false,
5843            padding_variations: false,
5844            separator_variations: false,
5845        }
5846    }
5847}
5848
5849/// Duplicate injection configuration.
5850#[derive(Debug, Clone, Serialize, Deserialize)]
5851pub struct DuplicateSchemaConfig {
5852    /// Enable duplicate injection
5853    #[serde(default)]
5854    pub enabled: bool,
5855    /// Overall duplicate rate
5856    #[serde(default = "default_duplicate_rate")]
5857    pub rate: f64,
5858    /// Exact duplicate proportion (out of duplicates)
5859    #[serde(default = "default_exact_duplicate_ratio")]
5860    pub exact_duplicate_ratio: f64,
5861    /// Near duplicate proportion (slight variations)
5862    #[serde(default = "default_near_duplicate_ratio")]
5863    pub near_duplicate_ratio: f64,
5864    /// Fuzzy duplicate proportion (typos in key fields)
5865    #[serde(default = "default_fuzzy_duplicate_ratio")]
5866    pub fuzzy_duplicate_ratio: f64,
5867    /// Maximum date offset for near/fuzzy duplicates (days)
5868    #[serde(default = "default_max_date_offset")]
5869    pub max_date_offset_days: u32,
5870    /// Maximum amount variance for near duplicates (fraction)
5871    #[serde(default = "default_max_amount_variance")]
5872    pub max_amount_variance: f64,
5873}
5874
/// Default overall duplicate rate (0.005).
fn default_duplicate_rate() -> f64 {
    const DUPLICATE_RATE: f64 = 0.005;
    DUPLICATE_RATE
}
/// Default proportion of exact duplicates (0.4).
fn default_exact_duplicate_ratio() -> f64 {
    const EXACT_DUPLICATE_RATIO: f64 = 0.4;
    EXACT_DUPLICATE_RATIO
}
/// Default proportion of near duplicates (0.35).
fn default_near_duplicate_ratio() -> f64 {
    const NEAR_DUPLICATE_RATIO: f64 = 0.35;
    NEAR_DUPLICATE_RATIO
}
/// Default proportion of fuzzy duplicates (0.25).
fn default_fuzzy_duplicate_ratio() -> f64 {
    const FUZZY_DUPLICATE_RATIO: f64 = 0.25;
    FUZZY_DUPLICATE_RATIO
}
/// Default maximum date offset in days for near/fuzzy duplicates (3).
fn default_max_date_offset() -> u32 {
    const MAX_DATE_OFFSET_DAYS: u32 = 3;
    MAX_DATE_OFFSET_DAYS
}
/// Default maximum amount variance for near duplicates (0.01).
fn default_max_amount_variance() -> f64 {
    const MAX_AMOUNT_VARIANCE: f64 = 0.01;
    MAX_AMOUNT_VARIANCE
}
5893
5894impl Default for DuplicateSchemaConfig {
5895    fn default() -> Self {
5896        Self {
5897            enabled: false,
5898            rate: default_duplicate_rate(),
5899            exact_duplicate_ratio: default_exact_duplicate_ratio(),
5900            near_duplicate_ratio: default_near_duplicate_ratio(),
5901            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
5902            max_date_offset_days: default_max_date_offset(),
5903            max_amount_variance: default_max_amount_variance(),
5904        }
5905    }
5906}
5907
5908/// Encoding issue configuration.
5909#[derive(Debug, Clone, Serialize, Deserialize)]
5910pub struct EncodingIssueSchemaConfig {
5911    /// Enable encoding issue injection
5912    #[serde(default)]
5913    pub enabled: bool,
5914    /// Overall encoding issue rate
5915    #[serde(default = "default_encoding_rate")]
5916    pub rate: f64,
5917    /// Include mojibake (UTF-8/Latin-1 confusion)
5918    #[serde(default)]
5919    pub mojibake: bool,
5920    /// Include HTML entity corruption
5921    #[serde(default)]
5922    pub html_entities: bool,
5923    /// Include BOM issues
5924    #[serde(default)]
5925    pub bom_issues: bool,
5926}
5927
/// Default overall encoding issue rate (0.001).
fn default_encoding_rate() -> f64 {
    const ENCODING_RATE: f64 = 0.001;
    ENCODING_RATE
}
5931
5932impl Default for EncodingIssueSchemaConfig {
5933    fn default() -> Self {
5934        Self {
5935            enabled: false,
5936            rate: default_encoding_rate(),
5937            mojibake: false,
5938            html_entities: false,
5939            bom_issues: false,
5940        }
5941    }
5942}
5943
5944/// Per-sink quality profiles for different output formats.
5945#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5946pub struct SinkQualityProfiles {
5947    /// CSV-specific quality settings
5948    #[serde(default)]
5949    pub csv: Option<SinkQualityOverride>,
5950    /// JSON-specific quality settings
5951    #[serde(default)]
5952    pub json: Option<SinkQualityOverride>,
5953    /// Parquet-specific quality settings
5954    #[serde(default)]
5955    pub parquet: Option<SinkQualityOverride>,
5956}
5957
5958/// Quality setting overrides for a specific sink type.
5959#[derive(Debug, Clone, Serialize, Deserialize)]
5960pub struct SinkQualityOverride {
5961    /// Override enabled state
5962    pub enabled: Option<bool>,
5963    /// Override missing value rate
5964    pub missing_rate: Option<f64>,
5965    /// Override typo rate
5966    pub typo_rate: Option<f64>,
5967    /// Override format variation rate
5968    pub format_variation_rate: Option<f64>,
5969    /// Override duplicate rate
5970    pub duplicate_rate: Option<f64>,
5971}
5972
5973// =============================================================================
5974// Accounting Standards Configuration
5975// =============================================================================
5976
5977/// Accounting standards framework configuration for generating standards-compliant data.
5978///
5979/// Supports US GAAP, IFRS, and French GAAP (PCG) frameworks with specific standards:
5980/// - ASC 606/IFRS 15/PCG: Revenue Recognition
5981/// - ASC 842/IFRS 16/PCG: Leases
5982/// - ASC 820/IFRS 13/PCG: Fair Value Measurement
5983/// - ASC 360/IAS 36/PCG: Impairment
5984#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5985pub struct AccountingStandardsConfig {
5986    /// Enable accounting standards generation
5987    #[serde(default)]
5988    pub enabled: bool,
5989
5990    /// Accounting framework to use.
5991    /// When `None`, the country pack's `accounting.framework` is used as fallback;
5992    /// if that is also absent the orchestrator defaults to US GAAP.
5993    #[serde(default, skip_serializing_if = "Option::is_none")]
5994    pub framework: Option<AccountingFrameworkConfig>,
5995
5996    /// Revenue recognition configuration (ASC 606/IFRS 15)
5997    #[serde(default)]
5998    pub revenue_recognition: RevenueRecognitionConfig,
5999
6000    /// Lease accounting configuration (ASC 842/IFRS 16)
6001    #[serde(default)]
6002    pub leases: LeaseAccountingConfig,
6003
6004    /// Fair value measurement configuration (ASC 820/IFRS 13)
6005    #[serde(default)]
6006    pub fair_value: FairValueConfig,
6007
6008    /// Impairment testing configuration (ASC 360/IAS 36)
6009    #[serde(default)]
6010    pub impairment: ImpairmentConfig,
6011
6012    /// Business combination configuration (IFRS 3 / ASC 805)
6013    #[serde(default)]
6014    pub business_combinations: BusinessCombinationsConfig,
6015
6016    /// Expected Credit Loss configuration (IFRS 9 / ASC 326)
6017    #[serde(default)]
6018    pub expected_credit_loss: EclConfig,
6019
6020    /// Generate framework differences for dual reporting
6021    #[serde(default)]
6022    pub generate_differences: bool,
6023}
6024
6025/// Accounting framework selection.
6026#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
6027#[serde(rename_all = "snake_case")]
6028pub enum AccountingFrameworkConfig {
6029    /// US Generally Accepted Accounting Principles
6030    #[default]
6031    UsGaap,
6032    /// International Financial Reporting Standards
6033    Ifrs,
6034    /// Generate data for both frameworks with reconciliation
6035    DualReporting,
6036    /// French GAAP (Plan Comptable Général – PCG)
6037    FrenchGaap,
6038    /// German GAAP (Handelsgesetzbuch – HGB, §238-263)
6039    GermanGaap,
6040}
6041
6042/// Revenue recognition configuration (ASC 606/IFRS 15).
6043#[derive(Debug, Clone, Serialize, Deserialize)]
6044pub struct RevenueRecognitionConfig {
6045    /// Enable revenue recognition generation
6046    #[serde(default)]
6047    pub enabled: bool,
6048
6049    /// Generate customer contracts
6050    #[serde(default = "default_true")]
6051    pub generate_contracts: bool,
6052
6053    /// Average number of performance obligations per contract
6054    #[serde(default = "default_avg_obligations")]
6055    pub avg_obligations_per_contract: f64,
6056
6057    /// Rate of contracts with variable consideration
6058    #[serde(default = "default_variable_consideration_rate")]
6059    pub variable_consideration_rate: f64,
6060
6061    /// Rate of over-time revenue recognition (vs point-in-time)
6062    #[serde(default = "default_over_time_rate")]
6063    pub over_time_recognition_rate: f64,
6064
6065    /// Number of contracts to generate
6066    #[serde(default = "default_contract_count")]
6067    pub contract_count: usize,
6068}
6069
/// Serde default for `RevenueRecognitionConfig::avg_obligations_per_contract`.
fn default_avg_obligations() -> f64 {
    2.0_f64
}

/// Serde default for `RevenueRecognitionConfig::variable_consideration_rate`.
fn default_variable_consideration_rate() -> f64 {
    0.15_f64
}

/// Serde default for `RevenueRecognitionConfig::over_time_recognition_rate`.
fn default_over_time_rate() -> f64 {
    0.30_f64
}

/// Serde default for `RevenueRecognitionConfig::contract_count`.
fn default_contract_count() -> usize {
    100_usize
}
6085
6086impl Default for RevenueRecognitionConfig {
6087    fn default() -> Self {
6088        Self {
6089            enabled: false,
6090            generate_contracts: true,
6091            avg_obligations_per_contract: default_avg_obligations(),
6092            variable_consideration_rate: default_variable_consideration_rate(),
6093            over_time_recognition_rate: default_over_time_rate(),
6094            contract_count: default_contract_count(),
6095        }
6096    }
6097}
6098
6099/// Lease accounting configuration (ASC 842/IFRS 16).
6100#[derive(Debug, Clone, Serialize, Deserialize)]
6101pub struct LeaseAccountingConfig {
6102    /// Enable lease accounting generation
6103    #[serde(default)]
6104    pub enabled: bool,
6105
6106    /// Number of leases to generate
6107    #[serde(default = "default_lease_count")]
6108    pub lease_count: usize,
6109
6110    /// Percentage of finance leases (vs operating)
6111    #[serde(default = "default_finance_lease_pct")]
6112    pub finance_lease_percent: f64,
6113
6114    /// Average lease term in months
6115    #[serde(default = "default_avg_lease_term")]
6116    pub avg_lease_term_months: u32,
6117
6118    /// Generate amortization schedules
6119    #[serde(default = "default_true")]
6120    pub generate_amortization: bool,
6121
6122    /// Real estate lease percentage
6123    #[serde(default = "default_real_estate_pct")]
6124    pub real_estate_percent: f64,
6125}
6126
/// Serde default for `LeaseAccountingConfig::lease_count`.
fn default_lease_count() -> usize {
    50_usize
}

/// Serde default for `LeaseAccountingConfig::finance_lease_percent`.
fn default_finance_lease_pct() -> f64 {
    0.30_f64
}

/// Serde default for `LeaseAccountingConfig::avg_lease_term_months`.
fn default_avg_lease_term() -> u32 {
    60_u32
}

/// Serde default for `LeaseAccountingConfig::real_estate_percent`.
fn default_real_estate_pct() -> f64 {
    0.40_f64
}
6142
6143impl Default for LeaseAccountingConfig {
6144    fn default() -> Self {
6145        Self {
6146            enabled: false,
6147            lease_count: default_lease_count(),
6148            finance_lease_percent: default_finance_lease_pct(),
6149            avg_lease_term_months: default_avg_lease_term(),
6150            generate_amortization: true,
6151            real_estate_percent: default_real_estate_pct(),
6152        }
6153    }
6154}
6155
6156/// Fair value measurement configuration (ASC 820/IFRS 13).
6157#[derive(Debug, Clone, Serialize, Deserialize)]
6158pub struct FairValueConfig {
6159    /// Enable fair value measurement generation
6160    #[serde(default)]
6161    pub enabled: bool,
6162
6163    /// Number of fair value measurements to generate
6164    #[serde(default = "default_fv_count")]
6165    pub measurement_count: usize,
6166
6167    /// Level 1 (quoted prices) percentage
6168    #[serde(default = "default_level1_pct")]
6169    pub level1_percent: f64,
6170
6171    /// Level 2 (observable inputs) percentage
6172    #[serde(default = "default_level2_pct")]
6173    pub level2_percent: f64,
6174
6175    /// Level 3 (unobservable inputs) percentage
6176    #[serde(default = "default_level3_pct")]
6177    pub level3_percent: f64,
6178
6179    /// Include sensitivity analysis for Level 3
6180    #[serde(default)]
6181    pub include_sensitivity_analysis: bool,
6182}
6183
/// Serde default for `FairValueConfig::measurement_count`.
fn default_fv_count() -> usize {
    25_usize
}

/// Serde default for `FairValueConfig::level1_percent`.
fn default_level1_pct() -> f64 {
    0.40_f64
}

/// Serde default for `FairValueConfig::level2_percent`.
fn default_level2_pct() -> f64 {
    0.35_f64
}

/// Serde default for `FairValueConfig::level3_percent`.
fn default_level3_pct() -> f64 {
    0.25_f64
}
6199
6200impl Default for FairValueConfig {
6201    fn default() -> Self {
6202        Self {
6203            enabled: false,
6204            measurement_count: default_fv_count(),
6205            level1_percent: default_level1_pct(),
6206            level2_percent: default_level2_pct(),
6207            level3_percent: default_level3_pct(),
6208            include_sensitivity_analysis: false,
6209        }
6210    }
6211}
6212
6213/// Impairment testing configuration (ASC 360/IAS 36).
6214#[derive(Debug, Clone, Serialize, Deserialize)]
6215pub struct ImpairmentConfig {
6216    /// Enable impairment testing generation
6217    #[serde(default)]
6218    pub enabled: bool,
6219
6220    /// Number of impairment tests to generate
6221    #[serde(default = "default_impairment_count")]
6222    pub test_count: usize,
6223
6224    /// Rate of tests resulting in impairment
6225    #[serde(default = "default_impairment_rate")]
6226    pub impairment_rate: f64,
6227
6228    /// Generate cash flow projections
6229    #[serde(default = "default_true")]
6230    pub generate_projections: bool,
6231
6232    /// Include goodwill impairment tests
6233    #[serde(default)]
6234    pub include_goodwill: bool,
6235}
6236
/// Serde default for `ImpairmentConfig::test_count`.
fn default_impairment_count() -> usize {
    15_usize
}

/// Serde default for `ImpairmentConfig::impairment_rate`.
fn default_impairment_rate() -> f64 {
    0.10_f64
}
6244
6245impl Default for ImpairmentConfig {
6246    fn default() -> Self {
6247        Self {
6248            enabled: false,
6249            test_count: default_impairment_count(),
6250            impairment_rate: default_impairment_rate(),
6251            generate_projections: true,
6252            include_goodwill: false,
6253        }
6254    }
6255}
6256
6257// =============================================================================
6258// Business Combinations Configuration (IFRS 3 / ASC 805)
6259// =============================================================================
6260
6261/// Configuration for generating business combination (acquisition) data.
6262#[derive(Debug, Clone, Serialize, Deserialize)]
6263pub struct BusinessCombinationsConfig {
6264    /// Enable business combination generation
6265    #[serde(default)]
6266    pub enabled: bool,
6267
6268    /// Number of acquisitions to generate per company (1-5)
6269    #[serde(default = "default_bc_acquisition_count")]
6270    pub acquisition_count: usize,
6271}
6272
/// Serde default for `BusinessCombinationsConfig::acquisition_count`.
fn default_bc_acquisition_count() -> usize {
    2_usize
}
6276
6277impl Default for BusinessCombinationsConfig {
6278    fn default() -> Self {
6279        Self {
6280            enabled: false,
6281            acquisition_count: default_bc_acquisition_count(),
6282        }
6283    }
6284}
6285
6286// =============================================================================
6287// ECL Configuration (IFRS 9 / ASC 326)
6288// =============================================================================
6289
6290/// Configuration for Expected Credit Loss generation.
6291#[derive(Debug, Clone, Serialize, Deserialize)]
6292pub struct EclConfig {
6293    /// Enable ECL generation.
6294    #[serde(default)]
6295    pub enabled: bool,
6296
6297    /// Weight for base economic scenario (0–1).
6298    #[serde(default = "default_ecl_base_weight")]
6299    pub base_scenario_weight: f64,
6300
6301    /// Multiplier for base scenario (typically 1.0).
6302    #[serde(default = "default_ecl_base_multiplier")]
6303    pub base_scenario_multiplier: f64,
6304
6305    /// Weight for optimistic economic scenario (0–1).
6306    #[serde(default = "default_ecl_optimistic_weight")]
6307    pub optimistic_scenario_weight: f64,
6308
6309    /// Multiplier for optimistic scenario (< 1.0 means lower losses).
6310    #[serde(default = "default_ecl_optimistic_multiplier")]
6311    pub optimistic_scenario_multiplier: f64,
6312
6313    /// Weight for pessimistic economic scenario (0–1).
6314    #[serde(default = "default_ecl_pessimistic_weight")]
6315    pub pessimistic_scenario_weight: f64,
6316
6317    /// Multiplier for pessimistic scenario (> 1.0 means higher losses).
6318    #[serde(default = "default_ecl_pessimistic_multiplier")]
6319    pub pessimistic_scenario_multiplier: f64,
6320}
6321
/// Serde default for `EclConfig::base_scenario_weight`.
fn default_ecl_base_weight() -> f64 {
    0.50_f64
}

/// Serde default for `EclConfig::base_scenario_multiplier`.
fn default_ecl_base_multiplier() -> f64 {
    1.0_f64
}

/// Serde default for `EclConfig::optimistic_scenario_weight`.
fn default_ecl_optimistic_weight() -> f64 {
    0.30_f64
}

/// Serde default for `EclConfig::optimistic_scenario_multiplier`.
fn default_ecl_optimistic_multiplier() -> f64 {
    0.8_f64
}

/// Serde default for `EclConfig::pessimistic_scenario_weight`.
fn default_ecl_pessimistic_weight() -> f64 {
    0.20_f64
}

/// Serde default for `EclConfig::pessimistic_scenario_multiplier`.
fn default_ecl_pessimistic_multiplier() -> f64 {
    1.4_f64
}
6340
6341impl Default for EclConfig {
6342    fn default() -> Self {
6343        Self {
6344            enabled: false,
6345            base_scenario_weight: default_ecl_base_weight(),
6346            base_scenario_multiplier: default_ecl_base_multiplier(),
6347            optimistic_scenario_weight: default_ecl_optimistic_weight(),
6348            optimistic_scenario_multiplier: default_ecl_optimistic_multiplier(),
6349            pessimistic_scenario_weight: default_ecl_pessimistic_weight(),
6350            pessimistic_scenario_multiplier: default_ecl_pessimistic_multiplier(),
6351        }
6352    }
6353}
6354
6355// =============================================================================
6356// Audit Standards Configuration
6357// =============================================================================
6358
6359/// Audit standards framework configuration for generating standards-compliant audit data.
6360///
6361/// Supports ISA (International Standards on Auditing) and PCAOB standards:
6362/// - ISA 200-720: Complete coverage of audit standards
6363/// - ISA 520: Analytical Procedures
6364/// - ISA 505: External Confirmations
6365/// - ISA 700/705/706/701: Audit Reports
6366/// - PCAOB AS 2201: ICFR Auditing
6367#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6368pub struct AuditStandardsConfig {
6369    /// Enable audit standards generation
6370    #[serde(default)]
6371    pub enabled: bool,
6372
6373    /// ISA compliance configuration
6374    #[serde(default)]
6375    pub isa_compliance: IsaComplianceConfig,
6376
6377    /// Analytical procedures configuration (ISA 520)
6378    #[serde(default)]
6379    pub analytical_procedures: AnalyticalProceduresConfig,
6380
6381    /// External confirmations configuration (ISA 505)
6382    #[serde(default)]
6383    pub confirmations: ConfirmationsConfig,
6384
6385    /// Audit opinion configuration (ISA 700/705/706/701)
6386    #[serde(default)]
6387    pub opinion: AuditOpinionConfig,
6388
6389    /// Generate complete audit trail with traceability
6390    #[serde(default)]
6391    pub generate_audit_trail: bool,
6392
6393    /// SOX 302/404 compliance configuration
6394    #[serde(default)]
6395    pub sox: SoxComplianceConfig,
6396
6397    /// PCAOB-specific configuration
6398    #[serde(default)]
6399    pub pcaob: PcaobConfig,
6400}
6401
6402/// ISA compliance level configuration.
6403#[derive(Debug, Clone, Serialize, Deserialize)]
6404pub struct IsaComplianceConfig {
6405    /// Enable ISA compliance tracking
6406    #[serde(default)]
6407    pub enabled: bool,
6408
6409    /// Compliance level: "basic", "standard", "comprehensive"
6410    #[serde(default = "default_compliance_level")]
6411    pub compliance_level: String,
6412
6413    /// Generate ISA requirement mappings
6414    #[serde(default = "default_true")]
6415    pub generate_isa_mappings: bool,
6416
6417    /// Generate ISA coverage summary
6418    #[serde(default = "default_true")]
6419    pub generate_coverage_summary: bool,
6420
6421    /// Include PCAOB standard mappings (for dual framework)
6422    #[serde(default)]
6423    pub include_pcaob: bool,
6424
6425    /// Framework to use: "isa", "pcaob", "dual"
6426    #[serde(default = "default_audit_framework")]
6427    pub framework: String,
6428}
6429
/// Serde default for `IsaComplianceConfig::compliance_level`.
fn default_compliance_level() -> String {
    String::from("standard")
}

/// Serde default for `IsaComplianceConfig::framework`.
fn default_audit_framework() -> String {
    String::from("isa")
}
6437
6438impl Default for IsaComplianceConfig {
6439    fn default() -> Self {
6440        Self {
6441            enabled: false,
6442            compliance_level: default_compliance_level(),
6443            generate_isa_mappings: true,
6444            generate_coverage_summary: true,
6445            include_pcaob: false,
6446            framework: default_audit_framework(),
6447        }
6448    }
6449}
6450
6451/// Analytical procedures configuration (ISA 520).
6452#[derive(Debug, Clone, Serialize, Deserialize)]
6453pub struct AnalyticalProceduresConfig {
6454    /// Enable analytical procedures generation
6455    #[serde(default)]
6456    pub enabled: bool,
6457
6458    /// Number of procedures per account/area
6459    #[serde(default = "default_procedures_per_account")]
6460    pub procedures_per_account: usize,
6461
6462    /// Probability of variance exceeding threshold
6463    #[serde(default = "default_variance_probability")]
6464    pub variance_probability: f64,
6465
6466    /// Include variance investigations
6467    #[serde(default = "default_true")]
6468    pub generate_investigations: bool,
6469
6470    /// Include financial ratio analysis
6471    #[serde(default = "default_true")]
6472    pub include_ratio_analysis: bool,
6473}
6474
/// Serde default for `AnalyticalProceduresConfig::procedures_per_account`.
fn default_procedures_per_account() -> usize {
    3_usize
}

/// Serde default for `AnalyticalProceduresConfig::variance_probability`.
fn default_variance_probability() -> f64 {
    0.20_f64
}
6482
6483impl Default for AnalyticalProceduresConfig {
6484    fn default() -> Self {
6485        Self {
6486            enabled: false,
6487            procedures_per_account: default_procedures_per_account(),
6488            variance_probability: default_variance_probability(),
6489            generate_investigations: true,
6490            include_ratio_analysis: true,
6491        }
6492    }
6493}
6494
6495/// External confirmations configuration (ISA 505).
6496#[derive(Debug, Clone, Serialize, Deserialize)]
6497pub struct ConfirmationsConfig {
6498    /// Enable confirmation generation
6499    #[serde(default)]
6500    pub enabled: bool,
6501
6502    /// Number of confirmations to generate
6503    #[serde(default = "default_confirmation_count")]
6504    pub confirmation_count: usize,
6505
6506    /// Positive response rate
6507    #[serde(default = "default_positive_response_rate")]
6508    pub positive_response_rate: f64,
6509
6510    /// Exception rate (responses with differences)
6511    #[serde(default = "default_exception_rate_confirm")]
6512    pub exception_rate: f64,
6513
6514    /// Non-response rate
6515    #[serde(default = "default_non_response_rate")]
6516    pub non_response_rate: f64,
6517
6518    /// Generate alternative procedures for non-responses
6519    #[serde(default = "default_true")]
6520    pub generate_alternative_procedures: bool,
6521}
6522
/// Serde default for `ConfirmationsConfig::confirmation_count`.
fn default_confirmation_count() -> usize {
    50_usize
}

/// Serde default for `ConfirmationsConfig::positive_response_rate`.
fn default_positive_response_rate() -> f64 {
    0.85_f64
}

/// Serde default for `ConfirmationsConfig::exception_rate`.
fn default_exception_rate_confirm() -> f64 {
    0.10_f64
}

/// Serde default for `ConfirmationsConfig::non_response_rate`.
fn default_non_response_rate() -> f64 {
    0.05_f64
}
6538
6539impl Default for ConfirmationsConfig {
6540    fn default() -> Self {
6541        Self {
6542            enabled: false,
6543            confirmation_count: default_confirmation_count(),
6544            positive_response_rate: default_positive_response_rate(),
6545            exception_rate: default_exception_rate_confirm(),
6546            non_response_rate: default_non_response_rate(),
6547            generate_alternative_procedures: true,
6548        }
6549    }
6550}
6551
6552/// Audit opinion configuration (ISA 700/705/706/701).
6553#[derive(Debug, Clone, Serialize, Deserialize)]
6554pub struct AuditOpinionConfig {
6555    /// Enable audit opinion generation
6556    #[serde(default)]
6557    pub enabled: bool,
6558
6559    /// Generate Key Audit Matters (KAM) / Critical Audit Matters (CAM)
6560    #[serde(default = "default_true")]
6561    pub generate_kam: bool,
6562
6563    /// Average number of KAMs/CAMs per opinion
6564    #[serde(default = "default_kam_count")]
6565    pub average_kam_count: usize,
6566
6567    /// Rate of modified opinions
6568    #[serde(default = "default_modified_opinion_rate")]
6569    pub modified_opinion_rate: f64,
6570
6571    /// Include emphasis of matter paragraphs
6572    #[serde(default)]
6573    pub include_emphasis_of_matter: bool,
6574
6575    /// Include going concern conclusions
6576    #[serde(default = "default_true")]
6577    pub include_going_concern: bool,
6578}
6579
/// Serde default for `AuditOpinionConfig::average_kam_count`.
fn default_kam_count() -> usize {
    3_usize
}

/// Serde default for `AuditOpinionConfig::modified_opinion_rate`.
fn default_modified_opinion_rate() -> f64 {
    0.05_f64
}
6587
6588impl Default for AuditOpinionConfig {
6589    fn default() -> Self {
6590        Self {
6591            enabled: false,
6592            generate_kam: true,
6593            average_kam_count: default_kam_count(),
6594            modified_opinion_rate: default_modified_opinion_rate(),
6595            include_emphasis_of_matter: false,
6596            include_going_concern: true,
6597        }
6598    }
6599}
6600
6601/// SOX compliance configuration (Sections 302/404).
6602#[derive(Debug, Clone, Serialize, Deserialize)]
6603pub struct SoxComplianceConfig {
6604    /// Enable SOX compliance generation
6605    #[serde(default)]
6606    pub enabled: bool,
6607
6608    /// Generate Section 302 CEO/CFO certifications
6609    #[serde(default = "default_true")]
6610    pub generate_302_certifications: bool,
6611
6612    /// Generate Section 404 ICFR assessments
6613    #[serde(default = "default_true")]
6614    pub generate_404_assessments: bool,
6615
6616    /// Materiality threshold for SOX testing
6617    #[serde(default = "default_sox_materiality_threshold")]
6618    pub materiality_threshold: f64,
6619
6620    /// Rate of material weaknesses
6621    #[serde(default = "default_material_weakness_rate")]
6622    pub material_weakness_rate: f64,
6623
6624    /// Rate of significant deficiencies
6625    #[serde(default = "default_significant_deficiency_rate")]
6626    pub significant_deficiency_rate: f64,
6627}
6628
/// Serde default for `SoxComplianceConfig::material_weakness_rate`.
fn default_material_weakness_rate() -> f64 {
    0.02_f64
}

/// Serde default for `SoxComplianceConfig::significant_deficiency_rate`.
fn default_significant_deficiency_rate() -> f64 {
    0.08_f64
}
6636
6637impl Default for SoxComplianceConfig {
6638    fn default() -> Self {
6639        Self {
6640            enabled: false,
6641            generate_302_certifications: true,
6642            generate_404_assessments: true,
6643            materiality_threshold: default_sox_materiality_threshold(),
6644            material_weakness_rate: default_material_weakness_rate(),
6645            significant_deficiency_rate: default_significant_deficiency_rate(),
6646        }
6647    }
6648}
6649
6650/// PCAOB-specific configuration.
6651#[derive(Debug, Clone, Serialize, Deserialize)]
6652pub struct PcaobConfig {
6653    /// Enable PCAOB-specific elements
6654    #[serde(default)]
6655    pub enabled: bool,
6656
6657    /// Treat as PCAOB audit (vs ISA-only)
6658    #[serde(default)]
6659    pub is_pcaob_audit: bool,
6660
6661    /// Generate Critical Audit Matters (CAM)
6662    #[serde(default = "default_true")]
6663    pub generate_cam: bool,
6664
6665    /// Include ICFR opinion (for integrated audits)
6666    #[serde(default)]
6667    pub include_icfr_opinion: bool,
6668
6669    /// Generate PCAOB-ISA standard mappings
6670    #[serde(default)]
6671    pub generate_standard_mappings: bool,
6672}
6673
6674impl Default for PcaobConfig {
6675    fn default() -> Self {
6676        Self {
6677            enabled: false,
6678            is_pcaob_audit: false,
6679            generate_cam: true,
6680            include_icfr_opinion: false,
6681            generate_standard_mappings: false,
6682        }
6683    }
6684}
6685
6686// =============================================================================
6687// Advanced Distribution Configuration
6688// =============================================================================
6689
6690/// Advanced distribution configuration for realistic data generation.
6691///
6692/// This section enables sophisticated distribution models including:
6693/// - Mixture models (multi-modal distributions)
6694/// - Cross-field correlations
6695/// - Conditional distributions
6696/// - Regime changes and economic cycles
6697/// - Statistical validation
6698#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6699pub struct AdvancedDistributionConfig {
6700    /// Enable advanced distribution features.
6701    #[serde(default)]
6702    pub enabled: bool,
6703
6704    /// Mixture model configuration for amounts.
6705    #[serde(default)]
6706    pub amounts: MixtureDistributionSchemaConfig,
6707
6708    /// Cross-field correlation configuration.
6709    #[serde(default)]
6710    pub correlations: CorrelationSchemaConfig,
6711
6712    /// Conditional distribution configurations.
6713    #[serde(default)]
6714    pub conditional: Vec<ConditionalDistributionSchemaConfig>,
6715
6716    /// Regime change configuration.
6717    #[serde(default)]
6718    pub regime_changes: RegimeChangeSchemaConfig,
6719
6720    /// Industry-specific distribution profile.
6721    ///
6722    /// Accepts either the legacy bare-name form (`industry_profile: retail`) or
6723    /// the SP3 extended struct form with optional `priors` sub-section.
6724    #[serde(default)]
6725    pub industry_profile: Option<IndustryProfileField>,
6726
6727    /// Statistical validation configuration.
6728    #[serde(default)]
6729    pub validation: StatisticalValidationSchemaConfig,
6730
6731    /// v3.4.4+ — Pareto heavy-tailed distribution for monetary amounts.
6732    /// When set and `enabled`, overrides `amounts` mixture model for the
6733    /// non-fraud amount-sampling path (fraud patterns remain orthogonal).
6734    /// Useful for capex, strategic contracts, and any domain where a small
6735    /// number of very large values dominates the tail.
6736    #[serde(default)]
6737    pub pareto: Option<ParetoSchemaConfig>,
6738}
6739
6740/// Schema-level Pareto distribution configuration (v3.4.4+).
6741///
6742/// Thin wrapper around `datasynth_core::distributions::ParetoConfig` that
6743/// adds an `enabled` gate and serde-friendly field names.
6744#[derive(Debug, Clone, Serialize, Deserialize)]
6745pub struct ParetoSchemaConfig {
6746    /// Enable Pareto sampling. When true, replaces the `amounts` mixture
6747    /// model for the non-fraud amount-sampling path.
6748    #[serde(default)]
6749    pub enabled: bool,
6750
6751    /// Shape parameter (tail heaviness). Lower values → heavier tail.
6752    /// Typical range: 1.5-3.0. Default: 2.0.
6753    #[serde(default = "default_pareto_alpha")]
6754    pub alpha: f64,
6755
6756    /// Scale / minimum value. All samples are >= x_min.
6757    /// Typical: 1000 (for capex) to 100,000 (for large contracts). Default: 100.
6758    #[serde(default = "default_pareto_x_min")]
6759    pub x_min: f64,
6760
6761    /// Optional upper clamp. `None` = unbounded (recommended for realistic
6762    /// heavy tails).
6763    #[serde(default)]
6764    pub max_value: Option<f64>,
6765
6766    /// Decimal places for rounding. Default: 2.
6767    #[serde(default = "default_pareto_decimal_places")]
6768    pub decimal_places: u8,
6769}
6770
/// Serde default for `ParetoSchemaConfig::alpha`.
fn default_pareto_alpha() -> f64 {
    2.0_f64
}

/// Serde default for `ParetoSchemaConfig::x_min`.
fn default_pareto_x_min() -> f64 {
    100.0_f64
}

/// Serde default for `ParetoSchemaConfig::decimal_places`.
fn default_pareto_decimal_places() -> u8 {
    2_u8
}
6782
6783impl Default for ParetoSchemaConfig {
6784    fn default() -> Self {
6785        Self {
6786            enabled: false,
6787            alpha: default_pareto_alpha(),
6788            x_min: default_pareto_x_min(),
6789            max_value: None,
6790            decimal_places: default_pareto_decimal_places(),
6791        }
6792    }
6793}
6794
6795impl ParetoSchemaConfig {
6796    /// Convert this schema config into a `datasynth_core::distributions::ParetoConfig`.
6797    pub fn to_core_config(&self) -> datasynth_core::distributions::ParetoConfig {
6798        datasynth_core::distributions::ParetoConfig {
6799            alpha: self.alpha,
6800            x_min: self.x_min,
6801            max_value: self.max_value,
6802            decimal_places: self.decimal_places,
6803        }
6804    }
6805}
6806
6807/// Industry profile types for pre-configured distribution settings.
6808#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
6809#[serde(rename_all = "snake_case")]
6810pub enum IndustryProfileType {
6811    /// Retail industry profile (POS sales, inventory, seasonal)
6812    Retail,
6813    /// Manufacturing industry profile (raw materials, maintenance, capital)
6814    Manufacturing,
6815    /// Financial services profile (wire transfers, ACH, fee income)
6816    FinancialServices,
6817    /// Healthcare profile (claims, procedures, supplies)
6818    Healthcare,
6819    /// Technology profile (subscriptions, services, R&D)
6820    Technology,
6821}
6822
6823impl IndustryProfileType {
6824    /// Return the lowercase ASCII slug used for bundled-priors filenames.
6825    ///
6826    /// E.g. `IndustryProfileType::FinancialServices => "financial_services"`.
6827    pub fn slug(self) -> &'static str {
6828        match self {
6829            Self::Retail => "retail",
6830            Self::Manufacturing => "manufacturing",
6831            Self::FinancialServices => "financial_services",
6832            // Matches SP2's bundle naming (corpus uses "Health", not "Healthcare").
6833            Self::Healthcare => "health",
6834            Self::Technology => "technology",
6835        }
6836    }
6837}
6838
6839// ---------------------------------------------------------------------------
6840// SP3 — IndustryProfileField: backward-compatible wrapper
6841// ---------------------------------------------------------------------------
6842
6843/// The value of `distributions.industry_profile` in config YAML.
6844///
6845/// Accepts both the legacy bare-name form:
6846/// ```yaml
6847/// distributions:
6848///   industry_profile: retail
6849/// ```
6850/// and the new SP3 extended struct form with optional `priors` sub-section:
6851/// ```yaml
6852/// distributions:
6853///   industry_profile:
6854///     name: retail
6855///     priors:
6856///       enabled: true
6857///       source: bundled
6858/// ```
6859#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6860#[serde(untagged)]
6861pub enum IndustryProfileField {
6862    /// Legacy form: `industry_profile: retail`.
6863    Name(IndustryProfileType),
6864    /// New form: `industry_profile: { name: retail, priors: { ... } }`.
6865    Full(IndustryProfileFull),
6866}
6867
6868impl IndustryProfileField {
6869    /// Return the bare `IndustryProfileType` regardless of which form was used.
6870    pub fn profile_type(&self) -> IndustryProfileType {
6871        match self {
6872            IndustryProfileField::Name(t) => *t,
6873            IndustryProfileField::Full(f) => f.name,
6874        }
6875    }
6876
6877    /// Return the optional `priors` sub-section, if present.
6878    pub fn priors(&self) -> Option<&IndustryPriorsConfig> {
6879        match self {
6880            IndustryProfileField::Name(_) => None,
6881            IndustryProfileField::Full(f) => f.priors.as_ref(),
6882        }
6883    }
6884}
6885
6886/// Extended industry profile struct used when `priors` is needed (SP3).
6887#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6888pub struct IndustryProfileFull {
6889    /// The industry variant (same values as the bare-name legacy form).
6890    pub name: IndustryProfileType,
6891    /// Optional SP3 priors sub-section.
6892    #[serde(default, skip_serializing_if = "Option::is_none")]
6893    pub priors: Option<IndustryPriorsConfig>,
6894}
6895
6896/// SP3 — configuration for industry-prior injection.
6897///
6898/// When `enabled = true`, the generator uses pre-baked statistical priors
6899/// for the given industry. `source` selects whether to use bundled priors or
6900/// load from a user-supplied file (requires `path`).
6901#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
6902pub struct IndustryPriorsConfig {
6903    /// Enable prior injection. When false the rest of the struct is ignored.
6904    #[serde(default)]
6905    pub enabled: bool,
6906
6907    /// Where to load the priors from.
6908    #[serde(default)]
6909    pub source: PriorsSource,
6910
6911    /// Path to the priors file. Required when `source = file`.
6912    #[serde(default, skip_serializing_if = "Option::is_none")]
6913    pub path: Option<std::path::PathBuf>,
6914
6915    /// SP3.4 — enable online velocity-rule calibrator. Adds per-line overhead
6916    /// when `true`; default `false` keeps v5.12/v5.13-without-calibration behavior.
6917    #[serde(default)]
6918    pub velocity_calibration: bool,
6919}
6920
6921/// Source of industry priors.
6922#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
6923#[serde(rename_all = "lowercase")]
6924pub enum PriorsSource {
6925    /// Use the priors bundled with the binary (default).
6926    #[default]
6927    Bundled,
6928    /// Load priors from a user-supplied file (requires `path`).
6929    File,
6930}
6931
6932/// Mixture model distribution configuration.
6933#[derive(Debug, Clone, Serialize, Deserialize)]
6934pub struct MixtureDistributionSchemaConfig {
6935    /// Enable mixture model for amount generation.
6936    #[serde(default)]
6937    pub enabled: bool,
6938
6939    /// Distribution type: "gaussian" or "lognormal".
6940    #[serde(default = "default_mixture_type")]
6941    pub distribution_type: MixtureDistributionType,
6942
6943    /// Mixture components with weights.
6944    #[serde(default)]
6945    pub components: Vec<MixtureComponentConfig>,
6946
6947    /// Minimum value constraint.
6948    #[serde(default = "default_min_amount")]
6949    pub min_value: f64,
6950
6951    /// Maximum value constraint (optional).
6952    #[serde(default)]
6953    pub max_value: Option<f64>,
6954
6955    /// Decimal places for rounding.
6956    #[serde(default = "default_decimal_places")]
6957    pub decimal_places: u8,
6958}
6959
6960fn default_mixture_type() -> MixtureDistributionType {
6961    MixtureDistributionType::LogNormal
6962}
6963
6964fn default_min_amount() -> f64 {
6965    0.01
6966}
6967
6968fn default_decimal_places() -> u8 {
6969    2
6970}
6971
6972impl Default for MixtureDistributionSchemaConfig {
6973    fn default() -> Self {
6974        Self {
6975            enabled: false,
6976            distribution_type: MixtureDistributionType::LogNormal,
6977            components: Vec::new(),
6978            min_value: 0.01,
6979            max_value: None,
6980            decimal_places: 2,
6981        }
6982    }
6983}
6984
6985impl MixtureDistributionSchemaConfig {
6986    /// Convert this schema-level config into a `LogNormalMixtureConfig`
6987    /// suitable for `LogNormalMixtureSampler::new`. Returns `None` if there
6988    /// are no components (schema default is an empty list, which cannot
6989    /// drive a sampler).
6990    ///
6991    /// Callers should gate this with `self.enabled` before invoking.
6992    pub fn to_log_normal_config(
6993        &self,
6994    ) -> Option<datasynth_core::distributions::LogNormalMixtureConfig> {
6995        if self.components.is_empty() {
6996            return None;
6997        }
6998        Some(datasynth_core::distributions::LogNormalMixtureConfig {
6999            components: self
7000                .components
7001                .iter()
7002                .map(|c| match &c.label {
7003                    Some(lbl) => datasynth_core::distributions::LogNormalComponent::with_label(
7004                        c.weight,
7005                        c.mu,
7006                        c.sigma,
7007                        lbl.clone(),
7008                    ),
7009                    None => datasynth_core::distributions::LogNormalComponent::new(
7010                        c.weight, c.mu, c.sigma,
7011                    ),
7012                })
7013                .collect(),
7014            min_value: self.min_value,
7015            max_value: self.max_value,
7016            decimal_places: self.decimal_places,
7017        })
7018    }
7019
7020    /// Convert this schema-level config into a `GaussianMixtureConfig`.
7021    /// Returns `None` if there are no components.
7022    pub fn to_gaussian_config(
7023        &self,
7024    ) -> Option<datasynth_core::distributions::GaussianMixtureConfig> {
7025        if self.components.is_empty() {
7026            return None;
7027        }
7028        Some(datasynth_core::distributions::GaussianMixtureConfig {
7029            components: self
7030                .components
7031                .iter()
7032                .map(|c| {
7033                    datasynth_core::distributions::GaussianComponent::new(c.weight, c.mu, c.sigma)
7034                })
7035                .collect(),
7036            allow_negative: true,
7037            min_value: Some(self.min_value),
7038            max_value: self.max_value,
7039        })
7040    }
7041}
7042
/// Mixture distribution type.
///
/// Serialized in snake_case (`gaussian`, `log_normal`). The derived
/// `Default` is `LogNormal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MixtureDistributionType {
    /// Gaussian (normal) mixture
    Gaussian,
    /// Log-normal mixture (for positive amounts)
    #[default]
    LogNormal,
}
7053
/// Configuration for a single mixture component.
///
/// `weight`, `mu` and `sigma` are required keys; `label` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponentConfig {
    /// Weight of this component (must sum to 1.0 across all components).
    pub weight: f64,

    /// Location parameter (mean for Gaussian, mu for log-normal).
    pub mu: f64,

    /// Scale parameter (std dev for Gaussian, sigma for log-normal).
    pub sigma: f64,

    /// Optional label for this component (e.g., "routine", "significant", "major").
    /// Defaults to `None` when the key is absent.
    #[serde(default)]
    pub label: Option<String>,
}
7070
7071/// Cross-field correlation configuration.
7072#[derive(Debug, Clone, Serialize, Deserialize)]
7073pub struct CorrelationSchemaConfig {
7074    /// Enable correlation modeling.
7075    #[serde(default)]
7076    pub enabled: bool,
7077
7078    /// Copula type for dependency modeling.
7079    #[serde(default)]
7080    pub copula_type: CopulaSchemaType,
7081
7082    /// Field definitions for correlation.
7083    #[serde(default)]
7084    pub fields: Vec<CorrelatedFieldConfig>,
7085
7086    /// Correlation matrix (upper triangular, row-major).
7087    /// For n fields, this should have n*(n-1)/2 values.
7088    #[serde(default)]
7089    pub matrix: Vec<f64>,
7090
7091    /// Expected correlations for validation.
7092    #[serde(default)]
7093    pub expected_correlations: Vec<ExpectedCorrelationConfig>,
7094}
7095
7096impl Default for CorrelationSchemaConfig {
7097    fn default() -> Self {
7098        Self {
7099            enabled: false,
7100            copula_type: CopulaSchemaType::Gaussian,
7101            fields: Vec::new(),
7102            matrix: Vec::new(),
7103            expected_correlations: Vec::new(),
7104        }
7105    }
7106}
7107
7108impl CorrelationSchemaConfig {
7109    /// v3.5.4+: extract the correlation for a specific field pair from
7110    /// either the upper-triangular flat matrix (n*(n-1)/2 values) or a
7111    /// full symmetric n×n matrix (n*n values). Returns `None` when the
7112    /// named fields aren't both present or the matrix shape doesn't
7113    /// match.
7114    pub fn correlation_between(&self, field_a: &str, field_b: &str) -> Option<f64> {
7115        let idx_a = self.fields.iter().position(|f| f.name == field_a)?;
7116        let idx_b = self.fields.iter().position(|f| f.name == field_b)?;
7117        if idx_a == idx_b {
7118            return Some(1.0);
7119        }
7120        let (i, j) = if idx_a < idx_b {
7121            (idx_a, idx_b)
7122        } else {
7123            (idx_b, idx_a)
7124        };
7125        let n = self.fields.len();
7126        // Full n×n symmetric matrix?
7127        if self.matrix.len() == n * n {
7128            return self.matrix.get(idx_a * n + idx_b).copied();
7129        }
7130        // Upper triangular flat (row-major, excluding diagonal)?
7131        let expected_tri = n * (n - 1) / 2;
7132        if self.matrix.len() == expected_tri {
7133            // Row i, col j where j > i: flat index is
7134            //   sum_{k=0..i}((n-1-k)) + (j - i - 1)
7135            // = i*(n-1) - i*(i-1)/2 + (j - i - 1)
7136            let flat = i * (n - 1) - i * (i.saturating_sub(1)) / 2 + (j - i - 1);
7137            return self.matrix.get(flat).copied();
7138        }
7139        None
7140    }
7141
7142    /// Convert this schema config to a core `CopulaConfig` when the
7143    /// declared field pair `(field_a, field_b)` has a valid correlation
7144    /// entry. Returns `None` when disabled, fields missing, or matrix
7145    /// malformed.
7146    pub fn to_core_config_for_pair(
7147        &self,
7148        field_a: &str,
7149        field_b: &str,
7150    ) -> Option<datasynth_core::distributions::CopulaConfig> {
7151        if !self.enabled {
7152            return None;
7153        }
7154        let rho = self.correlation_between(field_a, field_b)?;
7155        use datasynth_core::distributions::{CopulaConfig, CopulaType};
7156        let copula_type = match self.copula_type {
7157            CopulaSchemaType::Gaussian => CopulaType::Gaussian,
7158            CopulaSchemaType::Clayton => CopulaType::Clayton,
7159            CopulaSchemaType::Gumbel => CopulaType::Gumbel,
7160            CopulaSchemaType::Frank => CopulaType::Frank,
7161            CopulaSchemaType::StudentT => CopulaType::StudentT,
7162        };
7163        // Gaussian / StudentT interpret theta as correlation; others
7164        // as a shape parameter. Minimal v3.5.4 only wires Gaussian in
7165        // the runtime, but the converter is general so follow-ups can
7166        // light up the other copulas.
7167        let theta = rho.clamp(-0.999, 0.999);
7168        Some(CopulaConfig {
7169            copula_type,
7170            theta,
7171            degrees_of_freedom: 4.0,
7172        })
7173    }
7174}
7175
/// Copula type for dependency modeling.
///
/// Serialized in snake_case (`gaussian`, `clayton`, `gumbel`, `frank`,
/// `student_t`). The derived `Default` is `Gaussian`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CopulaSchemaType {
    /// Gaussian copula (symmetric, no tail dependence)
    #[default]
    Gaussian,
    /// Clayton copula (lower tail dependence)
    Clayton,
    /// Gumbel copula (upper tail dependence)
    Gumbel,
    /// Frank copula (symmetric, no tail dependence)
    Frank,
    /// Student-t copula (both tail dependencies)
    StudentT,
}
7192
/// Configuration for a correlated field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelatedFieldConfig {
    /// Field name. `CorrelationSchemaConfig::correlation_between` matches
    /// its arguments against this value.
    pub name: String,

    /// Marginal distribution type. Falls back to
    /// `MarginalDistributionConfig::default()` when the key is absent.
    #[serde(default)]
    pub distribution: MarginalDistributionConfig,
}
7203
/// Marginal distribution configuration.
///
/// Internally tagged: the variant is selected by a `type` key whose value
/// is the snake_case variant name (`normal`, `log_normal`, `uniform`,
/// `discrete_uniform`), with the variant's parameters as sibling keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MarginalDistributionConfig {
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum value
        min: f64,
        /// Maximum value
        max: f64,
    },
    /// Discrete uniform distribution.
    DiscreteUniform {
        /// Minimum integer value
        min: i32,
        /// Maximum integer value
        max: i32,
    },
}
7237
7238impl Default for MarginalDistributionConfig {
7239    fn default() -> Self {
7240        Self::Normal {
7241            mu: 0.0,
7242            sigma: 1.0,
7243        }
7244    }
7245}
7246
7247/// Expected correlation for validation.
7248#[derive(Debug, Clone, Serialize, Deserialize)]
7249pub struct ExpectedCorrelationConfig {
7250    /// First field name.
7251    pub field1: String,
7252    /// Second field name.
7253    pub field2: String,
7254    /// Expected correlation coefficient.
7255    pub expected_r: f64,
7256    /// Acceptable tolerance.
7257    #[serde(default = "default_correlation_tolerance")]
7258    pub tolerance: f64,
7259}
7260
/// Serde default for `ExpectedCorrelationConfig::tolerance`: `0.10`.
fn default_correlation_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.10;
    TOLERANCE
}
7264
7265/// Conditional distribution configuration.
7266#[derive(Debug, Clone, Serialize, Deserialize)]
7267pub struct ConditionalDistributionSchemaConfig {
7268    /// Output field name to generate.
7269    pub output_field: String,
7270
7271    /// Input field name that conditions the distribution.
7272    pub input_field: String,
7273
7274    /// Breakpoints defining distribution changes.
7275    #[serde(default)]
7276    pub breakpoints: Vec<ConditionalBreakpointConfig>,
7277
7278    /// Default distribution when below all breakpoints.
7279    #[serde(default)]
7280    pub default_distribution: ConditionalDistributionParamsConfig,
7281
7282    /// Minimum output value constraint.
7283    #[serde(default)]
7284    pub min_value: Option<f64>,
7285
7286    /// Maximum output value constraint.
7287    #[serde(default)]
7288    pub max_value: Option<f64>,
7289
7290    /// Decimal places for output rounding.
7291    #[serde(default = "default_decimal_places")]
7292    pub decimal_places: u8,
7293}
7294
/// Breakpoint for conditional distribution.
///
/// Both keys are required; there are no serde defaults on this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalBreakpointConfig {
    /// Input value threshold.
    pub threshold: f64,

    /// Distribution to use when input >= threshold.
    pub distribution: ConditionalDistributionParamsConfig,
}
7304
7305impl ConditionalDistributionSchemaConfig {
7306    /// Convert this schema config into a core
7307    /// `ConditionalDistributionConfig` suitable for
7308    /// `ConditionalSampler::new`. v3.5.3+.
7309    pub fn to_core_config(&self) -> datasynth_core::distributions::ConditionalDistributionConfig {
7310        use datasynth_core::distributions::{
7311            Breakpoint, ConditionalDistributionConfig, ConditionalDistributionParams,
7312        };
7313
7314        let default_distribution = convert_conditional_params(&self.default_distribution);
7315        let breakpoints: Vec<Breakpoint> = self
7316            .breakpoints
7317            .iter()
7318            .map(|bp| Breakpoint {
7319                threshold: bp.threshold,
7320                distribution: convert_conditional_params(&bp.distribution),
7321            })
7322            .collect();
7323
7324        // Use a sentinel default_distribution when the schema default is
7325        // its factory default (Fixed { value: 0.0 })  and we have
7326        // breakpoints — we don't want to clobber data for values below
7327        // the first breakpoint.
7328        let final_default = if breakpoints.is_empty() {
7329            default_distribution
7330        } else {
7331            match default_distribution {
7332                ConditionalDistributionParams::Fixed { value: 0.0 } => {
7333                    // Reuse the first breakpoint's distribution as the
7334                    // default to avoid surprising zeros.
7335                    breakpoints[0].distribution.clone()
7336                }
7337                other => other,
7338            }
7339        };
7340
7341        ConditionalDistributionConfig {
7342            output_field: self.output_field.clone(),
7343            input_field: self.input_field.clone(),
7344            breakpoints,
7345            default_distribution: final_default,
7346            min_value: self.min_value,
7347            max_value: self.max_value,
7348            decimal_places: self.decimal_places,
7349        }
7350    }
7351}
7352
7353fn convert_conditional_params(
7354    p: &ConditionalDistributionParamsConfig,
7355) -> datasynth_core::distributions::ConditionalDistributionParams {
7356    use datasynth_core::distributions::ConditionalDistributionParams as Core;
7357    match p {
7358        ConditionalDistributionParamsConfig::Fixed { value } => Core::Fixed { value: *value },
7359        ConditionalDistributionParamsConfig::Normal { mu, sigma } => Core::Normal {
7360            mu: *mu,
7361            sigma: *sigma,
7362        },
7363        ConditionalDistributionParamsConfig::LogNormal { mu, sigma } => Core::LogNormal {
7364            mu: *mu,
7365            sigma: *sigma,
7366        },
7367        ConditionalDistributionParamsConfig::Uniform { min, max } => Core::Uniform {
7368            min: *min,
7369            max: *max,
7370        },
7371        ConditionalDistributionParamsConfig::Beta {
7372            alpha,
7373            beta,
7374            min,
7375            max,
7376        } => Core::Beta {
7377            alpha: *alpha,
7378            beta: *beta,
7379            min: *min,
7380            max: *max,
7381        },
7382        ConditionalDistributionParamsConfig::Discrete { values, weights } => Core::Discrete {
7383            values: values.clone(),
7384            weights: weights.clone(),
7385        },
7386    }
7387}
7388
/// Distribution parameters for conditional distributions.
///
/// Internally tagged: the variant is selected by a `type` key whose value
/// is the snake_case variant name (`fixed`, `normal`, `log_normal`,
/// `uniform`, `beta`, `discrete`), with the variant's parameters as
/// sibling keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ConditionalDistributionParamsConfig {
    /// Fixed value.
    Fixed {
        /// The fixed value
        value: f64,
    },
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum
        min: f64,
        /// Maximum
        max: f64,
    },
    /// Beta distribution (scaled).
    Beta {
        /// Alpha parameter
        alpha: f64,
        /// Beta parameter
        beta: f64,
        /// Minimum output value
        min: f64,
        /// Maximum output value
        max: f64,
    },
    /// Discrete values with weights.
    Discrete {
        /// Possible values
        values: Vec<f64>,
        /// Weights (should sum to 1.0)
        weights: Vec<f64>,
    },
}
7438
7439impl Default for ConditionalDistributionParamsConfig {
7440    fn default() -> Self {
7441        Self::Normal {
7442            mu: 0.0,
7443            sigma: 1.0,
7444        }
7445    }
7446}
7447
7448/// Regime change configuration.
7449#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7450pub struct RegimeChangeSchemaConfig {
7451    /// Enable regime change modeling.
7452    #[serde(default)]
7453    pub enabled: bool,
7454
7455    /// List of regime changes.
7456    #[serde(default)]
7457    pub changes: Vec<RegimeChangeEventConfig>,
7458
7459    /// Economic cycle configuration.
7460    #[serde(default)]
7461    pub economic_cycle: Option<EconomicCycleSchemaConfig>,
7462
7463    /// Parameter drift configurations.
7464    #[serde(default)]
7465    pub parameter_drifts: Vec<ParameterDriftSchemaConfig>,
7466}
7467
7468/// A single regime change event.
7469#[derive(Debug, Clone, Serialize, Deserialize)]
7470pub struct RegimeChangeEventConfig {
7471    /// Date when the change occurs (ISO 8601 format).
7472    pub date: String,
7473
7474    /// Type of regime change.
7475    pub change_type: RegimeChangeTypeConfig,
7476
7477    /// Description of the change.
7478    #[serde(default)]
7479    pub description: Option<String>,
7480
7481    /// Effects of this regime change.
7482    #[serde(default)]
7483    pub effects: Vec<RegimeEffectConfig>,
7484}
7485
/// Type of regime change.
///
/// Serialized in snake_case (`acquisition`, `divestiture`,
/// `price_increase`, `price_decrease`, `product_launch`,
/// `product_discontinuation`, `policy_change`, `competitor_entry`,
/// `custom`). There is no default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RegimeChangeTypeConfig {
    /// Acquisition - sudden volume and amount increase
    Acquisition,
    /// Divestiture - sudden volume and amount decrease
    Divestiture,
    /// Price increase - amounts increase
    PriceIncrease,
    /// Price decrease - amounts decrease
    PriceDecrease,
    /// New product launch - volume ramp-up
    ProductLaunch,
    /// Product discontinuation - volume ramp-down
    ProductDiscontinuation,
    /// Policy change - affects patterns
    PolicyChange,
    /// Competitor entry - market disruption
    CompetitorEntry,
    /// Custom effect
    Custom,
}
7509
/// Effect of a regime change on a specific field.
///
/// Both keys are required; there are no serde defaults on this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeEffectConfig {
    /// Field being affected.
    pub field: String,

    /// Multiplier to apply (1.0 = no change, 1.5 = 50% increase).
    pub multiplier: f64,
}
7519
7520/// Economic cycle configuration.
7521#[derive(Debug, Clone, Serialize, Deserialize)]
7522pub struct EconomicCycleSchemaConfig {
7523    /// Enable economic cycle modeling.
7524    #[serde(default)]
7525    pub enabled: bool,
7526
7527    /// Cycle period in months (e.g., 48 for 4-year business cycle).
7528    #[serde(default = "default_cycle_period")]
7529    pub period_months: u32,
7530
7531    /// Amplitude of cycle effect (0.0-1.0).
7532    #[serde(default = "default_cycle_amplitude")]
7533    pub amplitude: f64,
7534
7535    /// Phase offset in months.
7536    #[serde(default)]
7537    pub phase_offset: u32,
7538
7539    /// Recession periods (start_month, duration_months).
7540    #[serde(default)]
7541    pub recessions: Vec<RecessionPeriodConfig>,
7542}
7543
/// Serde default for `EconomicCycleSchemaConfig::period_months`: 48
/// months.
fn default_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}
7547
/// Serde default for `EconomicCycleSchemaConfig::amplitude`: `0.15`.
fn default_cycle_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
7551
7552impl Default for EconomicCycleSchemaConfig {
7553    fn default() -> Self {
7554        Self {
7555            enabled: false,
7556            period_months: 48,
7557            amplitude: 0.15,
7558            phase_offset: 0,
7559            recessions: Vec::new(),
7560        }
7561    }
7562}
7563
7564/// Recession period configuration.
7565#[derive(Debug, Clone, Serialize, Deserialize)]
7566pub struct RecessionPeriodConfig {
7567    /// Start month (0-indexed from generation start).
7568    pub start_month: u32,
7569
7570    /// Duration in months.
7571    pub duration_months: u32,
7572
7573    /// Severity (0.0-1.0, affects volume reduction).
7574    #[serde(default = "default_recession_severity")]
7575    pub severity: f64,
7576}
7577
7578impl RegimeChangeSchemaConfig {
7579    /// Populate the regime-change, economic-cycle, and parameter-drift
7580    /// slots on a `DriftConfig` from this schema config. v3.5.2+.
7581    ///
7582    /// `generation_start` must match `config.global.start_date` so that
7583    /// absolute regime-change dates can be mapped to 0-indexed periods.
7584    /// Unparseable / out-of-range dates are silently skipped to keep
7585    /// runtime robust against user typos.
7586    pub fn apply_to(
7587        &self,
7588        drift: &mut datasynth_core::distributions::DriftConfig,
7589        generation_start: chrono::NaiveDate,
7590    ) {
7591        if !self.enabled {
7592            return;
7593        }
7594
7595        // Enable drift if any regime-change feature wants it.
7596        drift.enabled = true;
7597
7598        // Regime-change events (absolute dates → period offsets).
7599        for event in &self.changes {
7600            let period = match chrono::NaiveDate::parse_from_str(&event.date, "%Y-%m-%d") {
7601                Ok(d) => {
7602                    let days = (d - generation_start).num_days();
7603                    if days < 0 {
7604                        continue;
7605                    }
7606                    // Approximate month by dividing by 30.4 so we don't
7607                    // need chrono::Months arithmetic.
7608                    (days as f64 / 30.4).round() as u32
7609                }
7610                Err(_) => continue,
7611            };
7612            let change_type = convert_regime_change_type(event.change_type);
7613            let core_effects = event
7614                .effects
7615                .iter()
7616                .map(|e| datasynth_core::distributions::RegimeEffect {
7617                    field: e.field.clone(),
7618                    multiplier: e.multiplier,
7619                })
7620                .collect();
7621            drift
7622                .regime_changes
7623                .push(datasynth_core::distributions::RegimeChange {
7624                    period,
7625                    change_type,
7626                    description: event.description.clone(),
7627                    effects: core_effects,
7628                    transition_periods: 0,
7629                });
7630        }
7631
7632        // Economic cycle.
7633        if let Some(ec) = &self.economic_cycle {
7634            if ec.enabled {
7635                let recession_periods: Vec<u32> = ec
7636                    .recessions
7637                    .iter()
7638                    .flat_map(|r| r.start_month..r.start_month + r.duration_months)
7639                    .collect();
7640                // Use the most-severe recession as the severity driver;
7641                // fall back to default when none declared.
7642                let severity = ec
7643                    .recessions
7644                    .iter()
7645                    .map(|r| 1.0 - r.severity)
7646                    .fold(0.75f64, f64::min);
7647                drift.economic_cycle = datasynth_core::distributions::EconomicCycleConfig {
7648                    enabled: true,
7649                    cycle_length: ec.period_months,
7650                    amplitude: ec.amplitude,
7651                    phase_offset: ec.phase_offset,
7652                    recession_periods,
7653                    recession_severity: severity,
7654                };
7655                drift.drift_type = datasynth_core::distributions::DriftType::Mixed;
7656            }
7657        }
7658
7659        // Parameter drifts.
7660        for pd in &self.parameter_drifts {
7661            let drift_type = match pd.drift_type {
7662                ParameterDriftTypeConfig::Linear => {
7663                    datasynth_core::distributions::ParameterDriftType::Linear
7664                }
7665                ParameterDriftTypeConfig::Exponential => {
7666                    datasynth_core::distributions::ParameterDriftType::Exponential
7667                }
7668                ParameterDriftTypeConfig::Logistic => {
7669                    datasynth_core::distributions::ParameterDriftType::Logistic
7670                }
7671                ParameterDriftTypeConfig::Step => {
7672                    datasynth_core::distributions::ParameterDriftType::Step
7673                }
7674            };
7675            drift
7676                .parameter_drifts
7677                .push(datasynth_core::distributions::ParameterDrift {
7678                    parameter: pd.parameter.clone(),
7679                    drift_type,
7680                    initial_value: pd.start_value,
7681                    target_or_rate: pd.end_value,
7682                    start_period: pd.start_period,
7683                    end_period: pd.end_period,
7684                    steepness: 1.0,
7685                });
7686        }
7687    }
7688}
7689
7690fn convert_regime_change_type(
7691    t: RegimeChangeTypeConfig,
7692) -> datasynth_core::distributions::RegimeChangeType {
7693    use datasynth_core::distributions::RegimeChangeType as Core;
7694    match t {
7695        RegimeChangeTypeConfig::Acquisition => Core::Acquisition,
7696        RegimeChangeTypeConfig::Divestiture => Core::Divestiture,
7697        RegimeChangeTypeConfig::PriceIncrease => Core::PriceIncrease,
7698        RegimeChangeTypeConfig::PriceDecrease => Core::PriceDecrease,
7699        RegimeChangeTypeConfig::ProductLaunch => Core::ProductLaunch,
7700        RegimeChangeTypeConfig::ProductDiscontinuation => Core::ProductDiscontinuation,
7701        RegimeChangeTypeConfig::PolicyChange => Core::PolicyChange,
7702        RegimeChangeTypeConfig::CompetitorEntry => Core::CompetitorEntry,
7703        RegimeChangeTypeConfig::Custom => Core::Custom,
7704    }
7705}
7706
/// Serde default for `RecessionPeriodConfig::severity`: `0.20`.
fn default_recession_severity() -> f64 {
    const SEVERITY: f64 = 0.20;
    SEVERITY
}
7710
7711/// Parameter drift configuration.
7712#[derive(Debug, Clone, Serialize, Deserialize)]
7713pub struct ParameterDriftSchemaConfig {
7714    /// Parameter being drifted.
7715    pub parameter: String,
7716
7717    /// Drift type.
7718    pub drift_type: ParameterDriftTypeConfig,
7719
7720    /// Start value.
7721    pub start_value: f64,
7722
7723    /// End value.
7724    pub end_value: f64,
7725
7726    /// Start period (month, 0-indexed).
7727    #[serde(default)]
7728    pub start_period: u32,
7729
7730    /// End period (month, optional - defaults to end of generation).
7731    #[serde(default)]
7732    pub end_period: Option<u32>,
7733}
7734
/// Parameter drift type.
///
/// Serialized in snake_case (`linear`, `exponential`, `logistic`,
/// `step`). The derived `Default` is `Linear`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ParameterDriftTypeConfig {
    /// Linear interpolation
    #[default]
    Linear,
    /// Exponential growth/decay
    Exponential,
    /// S-curve (logistic)
    Logistic,
    /// Step function
    Step,
}
7749
/// Statistical validation configuration.
///
/// Every key is optional; the derived `Default` is disabled, with no
/// tests and default reporting settings.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StatisticalValidationSchemaConfig {
    /// Enable statistical validation.
    #[serde(default)]
    pub enabled: bool,

    /// Statistical tests to run.
    #[serde(default)]
    pub tests: Vec<StatisticalTestConfig>,

    /// Validation reporting configuration.
    #[serde(default)]
    pub reporting: ValidationReportingConfig,
}
7765
7766/// Statistical test configuration.
7767#[derive(Debug, Clone, Serialize, Deserialize)]
7768#[serde(tag = "type", rename_all = "snake_case")]
7769pub enum StatisticalTestConfig {
7770    /// Benford's Law first digit test.
7771    BenfordFirstDigit {
7772        /// Threshold MAD for failure.
7773        #[serde(default = "default_benford_threshold")]
7774        threshold_mad: f64,
7775        /// Warning MAD threshold.
7776        #[serde(default = "default_benford_warning")]
7777        warning_mad: f64,
7778    },
7779    /// Distribution fit test.
7780    DistributionFit {
7781        /// Target distribution to test.
7782        target: TargetDistributionConfig,
7783        /// K-S test significance level.
7784        #[serde(default = "default_ks_significance")]
7785        ks_significance: f64,
7786        /// Test method (ks, anderson_darling, chi_squared).
7787        #[serde(default)]
7788        method: DistributionFitMethod,
7789    },
7790    /// Correlation check.
7791    CorrelationCheck {
7792        /// Expected correlations to validate.
7793        expected_correlations: Vec<ExpectedCorrelationConfig>,
7794    },
7795    /// Chi-squared test.
7796    ChiSquared {
7797        /// Number of bins.
7798        #[serde(default = "default_chi_squared_bins")]
7799        bins: usize,
7800        /// Significance level.
7801        #[serde(default = "default_chi_squared_significance")]
7802        significance: f64,
7803    },
7804    /// Anderson-Darling test.
7805    AndersonDarling {
7806        /// Target distribution.
7807        target: TargetDistributionConfig,
7808        /// Significance level.
7809        #[serde(default = "default_ad_significance")]
7810        significance: f64,
7811    },
7812}
7813
/// Serde default for `BenfordFirstDigit::threshold_mad`: `0.015`.
fn default_benford_threshold() -> f64 {
    const THRESHOLD_MAD: f64 = 0.015;
    THRESHOLD_MAD
}
7817
/// Serde default for `BenfordFirstDigit::warning_mad`: `0.010`.
fn default_benford_warning() -> f64 {
    const WARNING_MAD: f64 = 0.010;
    WARNING_MAD
}
7821
/// Serde default for `DistributionFit::ks_significance`: `0.05`.
fn default_ks_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7825
/// Serde default for `ChiSquared::bins`: 10 bins.
fn default_chi_squared_bins() -> usize {
    const BINS: usize = 10;
    BINS
}
7829
/// Serde default for `ChiSquared::significance`: `0.05`.
fn default_chi_squared_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7833
/// Serde default for `AndersonDarling::significance`: `0.05`.
fn default_ad_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7837
/// Target distribution for fit tests.
///
/// Serialized in snake_case (`normal`, `log_normal`, `exponential`,
/// `uniform`). The derived `Default` is `LogNormal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TargetDistributionConfig {
    /// Normal distribution
    Normal,
    /// Log-normal distribution
    #[default]
    LogNormal,
    /// Exponential distribution
    Exponential,
    /// Uniform distribution
    Uniform,
}
7852
7853/// Distribution fit test method.
7854#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7855#[serde(rename_all = "snake_case")]
7856pub enum DistributionFitMethod {
7857    /// Kolmogorov-Smirnov test
7858    #[default]
7859    KolmogorovSmirnov,
7860    /// Anderson-Darling test
7861    AndersonDarling,
7862    /// Chi-squared test
7863    ChiSquared,
7864}
7865
7866/// Validation reporting configuration.
7867#[derive(Debug, Clone, Serialize, Deserialize)]
7868pub struct ValidationReportingConfig {
7869    /// Output validation report to file.
7870    #[serde(default)]
7871    pub output_report: bool,
7872
7873    /// Report format.
7874    #[serde(default)]
7875    pub format: ValidationReportFormat,
7876
7877    /// Fail generation if validation fails.
7878    #[serde(default)]
7879    pub fail_on_error: bool,
7880
7881    /// Include detailed statistics in report.
7882    #[serde(default = "default_true")]
7883    pub include_details: bool,
7884}
7885
7886impl Default for ValidationReportingConfig {
7887    fn default() -> Self {
7888        Self {
7889            output_report: false,
7890            format: ValidationReportFormat::Json,
7891            fail_on_error: false,
7892            include_details: true,
7893        }
7894    }
7895}
7896
/// Validation report format.
///
/// Serialized in snake_case (`json`, `yaml`, `html`). The derived
/// `Default` is `Json`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ValidationReportFormat {
    /// JSON format
    #[default]
    Json,
    /// YAML format
    Yaml,
    /// HTML report
    Html,
}
7909
7910// =============================================================================
7911// Temporal Patterns Configuration
7912// =============================================================================
7913
7914/// Temporal patterns configuration for business days, period-end dynamics, and processing lags.
7915///
7916/// This section enables sophisticated temporal modeling including:
7917/// - Business day calculations and settlement dates
7918/// - Regional holiday calendars
7919/// - Period-end decay curves (non-flat volume spikes)
7920/// - Processing lag modeling (event-to-posting delays)
7921#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7922pub struct TemporalPatternsConfig {
7923    /// Enable temporal patterns features.
7924    #[serde(default)]
7925    pub enabled: bool,
7926
7927    /// Business day calculation configuration.
7928    #[serde(default)]
7929    pub business_days: BusinessDaySchemaConfig,
7930
7931    /// Regional calendar configuration.
7932    #[serde(default)]
7933    pub calendars: CalendarSchemaConfig,
7934
7935    /// Period-end dynamics configuration.
7936    #[serde(default)]
7937    pub period_end: PeriodEndSchemaConfig,
7938
7939    /// Processing lag configuration.
7940    #[serde(default)]
7941    pub processing_lags: ProcessingLagSchemaConfig,
7942
7943    /// Fiscal calendar configuration (custom year start, 4-4-5, 13-period).
7944    #[serde(default)]
7945    pub fiscal_calendar: FiscalCalendarSchemaConfig,
7946
7947    /// Intra-day patterns configuration (morning spike, lunch dip, EOD rush).
7948    #[serde(default)]
7949    pub intraday: IntraDaySchemaConfig,
7950
7951    /// Timezone handling configuration.
7952    #[serde(default)]
7953    pub timezones: TimezoneSchemaConfig,
7954}
7955
7956/// Business day calculation configuration.
7957#[derive(Debug, Clone, Serialize, Deserialize)]
7958pub struct BusinessDaySchemaConfig {
7959    /// Enable business day calculations.
7960    #[serde(default = "default_true")]
7961    pub enabled: bool,
7962
7963    /// Half-day policy: "full_day", "half_day", "non_business_day".
7964    #[serde(default = "default_half_day_policy")]
7965    pub half_day_policy: String,
7966
7967    /// Settlement rules configuration.
7968    #[serde(default)]
7969    pub settlement_rules: SettlementRulesSchemaConfig,
7970
7971    /// Month-end convention: "modified_following", "preceding", "following", "end_of_month".
7972    #[serde(default = "default_month_end_convention")]
7973    pub month_end_convention: String,
7974
7975    /// Weekend days (e.g., ["saturday", "sunday"] or ["friday", "saturday"] for Middle East).
7976    #[serde(default)]
7977    pub weekend_days: Option<Vec<String>>,
7978}
7979
/// Serde default for `BusinessDaySchemaConfig::half_day_policy`.
fn default_half_day_policy() -> String {
    String::from("half_day")
}
7983
/// Serde default for `BusinessDaySchemaConfig::month_end_convention`.
fn default_month_end_convention() -> String {
    String::from("modified_following")
}
7987
7988impl Default for BusinessDaySchemaConfig {
7989    fn default() -> Self {
7990        Self {
7991            enabled: true,
7992            half_day_policy: "half_day".to_string(),
7993            settlement_rules: SettlementRulesSchemaConfig::default(),
7994            month_end_convention: "modified_following".to_string(),
7995            weekend_days: None,
7996        }
7997    }
7998}
7999
8000/// Settlement rules configuration.
8001#[derive(Debug, Clone, Serialize, Deserialize)]
8002pub struct SettlementRulesSchemaConfig {
8003    /// Equity settlement days (T+N).
8004    #[serde(default = "default_settlement_2")]
8005    pub equity_days: i32,
8006
8007    /// Government bonds settlement days.
8008    #[serde(default = "default_settlement_1")]
8009    pub government_bonds_days: i32,
8010
8011    /// FX spot settlement days.
8012    #[serde(default = "default_settlement_2")]
8013    pub fx_spot_days: i32,
8014
8015    /// Corporate bonds settlement days.
8016    #[serde(default = "default_settlement_2")]
8017    pub corporate_bonds_days: i32,
8018
8019    /// Wire transfer cutoff time (HH:MM format).
8020    #[serde(default = "default_wire_cutoff")]
8021    pub wire_cutoff_time: String,
8022
8023    /// International wire settlement days.
8024    #[serde(default = "default_settlement_1")]
8025    pub wire_international_days: i32,
8026
8027    /// ACH settlement days.
8028    #[serde(default = "default_settlement_1")]
8029    pub ach_days: i32,
8030}
8031
/// Serde default of one settlement day.
fn default_settlement_1() -> i32 {
    const ONE_DAY: i32 = 1;
    ONE_DAY
}
8035
/// Serde default of two settlement days.
fn default_settlement_2() -> i32 {
    const TWO_DAYS: i32 = 2;
    TWO_DAYS
}
8039
/// Serde default for `SettlementRulesSchemaConfig::wire_cutoff_time` (HH:MM).
fn default_wire_cutoff() -> String {
    String::from("14:00")
}
8043
8044impl Default for SettlementRulesSchemaConfig {
8045    fn default() -> Self {
8046        Self {
8047            equity_days: 2,
8048            government_bonds_days: 1,
8049            fx_spot_days: 2,
8050            corporate_bonds_days: 2,
8051            wire_cutoff_time: "14:00".to_string(),
8052            wire_international_days: 1,
8053            ach_days: 1,
8054        }
8055    }
8056}
8057
8058/// Regional calendar configuration.
8059#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8060pub struct CalendarSchemaConfig {
8061    /// List of regions to include (e.g., ["US", "DE", "BR", "SG", "KR"]).
8062    #[serde(default)]
8063    pub regions: Vec<String>,
8064
8065    /// Custom holidays (in addition to regional calendars).
8066    #[serde(default)]
8067    pub custom_holidays: Vec<CustomHolidaySchemaConfig>,
8068}
8069
8070/// Custom holiday configuration.
8071#[derive(Debug, Clone, Serialize, Deserialize)]
8072pub struct CustomHolidaySchemaConfig {
8073    /// Holiday name.
8074    pub name: String,
8075    /// Month (1-12).
8076    pub month: u8,
8077    /// Day of month.
8078    pub day: u8,
8079    /// Activity multiplier (0.0-1.0, default 0.05).
8080    #[serde(default = "default_holiday_multiplier")]
8081    pub activity_multiplier: f64,
8082}
8083
/// Serde default for `CustomHolidaySchemaConfig::activity_multiplier`.
fn default_holiday_multiplier() -> f64 {
    const HOLIDAY_ACTIVITY: f64 = 0.05;
    HOLIDAY_ACTIVITY
}
8087
8088/// Period-end dynamics configuration.
8089#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8090pub struct PeriodEndSchemaConfig {
8091    /// Model type: "flat", "exponential", "extended_crunch", "daily_profile".
8092    #[serde(default)]
8093    pub model: Option<String>,
8094
8095    /// Month-end configuration.
8096    #[serde(default)]
8097    pub month_end: Option<PeriodEndModelSchemaConfig>,
8098
8099    /// Quarter-end configuration.
8100    #[serde(default)]
8101    pub quarter_end: Option<PeriodEndModelSchemaConfig>,
8102
8103    /// Year-end configuration.
8104    #[serde(default)]
8105    pub year_end: Option<PeriodEndModelSchemaConfig>,
8106}
8107
8108/// Period-end model configuration.
8109#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8110pub struct PeriodEndModelSchemaConfig {
8111    /// Inherit configuration from another period (e.g., "month_end").
8112    #[serde(default)]
8113    pub inherit_from: Option<String>,
8114
8115    /// Additional multiplier on top of inherited/base model.
8116    #[serde(default)]
8117    pub additional_multiplier: Option<f64>,
8118
8119    /// Days before period end to start acceleration (negative, e.g., -10).
8120    #[serde(default)]
8121    pub start_day: Option<i32>,
8122
8123    /// Base multiplier at start of acceleration.
8124    #[serde(default)]
8125    pub base_multiplier: Option<f64>,
8126
8127    /// Peak multiplier on last day.
8128    #[serde(default)]
8129    pub peak_multiplier: Option<f64>,
8130
8131    /// Decay rate for exponential model (0.1-0.5 typical).
8132    #[serde(default)]
8133    pub decay_rate: Option<f64>,
8134
8135    /// Sustained high days for crunch model.
8136    #[serde(default)]
8137    pub sustained_high_days: Option<i32>,
8138}
8139
8140/// Processing lag configuration.
8141#[derive(Debug, Clone, Serialize, Deserialize)]
8142pub struct ProcessingLagSchemaConfig {
8143    /// Enable processing lag calculations.
8144    #[serde(default = "default_true")]
8145    pub enabled: bool,
8146
8147    /// Sales order lag configuration (log-normal mu, sigma).
8148    #[serde(default)]
8149    pub sales_order_lag: Option<LagDistributionSchemaConfig>,
8150
8151    /// Purchase order lag configuration.
8152    #[serde(default)]
8153    pub purchase_order_lag: Option<LagDistributionSchemaConfig>,
8154
8155    /// Goods receipt lag configuration.
8156    #[serde(default)]
8157    pub goods_receipt_lag: Option<LagDistributionSchemaConfig>,
8158
8159    /// Invoice receipt lag configuration.
8160    #[serde(default)]
8161    pub invoice_receipt_lag: Option<LagDistributionSchemaConfig>,
8162
8163    /// Invoice issue lag configuration.
8164    #[serde(default)]
8165    pub invoice_issue_lag: Option<LagDistributionSchemaConfig>,
8166
8167    /// Payment lag configuration.
8168    #[serde(default)]
8169    pub payment_lag: Option<LagDistributionSchemaConfig>,
8170
8171    /// Journal entry lag configuration.
8172    #[serde(default)]
8173    pub journal_entry_lag: Option<LagDistributionSchemaConfig>,
8174
8175    /// Cross-day posting configuration.
8176    #[serde(default)]
8177    pub cross_day_posting: Option<CrossDayPostingSchemaConfig>,
8178}
8179
8180impl Default for ProcessingLagSchemaConfig {
8181    fn default() -> Self {
8182        Self {
8183            enabled: true,
8184            sales_order_lag: None,
8185            purchase_order_lag: None,
8186            goods_receipt_lag: None,
8187            invoice_receipt_lag: None,
8188            invoice_issue_lag: None,
8189            payment_lag: None,
8190            journal_entry_lag: None,
8191            cross_day_posting: None,
8192        }
8193    }
8194}
8195
8196/// Lag distribution configuration (log-normal parameters).
8197#[derive(Debug, Clone, Serialize, Deserialize)]
8198pub struct LagDistributionSchemaConfig {
8199    /// Log-scale mean (mu for log-normal).
8200    pub mu: f64,
8201    /// Log-scale standard deviation (sigma for log-normal).
8202    pub sigma: f64,
8203    /// Minimum lag in hours.
8204    #[serde(default)]
8205    pub min_hours: Option<f64>,
8206    /// Maximum lag in hours.
8207    #[serde(default)]
8208    pub max_hours: Option<f64>,
8209}
8210
8211/// Cross-day posting configuration.
8212#[derive(Debug, Clone, Serialize, Deserialize)]
8213pub struct CrossDayPostingSchemaConfig {
8214    /// Enable cross-day posting logic.
8215    #[serde(default = "default_true")]
8216    pub enabled: bool,
8217
8218    /// Probability of next-day posting by hour (map of hour -> probability).
8219    /// E.g., { 17: 0.7, 19: 0.9, 21: 0.99 }
8220    #[serde(default)]
8221    pub probability_by_hour: std::collections::HashMap<u8, f64>,
8222}
8223
8224impl Default for CrossDayPostingSchemaConfig {
8225    fn default() -> Self {
8226        let mut probability_by_hour = std::collections::HashMap::new();
8227        probability_by_hour.insert(17, 0.3);
8228        probability_by_hour.insert(18, 0.6);
8229        probability_by_hour.insert(19, 0.8);
8230        probability_by_hour.insert(20, 0.9);
8231        probability_by_hour.insert(21, 0.95);
8232        probability_by_hour.insert(22, 0.99);
8233
8234        Self {
8235            enabled: true,
8236            probability_by_hour,
8237        }
8238    }
8239}
8240
8241// =============================================================================
8242// Fiscal Calendar Configuration (P2)
8243// =============================================================================
8244
8245/// Fiscal calendar configuration.
8246///
8247/// Supports calendar year, custom year start, 4-4-5 retail calendar,
8248/// and 13-period calendars.
8249#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8250pub struct FiscalCalendarSchemaConfig {
8251    /// Enable non-standard fiscal calendar.
8252    #[serde(default)]
8253    pub enabled: bool,
8254
8255    /// Fiscal calendar type: "calendar_year", "custom", "four_four_five", "thirteen_period".
8256    #[serde(default = "default_fiscal_calendar_type")]
8257    pub calendar_type: String,
8258
8259    /// Month the fiscal year starts (1-12). Used for custom year start.
8260    #[serde(default)]
8261    pub year_start_month: Option<u8>,
8262
8263    /// Day the fiscal year starts (1-31). Used for custom year start.
8264    #[serde(default)]
8265    pub year_start_day: Option<u8>,
8266
8267    /// 4-4-5 calendar configuration (if calendar_type is "four_four_five").
8268    #[serde(default)]
8269    pub four_four_five: Option<FourFourFiveSchemaConfig>,
8270}
8271
8272fn default_fiscal_calendar_type() -> String {
8273    "calendar_year".to_string()
8274}
8275
8276/// 4-4-5 retail calendar configuration.
8277#[derive(Debug, Clone, Serialize, Deserialize)]
8278pub struct FourFourFiveSchemaConfig {
8279    /// Week pattern: "four_four_five", "four_five_four", "five_four_four".
8280    #[serde(default = "default_week_pattern")]
8281    pub pattern: String,
8282
8283    /// Anchor type: "first_sunday", "last_saturday", "nearest_saturday".
8284    #[serde(default = "default_anchor_type")]
8285    pub anchor_type: String,
8286
8287    /// Anchor month (1-12).
8288    #[serde(default = "default_anchor_month")]
8289    pub anchor_month: u8,
8290
8291    /// Where to place leap week: "q4_period3" or "q1_period1".
8292    #[serde(default = "default_leap_week_placement")]
8293    pub leap_week_placement: String,
8294}
8295
/// Serde default for `FourFourFiveSchemaConfig::pattern`.
fn default_week_pattern() -> String {
    String::from("four_four_five")
}
8299
/// Serde default for `FourFourFiveSchemaConfig::anchor_type`.
fn default_anchor_type() -> String {
    String::from("last_saturday")
}
8303
/// Serde default for `FourFourFiveSchemaConfig::anchor_month`.
fn default_anchor_month() -> u8 {
    const JANUARY: u8 = 1;
    JANUARY
}
8307
/// Serde default for `FourFourFiveSchemaConfig::leap_week_placement`.
fn default_leap_week_placement() -> String {
    String::from("q4_period3")
}
8311
8312impl Default for FourFourFiveSchemaConfig {
8313    fn default() -> Self {
8314        Self {
8315            pattern: "four_four_five".to_string(),
8316            anchor_type: "last_saturday".to_string(),
8317            anchor_month: 1,
8318            leap_week_placement: "q4_period3".to_string(),
8319        }
8320    }
8321}
8322
8323// =============================================================================
8324// Intra-Day Patterns Configuration (P2)
8325// =============================================================================
8326
8327/// Intra-day patterns configuration.
8328///
8329/// Defines time-of-day segments with different activity multipliers
8330/// for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
8331#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8332pub struct IntraDaySchemaConfig {
8333    /// Enable intra-day patterns.
8334    #[serde(default)]
8335    pub enabled: bool,
8336
8337    /// Custom intra-day segments.
8338    #[serde(default)]
8339    pub segments: Vec<IntraDaySegmentSchemaConfig>,
8340}
8341
8342/// Intra-day segment configuration.
8343#[derive(Debug, Clone, Serialize, Deserialize)]
8344pub struct IntraDaySegmentSchemaConfig {
8345    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
8346    pub name: String,
8347
8348    /// Start time (HH:MM format).
8349    pub start: String,
8350
8351    /// End time (HH:MM format).
8352    pub end: String,
8353
8354    /// Activity multiplier (1.0 = normal).
8355    #[serde(default = "default_multiplier")]
8356    pub multiplier: f64,
8357
8358    /// Posting type: "human", "system", "both".
8359    #[serde(default = "default_posting_type")]
8360    pub posting_type: String,
8361}
8362
/// Serde default for `IntraDaySegmentSchemaConfig::multiplier` (1.0 = normal).
fn default_multiplier() -> f64 {
    const NEUTRAL: f64 = 1.0;
    NEUTRAL
}
8366
/// Serde default for `IntraDaySegmentSchemaConfig::posting_type`.
fn default_posting_type() -> String {
    String::from("both")
}
8370
8371// =============================================================================
8372// Timezone Configuration
8373// =============================================================================
8374
8375/// Timezone handling configuration for multi-region entities.
8376#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8377pub struct TimezoneSchemaConfig {
8378    /// Enable timezone handling.
8379    #[serde(default)]
8380    pub enabled: bool,
8381
8382    /// Default timezone (IANA format, e.g., "America/New_York").
8383    #[serde(default = "default_timezone")]
8384    pub default_timezone: String,
8385
8386    /// Consolidation timezone for group reporting (IANA format).
8387    #[serde(default = "default_consolidation_timezone")]
8388    pub consolidation_timezone: String,
8389
8390    /// Entity-to-timezone mappings.
8391    /// Supports patterns like "EU_*" -> "Europe/London".
8392    #[serde(default)]
8393    pub entity_mappings: Vec<EntityTimezoneMapping>,
8394}
8395
8396fn default_timezone() -> String {
8397    "America/New_York".to_string()
8398}
8399
8400fn default_consolidation_timezone() -> String {
8401    "UTC".to_string()
8402}
8403
8404/// Mapping from entity pattern to timezone.
8405#[derive(Debug, Clone, Serialize, Deserialize)]
8406pub struct EntityTimezoneMapping {
8407    /// Entity code pattern (e.g., "EU_*", "*_APAC", "1000").
8408    pub pattern: String,
8409
8410    /// Timezone (IANA format, e.g., "Europe/London").
8411    pub timezone: String,
8412}
8413
8414// =============================================================================
8415// Vendor Network Configuration
8416// =============================================================================
8417
8418/// Configuration for multi-tier vendor network generation.
8419#[derive(Debug, Clone, Serialize, Deserialize)]
8420pub struct VendorNetworkSchemaConfig {
8421    /// Enable vendor network generation.
8422    #[serde(default)]
8423    pub enabled: bool,
8424
8425    /// Maximum depth of supply chain tiers (1-3).
8426    #[serde(default = "default_vendor_tier_depth")]
8427    pub depth: u8,
8428
8429    /// Tier 1 vendor count configuration.
8430    #[serde(default)]
8431    pub tier1: TierCountSchemaConfig,
8432
8433    /// Tier 2 vendors per Tier 1 parent.
8434    #[serde(default)]
8435    pub tier2_per_parent: TierCountSchemaConfig,
8436
8437    /// Tier 3 vendors per Tier 2 parent.
8438    #[serde(default)]
8439    pub tier3_per_parent: TierCountSchemaConfig,
8440
8441    /// Vendor cluster distribution.
8442    #[serde(default)]
8443    pub clusters: VendorClusterSchemaConfig,
8444
8445    /// Concentration limits.
8446    #[serde(default)]
8447    pub dependencies: DependencySchemaConfig,
8448}
8449
8450fn default_vendor_tier_depth() -> u8 {
8451    3
8452}
8453
8454impl Default for VendorNetworkSchemaConfig {
8455    fn default() -> Self {
8456        Self {
8457            enabled: false,
8458            depth: 3,
8459            tier1: TierCountSchemaConfig { min: 50, max: 100 },
8460            tier2_per_parent: TierCountSchemaConfig { min: 4, max: 10 },
8461            tier3_per_parent: TierCountSchemaConfig { min: 2, max: 5 },
8462            clusters: VendorClusterSchemaConfig::default(),
8463            dependencies: DependencySchemaConfig::default(),
8464        }
8465    }
8466}
8467
8468/// Tier count configuration.
8469#[derive(Debug, Clone, Serialize, Deserialize)]
8470pub struct TierCountSchemaConfig {
8471    /// Minimum count.
8472    #[serde(default = "default_tier_min")]
8473    pub min: usize,
8474
8475    /// Maximum count.
8476    #[serde(default = "default_tier_max")]
8477    pub max: usize,
8478}
8479
/// Serde default for `TierCountSchemaConfig::min`.
fn default_tier_min() -> usize {
    const MIN_COUNT: usize = 5;
    MIN_COUNT
}
8483
/// Serde default for `TierCountSchemaConfig::max`.
fn default_tier_max() -> usize {
    const MAX_COUNT: usize = 20;
    MAX_COUNT
}
8487
8488impl Default for TierCountSchemaConfig {
8489    fn default() -> Self {
8490        Self {
8491            min: default_tier_min(),
8492            max: default_tier_max(),
8493        }
8494    }
8495}
8496
8497/// Vendor cluster distribution configuration.
8498#[derive(Debug, Clone, Serialize, Deserialize)]
8499pub struct VendorClusterSchemaConfig {
8500    /// Reliable strategic vendors percentage (default: 0.20).
8501    #[serde(default = "default_reliable_strategic")]
8502    pub reliable_strategic: f64,
8503
8504    /// Standard operational vendors percentage (default: 0.50).
8505    #[serde(default = "default_standard_operational")]
8506    pub standard_operational: f64,
8507
8508    /// Transactional vendors percentage (default: 0.25).
8509    #[serde(default = "default_transactional")]
8510    pub transactional: f64,
8511
8512    /// Problematic vendors percentage (default: 0.05).
8513    #[serde(default = "default_problematic")]
8514    pub problematic: f64,
8515}
8516
/// Serde default for `VendorClusterSchemaConfig::reliable_strategic`.
fn default_reliable_strategic() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
8520
/// Serde default for `VendorClusterSchemaConfig::standard_operational`.
fn default_standard_operational() -> f64 {
    const SHARE: f64 = 0.50;
    SHARE
}
8524
/// Serde default for `VendorClusterSchemaConfig::transactional`.
fn default_transactional() -> f64 {
    const SHARE: f64 = 0.25;
    SHARE
}
8528
/// Serde default for `VendorClusterSchemaConfig::problematic`.
fn default_problematic() -> f64 {
    const SHARE: f64 = 0.05;
    SHARE
}
8532
8533impl Default for VendorClusterSchemaConfig {
8534    fn default() -> Self {
8535        Self {
8536            reliable_strategic: 0.20,
8537            standard_operational: 0.50,
8538            transactional: 0.25,
8539            problematic: 0.05,
8540        }
8541    }
8542}
8543
8544/// Dependency and concentration limits configuration.
8545#[derive(Debug, Clone, Serialize, Deserialize)]
8546pub struct DependencySchemaConfig {
8547    /// Maximum concentration for a single vendor (default: 0.15).
8548    #[serde(default = "default_max_single_vendor")]
8549    pub max_single_vendor_concentration: f64,
8550
8551    /// Maximum concentration for top 5 vendors (default: 0.45).
8552    #[serde(default = "default_max_top5")]
8553    pub top_5_concentration: f64,
8554
8555    /// Percentage of single-source vendors (default: 0.05).
8556    #[serde(default = "default_single_source_percent")]
8557    pub single_source_percent: f64,
8558}
8559
/// Serde default for `DependencySchemaConfig::max_single_vendor_concentration`.
fn default_max_single_vendor() -> f64 {
    const LIMIT: f64 = 0.15;
    LIMIT
}
8563
/// Serde default for `DependencySchemaConfig::top_5_concentration`.
fn default_max_top5() -> f64 {
    const LIMIT: f64 = 0.45;
    LIMIT
}
8567
/// Serde default for `DependencySchemaConfig::single_source_percent`.
fn default_single_source_percent() -> f64 {
    const SHARE: f64 = 0.05;
    SHARE
}
8571
8572impl Default for DependencySchemaConfig {
8573    fn default() -> Self {
8574        Self {
8575            max_single_vendor_concentration: 0.15,
8576            top_5_concentration: 0.45,
8577            single_source_percent: 0.05,
8578        }
8579    }
8580}
8581
8582// =============================================================================
8583// Customer Segmentation Configuration
8584// =============================================================================
8585
8586/// Configuration for customer segmentation generation.
8587#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8588pub struct CustomerSegmentationSchemaConfig {
8589    /// Enable customer segmentation generation.
8590    #[serde(default)]
8591    pub enabled: bool,
8592
8593    /// Value segment distribution.
8594    #[serde(default)]
8595    pub value_segments: ValueSegmentsSchemaConfig,
8596
8597    /// Lifecycle stage configuration.
8598    #[serde(default)]
8599    pub lifecycle: LifecycleSchemaConfig,
8600
8601    /// Network (referrals, hierarchies) configuration.
8602    #[serde(default)]
8603    pub networks: CustomerNetworksSchemaConfig,
8604}
8605
8606/// Customer value segments distribution configuration.
8607#[derive(Debug, Clone, Serialize, Deserialize)]
8608pub struct ValueSegmentsSchemaConfig {
8609    /// Enterprise segment configuration.
8610    #[serde(default)]
8611    pub enterprise: SegmentDetailSchemaConfig,
8612
8613    /// Mid-market segment configuration.
8614    #[serde(default)]
8615    pub mid_market: SegmentDetailSchemaConfig,
8616
8617    /// SMB segment configuration.
8618    #[serde(default)]
8619    pub smb: SegmentDetailSchemaConfig,
8620
8621    /// Consumer segment configuration.
8622    #[serde(default)]
8623    pub consumer: SegmentDetailSchemaConfig,
8624}
8625
8626impl Default for ValueSegmentsSchemaConfig {
8627    fn default() -> Self {
8628        Self {
8629            enterprise: SegmentDetailSchemaConfig {
8630                revenue_share: 0.40,
8631                customer_share: 0.05,
8632                avg_order_value_range: "50000+".to_string(),
8633            },
8634            mid_market: SegmentDetailSchemaConfig {
8635                revenue_share: 0.35,
8636                customer_share: 0.20,
8637                avg_order_value_range: "5000-50000".to_string(),
8638            },
8639            smb: SegmentDetailSchemaConfig {
8640                revenue_share: 0.20,
8641                customer_share: 0.50,
8642                avg_order_value_range: "500-5000".to_string(),
8643            },
8644            consumer: SegmentDetailSchemaConfig {
8645                revenue_share: 0.05,
8646                customer_share: 0.25,
8647                avg_order_value_range: "50-500".to_string(),
8648            },
8649        }
8650    }
8651}
8652
8653/// Individual segment detail configuration.
8654#[derive(Debug, Clone, Serialize, Deserialize)]
8655pub struct SegmentDetailSchemaConfig {
8656    /// Revenue share for this segment.
8657    #[serde(default)]
8658    pub revenue_share: f64,
8659
8660    /// Customer share for this segment.
8661    #[serde(default)]
8662    pub customer_share: f64,
8663
8664    /// Average order value range (e.g., "5000-50000" or "50000+").
8665    #[serde(default)]
8666    pub avg_order_value_range: String,
8667}
8668
8669impl Default for SegmentDetailSchemaConfig {
8670    fn default() -> Self {
8671        Self {
8672            revenue_share: 0.25,
8673            customer_share: 0.25,
8674            avg_order_value_range: "1000-10000".to_string(),
8675        }
8676    }
8677}
8678
8679/// Customer lifecycle stage configuration.
8680#[derive(Debug, Clone, Serialize, Deserialize)]
8681pub struct LifecycleSchemaConfig {
8682    /// Prospect stage rate.
8683    #[serde(default)]
8684    pub prospect_rate: f64,
8685
8686    /// New customer stage rate.
8687    #[serde(default = "default_new_rate")]
8688    pub new_rate: f64,
8689
8690    /// Growth stage rate.
8691    #[serde(default = "default_growth_rate")]
8692    pub growth_rate: f64,
8693
8694    /// Mature stage rate.
8695    #[serde(default = "default_mature_rate")]
8696    pub mature_rate: f64,
8697
8698    /// At-risk stage rate.
8699    #[serde(default = "default_at_risk_rate")]
8700    pub at_risk_rate: f64,
8701
8702    /// Churned stage rate.
8703    #[serde(default = "default_churned_rate")]
8704    pub churned_rate: f64,
8705
8706    /// Won-back stage rate (churned customers reacquired).
8707    #[serde(default)]
8708    pub won_back_rate: f64,
8709}
8710
/// Serde default for `LifecycleSchemaConfig::new_rate`.
fn default_new_rate() -> f64 {
    const RATE: f64 = 0.10;
    RATE
}
8714
/// Serde default for `LifecycleSchemaConfig::growth_rate`.
fn default_growth_rate() -> f64 {
    const RATE: f64 = 0.15;
    RATE
}
8718
/// Serde default for `LifecycleSchemaConfig::mature_rate`.
fn default_mature_rate() -> f64 {
    const RATE: f64 = 0.60;
    RATE
}
8722
/// Serde default for `LifecycleSchemaConfig::at_risk_rate`.
fn default_at_risk_rate() -> f64 {
    const RATE: f64 = 0.10;
    RATE
}
8726
/// Serde default for `LifecycleSchemaConfig::churned_rate`.
fn default_churned_rate() -> f64 {
    const RATE: f64 = 0.05;
    RATE
}
8730
8731impl Default for LifecycleSchemaConfig {
8732    fn default() -> Self {
8733        Self {
8734            prospect_rate: 0.0,
8735            new_rate: 0.10,
8736            growth_rate: 0.15,
8737            mature_rate: 0.60,
8738            at_risk_rate: 0.10,
8739            churned_rate: 0.05,
8740            won_back_rate: 0.0,
8741        }
8742    }
8743}
8744
8745/// Customer networks configuration (referrals, hierarchies).
8746#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8747pub struct CustomerNetworksSchemaConfig {
8748    /// Referral network configuration.
8749    #[serde(default)]
8750    pub referrals: ReferralSchemaConfig,
8751
8752    /// Corporate hierarchy configuration.
8753    #[serde(default)]
8754    pub corporate_hierarchies: HierarchySchemaConfig,
8755}
8756
8757/// Referral network configuration.
8758#[derive(Debug, Clone, Serialize, Deserialize)]
8759pub struct ReferralSchemaConfig {
8760    /// Enable referral generation.
8761    #[serde(default = "default_true")]
8762    pub enabled: bool,
8763
8764    /// Rate of customers acquired via referral.
8765    #[serde(default = "default_referral_rate")]
8766    pub referral_rate: f64,
8767}
8768
/// Serde default for `ReferralSchemaConfig::referral_rate`.
fn default_referral_rate() -> f64 {
    const RATE: f64 = 0.15;
    RATE
}
8772
8773impl Default for ReferralSchemaConfig {
8774    fn default() -> Self {
8775        Self {
8776            enabled: true,
8777            referral_rate: 0.15,
8778        }
8779    }
8780}
8781
8782/// Corporate hierarchy configuration.
8783#[derive(Debug, Clone, Serialize, Deserialize)]
8784pub struct HierarchySchemaConfig {
8785    /// Enable corporate hierarchy generation.
8786    #[serde(default = "default_true")]
8787    pub enabled: bool,
8788
8789    /// Rate of customers in hierarchies.
8790    #[serde(default = "default_hierarchy_rate")]
8791    pub probability: f64,
8792}
8793
/// Serde default for `HierarchySchemaConfig::probability`.
fn default_hierarchy_rate() -> f64 {
    const RATE: f64 = 0.30;
    RATE
}
8797
8798impl Default for HierarchySchemaConfig {
8799    fn default() -> Self {
8800        Self {
8801            enabled: true,
8802            probability: 0.30,
8803        }
8804    }
8805}
8806
8807// =============================================================================
8808// Relationship Strength Configuration
8809// =============================================================================
8810
8811/// Configuration for relationship strength calculation.
8812#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8813pub struct RelationshipStrengthSchemaConfig {
8814    /// Enable relationship strength calculation.
8815    #[serde(default)]
8816    pub enabled: bool,
8817
8818    /// Calculation weights.
8819    #[serde(default)]
8820    pub calculation: StrengthCalculationSchemaConfig,
8821
8822    /// Strength thresholds for classification.
8823    #[serde(default)]
8824    pub thresholds: StrengthThresholdsSchemaConfig,
8825}
8826
8827/// Strength calculation weights configuration.
8828#[derive(Debug, Clone, Serialize, Deserialize)]
8829pub struct StrengthCalculationSchemaConfig {
8830    /// Weight for transaction volume (default: 0.30).
8831    #[serde(default = "default_volume_weight")]
8832    pub transaction_volume_weight: f64,
8833
8834    /// Weight for transaction count (default: 0.25).
8835    #[serde(default = "default_count_weight")]
8836    pub transaction_count_weight: f64,
8837
8838    /// Weight for relationship duration (default: 0.20).
8839    #[serde(default = "default_duration_weight")]
8840    pub relationship_duration_weight: f64,
8841
8842    /// Weight for recency (default: 0.15).
8843    #[serde(default = "default_recency_weight")]
8844    pub recency_weight: f64,
8845
8846    /// Weight for mutual connections (default: 0.10).
8847    #[serde(default = "default_mutual_weight")]
8848    pub mutual_connections_weight: f64,
8849
8850    /// Recency half-life in days (default: 90).
8851    #[serde(default = "default_recency_half_life")]
8852    pub recency_half_life_days: u32,
8853}
8854
/// Serde default for `StrengthCalculationSchemaConfig::transaction_volume_weight`.
fn default_volume_weight() -> f64 {
    const WEIGHT: f64 = 0.30;
    WEIGHT
}
8858
/// Serde default for `StrengthCalculationSchemaConfig::transaction_count_weight`.
fn default_count_weight() -> f64 {
    const WEIGHT: f64 = 0.25;
    WEIGHT
}
8862
/// Serde default for `StrengthCalculationSchemaConfig::relationship_duration_weight`.
fn default_duration_weight() -> f64 {
    const WEIGHT: f64 = 0.20;
    WEIGHT
}
8866
/// Serde default for `StrengthCalculationSchemaConfig::recency_weight`.
fn default_recency_weight() -> f64 {
    const WEIGHT: f64 = 0.15;
    WEIGHT
}
8870
/// Serde default for `StrengthCalculationSchemaConfig::mutual_connections_weight`.
fn default_mutual_weight() -> f64 {
    const WEIGHT: f64 = 0.10;
    WEIGHT
}
8874
/// Serde default for `StrengthCalculationSchemaConfig::recency_half_life_days`.
fn default_recency_half_life() -> u32 {
    const HALF_LIFE_DAYS: u32 = 90;
    HALF_LIFE_DAYS
}
8878
8879impl Default for StrengthCalculationSchemaConfig {
8880    fn default() -> Self {
8881        Self {
8882            transaction_volume_weight: 0.30,
8883            transaction_count_weight: 0.25,
8884            relationship_duration_weight: 0.20,
8885            recency_weight: 0.15,
8886            mutual_connections_weight: 0.10,
8887            recency_half_life_days: 90,
8888        }
8889    }
8890}
8891
8892/// Strength thresholds for relationship classification.
8893#[derive(Debug, Clone, Serialize, Deserialize)]
8894pub struct StrengthThresholdsSchemaConfig {
8895    /// Threshold for strong relationships (default: 0.7).
8896    #[serde(default = "default_strong_threshold")]
8897    pub strong: f64,
8898
8899    /// Threshold for moderate relationships (default: 0.4).
8900    #[serde(default = "default_moderate_threshold")]
8901    pub moderate: f64,
8902
8903    /// Threshold for weak relationships (default: 0.1).
8904    #[serde(default = "default_weak_threshold")]
8905    pub weak: f64,
8906}
8907
/// Serde default for `StrengthThresholdsSchemaConfig::strong`.
fn default_strong_threshold() -> f64 {
    const THRESHOLD: f64 = 0.7;
    THRESHOLD
}
8911
/// Serde default for `StrengthThresholdsSchemaConfig::moderate`.
fn default_moderate_threshold() -> f64 {
    const THRESHOLD: f64 = 0.4;
    THRESHOLD
}
8915
/// Serde default for `StrengthThresholdsSchemaConfig::weak`.
fn default_weak_threshold() -> f64 {
    const THRESHOLD: f64 = 0.1;
    THRESHOLD
}
8919
8920impl Default for StrengthThresholdsSchemaConfig {
8921    fn default() -> Self {
8922        Self {
8923            strong: 0.7,
8924            moderate: 0.4,
8925            weak: 0.1,
8926        }
8927    }
8928}
8929
8930// =============================================================================
8931// Cross-Process Links Configuration
8932// =============================================================================
8933
/// Configuration for cross-process linkages.
///
/// The master switch `enabled` is `false` when omitted. The three per-link
/// toggles use the `default_true` serde helper when omitted and are `true` in
/// the `Default` impl; `inventory_link_rate` falls back to `0.30`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossProcessLinksSchemaConfig {
    /// Enable cross-process link generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Enable inventory links between P2P and O2C.
    #[serde(default = "default_true")]
    pub inventory_p2p_o2c: bool,

    /// Enable payment to bank reconciliation links.
    #[serde(default = "default_true")]
    pub payment_bank_reconciliation: bool,

    /// Enable intercompany bilateral matching.
    #[serde(default = "default_true")]
    pub intercompany_bilateral: bool,

    /// Fraction of GR/Deliveries to link via inventory (0.0 - 1.0,
    /// default: 0.30).
    #[serde(default = "default_inventory_link_rate")]
    pub inventory_link_rate: f64,
}
8957
/// Serde fallback for `CrossProcessLinksSchemaConfig::inventory_link_rate`.
fn default_inventory_link_rate() -> f64 {
    const LINK_RATE: f64 = 0.30;
    LINK_RATE
}
8961
8962impl Default for CrossProcessLinksSchemaConfig {
8963    fn default() -> Self {
8964        Self {
8965            enabled: false,
8966            inventory_p2p_o2c: true,
8967            payment_bank_reconciliation: true,
8968            intercompany_bilateral: true,
8969            inventory_link_rate: 0.30,
8970        }
8971    }
8972}
8973
8974// =============================================================================
8975// Organizational Events Configuration
8976// =============================================================================
8977
/// Configuration for organizational events (acquisitions, divestitures, etc.).
///
/// Every field is optional. An empty section deserializes to a disabled
/// config with `Multiplicative` blending and no events of any kind, which is
/// also what the derived `Default` yields.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct OrganizationalEventsSchemaConfig {
    /// Enable organizational events (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Effect blending mode (multiplicative, additive, maximum, minimum).
    #[serde(default)]
    pub effect_blending: EffectBlendingModeConfig,

    /// Organizational events (acquisitions, divestitures, reorganizations, etc.).
    #[serde(default)]
    pub events: Vec<OrganizationalEventSchemaConfig>,

    /// Process evolution events.
    #[serde(default)]
    pub process_evolution: Vec<ProcessEvolutionSchemaConfig>,

    /// Technology transition events.
    #[serde(default)]
    pub technology_transitions: Vec<TechnologyTransitionSchemaConfig>,
}
9001
/// Effect blending mode for combining multiple event effects.
///
/// Serialized in snake_case (`multiplicative`, `additive`, `maximum`,
/// `minimum`). `Multiplicative` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum EffectBlendingModeConfig {
    /// Multiply effects together.
    #[default]
    Multiplicative,
    /// Add effects together.
    Additive,
    /// Take the maximum effect.
    Maximum,
    /// Take the minimum effect.
    Minimum,
}
9016
/// Configuration for a single organizational event.
///
/// `id`, `event_type` and `effective_date` are required; `transition_months`
/// and `description` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizationalEventSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type and configuration.
    pub event_type: OrganizationalEventTypeSchemaConfig,

    /// Effective date, kept as a plain string at the schema layer.
    pub effective_date: String,

    /// Transition duration in months (default: 6).
    #[serde(default = "default_org_transition_months")]
    pub transition_months: u32,

    /// Optional free-text description (`None` when omitted).
    #[serde(default)]
    pub description: Option<String>,
}
9037
/// Serde fallback for `OrganizationalEventSchemaConfig::transition_months`.
fn default_org_transition_months() -> u32 {
    const TRANSITION_MONTHS: u32 = 6;
    TRANSITION_MONTHS
}
9041
/// Organizational event type configuration.
///
/// Internally tagged: the variant is selected by a `type` key whose value is
/// the snake_case variant name (`acquisition`, `divestiture`,
/// `reorganization`, `leadership_change`, `workforce_reduction`, `merger`).
/// The remaining keys of the same map are the variant's fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum OrganizationalEventTypeSchemaConfig {
    /// Acquisition event.
    Acquisition {
        /// Acquired entity code (required).
        acquired_entity: String,
        /// Volume increase multiplier (default: 1.35).
        #[serde(default = "default_acquisition_volume")]
        volume_increase: f64,
        /// Integration error rate (default: 0.05).
        #[serde(default = "default_acquisition_error")]
        integration_error_rate: f64,
        /// Parallel posting days (default: 30).
        #[serde(default = "default_parallel_days")]
        parallel_posting_days: u32,
    },
    /// Divestiture event.
    Divestiture {
        /// Divested entity code (required).
        divested_entity: String,
        /// Volume reduction factor (default: 0.70).
        #[serde(default = "default_divestiture_volume")]
        volume_reduction: f64,
        /// Remove entity from generation (default: `true`).
        #[serde(default = "default_true_val")]
        remove_entity: bool,
    },
    /// Reorganization event.
    Reorganization {
        /// Cost center remapping (empty when omitted).
        #[serde(default)]
        cost_center_remapping: std::collections::HashMap<String, String>,
        /// Transition error rate (default: 0.04).
        #[serde(default = "default_reorg_error")]
        transition_error_rate: f64,
    },
    /// Leadership change event.
    LeadershipChange {
        /// Role that changed (required).
        role: String,
        /// Policy changes (empty when omitted).
        #[serde(default)]
        policy_changes: Vec<String>,
    },
    /// Workforce reduction event.
    WorkforceReduction {
        /// Reduction percentage (default: 0.10).
        #[serde(default = "default_workforce_reduction")]
        reduction_percent: f64,
        /// Error rate increase (default: 0.05).
        #[serde(default = "default_workforce_error")]
        error_rate_increase: f64,
    },
    /// Merger event.
    Merger {
        /// Merged entity code (required).
        merged_entity: String,
        /// Volume increase multiplier (default: 1.80).
        #[serde(default = "default_merger_volume")]
        volume_increase: f64,
    },
}
9106
/// Serde fallback for `Acquisition::volume_increase`.
fn default_acquisition_volume() -> f64 {
    const VOLUME_MULTIPLIER: f64 = 1.35;
    VOLUME_MULTIPLIER
}
9110
/// Serde fallback for `Acquisition::integration_error_rate`.
fn default_acquisition_error() -> f64 {
    const INTEGRATION_ERROR_RATE: f64 = 0.05;
    INTEGRATION_ERROR_RATE
}
9114
/// Serde fallback for `Acquisition::parallel_posting_days`.
fn default_parallel_days() -> u32 {
    const PARALLEL_POSTING_DAYS: u32 = 30;
    PARALLEL_POSTING_DAYS
}
9118
/// Serde fallback for `Divestiture::volume_reduction`.
fn default_divestiture_volume() -> f64 {
    const VOLUME_REDUCTION: f64 = 0.70;
    VOLUME_REDUCTION
}
9122
/// Serde fallback for boolean flags that are on unless disabled explicitly
/// (e.g. `Divestiture::remove_entity` and the drift-labeling `enabled` flags).
///
/// NOTE(review): looks like a duplicate of the `default_true` helper used by
/// `CrossProcessLinksSchemaConfig` — confirm and consider consolidating.
fn default_true_val() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
9126
/// Serde fallback for `Reorganization::transition_error_rate`.
fn default_reorg_error() -> f64 {
    const TRANSITION_ERROR_RATE: f64 = 0.04;
    TRANSITION_ERROR_RATE
}
9130
/// Serde fallback for `WorkforceReduction::reduction_percent`.
fn default_workforce_reduction() -> f64 {
    const REDUCTION_PERCENT: f64 = 0.10;
    REDUCTION_PERCENT
}
9134
/// Serde fallback for `WorkforceReduction::error_rate_increase`.
fn default_workforce_error() -> f64 {
    const ERROR_RATE_INCREASE: f64 = 0.05;
    ERROR_RATE_INCREASE
}
9138
/// Serde fallback for `Merger::volume_increase`.
fn default_merger_volume() -> f64 {
    const VOLUME_MULTIPLIER: f64 = 1.80;
    VOLUME_MULTIPLIER
}
9142
/// Configuration for a process evolution event.
///
/// `id`, `event_type` and `effective_date` are required; `description` is
/// optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessEvolutionSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type.
    pub event_type: ProcessEvolutionTypeSchemaConfig,

    /// Effective date, kept as a plain string at the schema layer.
    pub effective_date: String,

    /// Optional free-text description (`None` when omitted).
    #[serde(default)]
    pub description: Option<String>,
}
9159
/// Process evolution type configuration.
///
/// Internally tagged via a `type` key holding the snake_case variant name
/// (`process_automation`, `approval_workflow_change`, `control_enhancement`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProcessEvolutionTypeSchemaConfig {
    /// Process automation.
    ProcessAutomation {
        /// Process name (required).
        process_name: String,
        /// Manual rate before (default: 0.80).
        #[serde(default = "default_manual_before")]
        manual_rate_before: f64,
        /// Manual rate after (default: 0.15).
        #[serde(default = "default_manual_after")]
        manual_rate_after: f64,
    },
    /// Approval workflow change.
    ApprovalWorkflowChange {
        /// Description (required).
        description: String,
    },
    /// Control enhancement.
    ControlEnhancement {
        /// Control ID (required).
        control_id: String,
        /// Error reduction (default: 0.02).
        #[serde(default = "default_error_reduction")]
        error_reduction: f64,
    },
}
9189
/// Serde fallback for `ProcessAutomation::manual_rate_before`.
fn default_manual_before() -> f64 {
    const MANUAL_RATE_BEFORE: f64 = 0.80;
    MANUAL_RATE_BEFORE
}
9193
/// Serde fallback for `ProcessAutomation::manual_rate_after`.
fn default_manual_after() -> f64 {
    const MANUAL_RATE_AFTER: f64 = 0.15;
    MANUAL_RATE_AFTER
}
9197
/// Serde fallback for `ControlEnhancement::error_reduction`.
fn default_error_reduction() -> f64 {
    const ERROR_REDUCTION: f64 = 0.02;
    ERROR_REDUCTION
}
9201
/// Configuration for a technology transition event.
///
/// Unlike the organizational and process-evolution event structs, this one
/// has no `effective_date` field; the relevant dates live on the
/// `event_type` variants (`cutover_date`, `stabilization_end`,
/// `go_live_date`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnologyTransitionSchemaConfig {
    /// Event ID.
    pub id: String,

    /// Event type.
    pub event_type: TechnologyTransitionTypeSchemaConfig,

    /// Optional free-text description (`None` when omitted).
    #[serde(default)]
    pub description: Option<String>,
}
9215
/// Technology transition type configuration.
///
/// Internally tagged via a `type` key holding the snake_case variant name
/// (`erp_migration`, `module_implementation`). All date fields are kept as
/// plain strings at the schema layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TechnologyTransitionTypeSchemaConfig {
    /// ERP migration.
    ErpMigration {
        /// Source system (required).
        source_system: String,
        /// Target system (required).
        target_system: String,
        /// Cutover date (required).
        cutover_date: String,
        /// Stabilization end date (required).
        stabilization_end: String,
        /// Duplicate rate during migration (default: 0.02).
        #[serde(default = "default_erp_duplicate_rate")]
        duplicate_rate: f64,
        /// Format mismatch rate (default: 0.03).
        #[serde(default = "default_format_mismatch")]
        format_mismatch_rate: f64,
    },
    /// Module implementation.
    ModuleImplementation {
        /// Module name (required).
        module_name: String,
        /// Go-live date (required).
        go_live_date: String,
    },
}
9245
/// Serde fallback for `ErpMigration::duplicate_rate`.
fn default_erp_duplicate_rate() -> f64 {
    const DUPLICATE_RATE: f64 = 0.02;
    DUPLICATE_RATE
}
9249
/// Serde fallback for `ErpMigration::format_mismatch_rate`.
fn default_format_mismatch() -> f64 {
    const FORMAT_MISMATCH_RATE: f64 = 0.03;
    FORMAT_MISMATCH_RATE
}
9253
9254// =============================================================================
9255// Behavioral Drift Configuration
9256// =============================================================================
9257
/// Configuration for behavioral drift (vendor, customer, employee behavior).
///
/// **Deprecated (v4.1.2):** this schema section is currently
/// validated-but-inert — no runtime code consumes its fields. Users
/// who want behavioral drift-style effects should reach for
/// `distributions.regime_changes` (v3.5.2+), which drives the
/// `DriftController` via the parameter-drift path. The schema type
/// remains for backward-compatible YAML loading; it will be removed
/// in a future major version once `regime_changes` gains per-entity
/// (vendor / customer / employee) targeting.
///
/// Every field is optional; omitted sub-sections take their own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BehavioralDriftSchemaConfig {
    /// Enable behavioral drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Vendor behavior drift.
    #[serde(default)]
    pub vendor_behavior: VendorBehaviorSchemaConfig,

    /// Customer behavior drift.
    #[serde(default)]
    pub customer_behavior: CustomerBehaviorSchemaConfig,

    /// Employee behavior drift.
    #[serde(default)]
    pub employee_behavior: EmployeeBehaviorSchemaConfig,

    /// Collective behavior drift.
    #[serde(default)]
    pub collective: CollectiveBehaviorSchemaConfig,
}
9290
/// Vendor behavior drift configuration.
///
/// Groups the payment-terms and quality drift sub-sections; each falls back
/// to its own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VendorBehaviorSchemaConfig {
    /// Payment terms drift.
    #[serde(default)]
    pub payment_terms_drift: PaymentTermsDriftSchemaConfig,

    /// Quality drift.
    #[serde(default)]
    pub quality_drift: QualityDriftSchemaConfig,
}
9302
/// Payment terms drift configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDriftSchemaConfig {
    /// Extension rate per year, in days (default: 2.5).
    #[serde(default = "default_extension_rate")]
    pub extension_rate_per_year: f64,

    /// Economic sensitivity (default: 1.0).
    #[serde(default = "default_economic_sensitivity")]
    pub economic_sensitivity: f64,
}
9314
/// Serde fallback for `PaymentTermsDriftSchemaConfig::extension_rate_per_year`.
fn default_extension_rate() -> f64 {
    const EXTENSION_DAYS_PER_YEAR: f64 = 2.5;
    EXTENSION_DAYS_PER_YEAR
}
9318
/// Serde fallback for `PaymentTermsDriftSchemaConfig::economic_sensitivity`.
fn default_economic_sensitivity() -> f64 {
    const SENSITIVITY: f64 = 1.0;
    SENSITIVITY
}
9322
9323impl Default for PaymentTermsDriftSchemaConfig {
9324    fn default() -> Self {
9325        Self {
9326            extension_rate_per_year: 2.5,
9327            economic_sensitivity: 1.0,
9328        }
9329    }
9330}
9331
/// Quality drift configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDriftSchemaConfig {
    /// New vendor improvement rate, per year (default: 0.02).
    #[serde(default = "default_improvement_rate")]
    pub new_vendor_improvement_rate: f64,

    /// Complacency decline rate, per year after the first year
    /// (default: 0.01).
    #[serde(default = "default_decline_rate")]
    pub complacency_decline_rate: f64,
}
9343
/// Serde fallback for `QualityDriftSchemaConfig::new_vendor_improvement_rate`.
fn default_improvement_rate() -> f64 {
    const IMPROVEMENT_RATE: f64 = 0.02;
    IMPROVEMENT_RATE
}
9347
/// Serde fallback for `QualityDriftSchemaConfig::complacency_decline_rate`.
fn default_decline_rate() -> f64 {
    const DECLINE_RATE: f64 = 0.01;
    DECLINE_RATE
}
9351
9352impl Default for QualityDriftSchemaConfig {
9353    fn default() -> Self {
9354        Self {
9355            new_vendor_improvement_rate: 0.02,
9356            complacency_decline_rate: 0.01,
9357        }
9358    }
9359}
9360
/// Customer behavior drift configuration.
///
/// Groups the payment and order drift sub-sections; each falls back to its
/// own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CustomerBehaviorSchemaConfig {
    /// Payment drift.
    #[serde(default)]
    pub payment_drift: CustomerPaymentDriftSchemaConfig,

    /// Order drift.
    #[serde(default)]
    pub order_drift: OrderDriftSchemaConfig,
}
9372
/// Customer payment drift configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerPaymentDriftSchemaConfig {
    /// Days extension during downturn as a `(min, max)` pair
    /// (default: `(5, 15)`).
    #[serde(default = "default_downturn_extension")]
    pub downturn_days_extension: (u32, u32),

    /// Bad debt increase during downturn (default: 0.02).
    #[serde(default = "default_bad_debt_increase")]
    pub downturn_bad_debt_increase: f64,
}
9384
/// Serde fallback for `CustomerPaymentDriftSchemaConfig::downturn_days_extension`,
/// expressed as `(min, max)` days.
fn default_downturn_extension() -> (u32, u32) {
    const MIN_DAYS: u32 = 5;
    const MAX_DAYS: u32 = 15;
    (MIN_DAYS, MAX_DAYS)
}
9388
/// Serde fallback for `CustomerPaymentDriftSchemaConfig::downturn_bad_debt_increase`.
fn default_bad_debt_increase() -> f64 {
    const BAD_DEBT_INCREASE: f64 = 0.02;
    BAD_DEBT_INCREASE
}
9392
9393impl Default for CustomerPaymentDriftSchemaConfig {
9394    fn default() -> Self {
9395        Self {
9396            downturn_days_extension: (5, 15),
9397            downturn_bad_debt_increase: 0.02,
9398        }
9399    }
9400}
9401
/// Order drift configuration.
///
/// The single field is optional and falls back to `0.05`, matching the
/// `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrderDriftSchemaConfig {
    /// Digital shift rate, per year (default: 0.05).
    #[serde(default = "default_digital_shift")]
    pub digital_shift_rate: f64,
}
9409
/// Serde fallback for `OrderDriftSchemaConfig::digital_shift_rate`.
fn default_digital_shift() -> f64 {
    const DIGITAL_SHIFT_RATE: f64 = 0.05;
    DIGITAL_SHIFT_RATE
}
9413
9414impl Default for OrderDriftSchemaConfig {
9415    fn default() -> Self {
9416        Self {
9417            digital_shift_rate: 0.05,
9418        }
9419    }
9420}
9421
/// Employee behavior drift configuration.
///
/// Groups the approval and error drift sub-sections; each falls back to its
/// own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EmployeeBehaviorSchemaConfig {
    /// Approval drift.
    #[serde(default)]
    pub approval_drift: ApprovalDriftSchemaConfig,

    /// Error drift.
    #[serde(default)]
    pub error_drift: ErrorDriftSchemaConfig,
}
9433
/// Approval drift configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalDriftSchemaConfig {
    /// EOM intensity increase per year (default: 0.05).
    #[serde(default = "default_eom_intensity")]
    pub eom_intensity_increase_per_year: f64,

    /// Rubber stamp volume threshold (default: 50).
    #[serde(default = "default_rubber_stamp")]
    pub rubber_stamp_volume_threshold: u32,
}
9445
/// Serde fallback for `ApprovalDriftSchemaConfig::eom_intensity_increase_per_year`.
fn default_eom_intensity() -> f64 {
    const EOM_INTENSITY_INCREASE: f64 = 0.05;
    EOM_INTENSITY_INCREASE
}
9449
/// Serde fallback for `ApprovalDriftSchemaConfig::rubber_stamp_volume_threshold`.
fn default_rubber_stamp() -> u32 {
    const VOLUME_THRESHOLD: u32 = 50;
    VOLUME_THRESHOLD
}
9453
9454impl Default for ApprovalDriftSchemaConfig {
9455    fn default() -> Self {
9456        Self {
9457            eom_intensity_increase_per_year: 0.05,
9458            rubber_stamp_volume_threshold: 50,
9459        }
9460    }
9461}
9462
/// Error drift configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorDriftSchemaConfig {
    /// New employee error rate (default: 0.08).
    #[serde(default = "default_new_error")]
    pub new_employee_error_rate: f64,

    /// Learning curve length in months (default: 6).
    #[serde(default = "default_learning_months")]
    pub learning_curve_months: u32,
}
9474
/// Serde fallback for `ErrorDriftSchemaConfig::new_employee_error_rate`.
fn default_new_error() -> f64 {
    const NEW_EMPLOYEE_ERROR_RATE: f64 = 0.08;
    NEW_EMPLOYEE_ERROR_RATE
}
9478
/// Serde fallback for `ErrorDriftSchemaConfig::learning_curve_months`.
fn default_learning_months() -> u32 {
    const LEARNING_CURVE_MONTHS: u32 = 6;
    LEARNING_CURVE_MONTHS
}
9482
9483impl Default for ErrorDriftSchemaConfig {
9484    fn default() -> Self {
9485        Self {
9486            new_employee_error_rate: 0.08,
9487            learning_curve_months: 6,
9488        }
9489    }
9490}
9491
/// Collective behavior drift configuration.
///
/// Currently wraps only the automation-adoption sub-section, which falls
/// back to its own `Default` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectiveBehaviorSchemaConfig {
    /// Automation adoption configuration.
    #[serde(default)]
    pub automation_adoption: AutomationAdoptionSchemaConfig,
}
9499
/// Automation adoption configuration.
///
/// All fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAdoptionSchemaConfig {
    /// Enable S-curve adoption model (default: `false`).
    #[serde(default)]
    pub s_curve_enabled: bool,

    /// Adoption midpoint in months (default: 24).
    #[serde(default = "default_midpoint")]
    pub adoption_midpoint_months: u32,

    /// Steepness of adoption curve (default: 0.15).
    #[serde(default = "default_steepness")]
    pub steepness: f64,
}
9515
/// Serde fallback for `AutomationAdoptionSchemaConfig::adoption_midpoint_months`.
fn default_midpoint() -> u32 {
    const MIDPOINT_MONTHS: u32 = 24;
    MIDPOINT_MONTHS
}
9519
/// Serde fallback for `AutomationAdoptionSchemaConfig::steepness`.
fn default_steepness() -> f64 {
    const STEEPNESS: f64 = 0.15;
    STEEPNESS
}
9523
9524impl Default for AutomationAdoptionSchemaConfig {
9525    fn default() -> Self {
9526        Self {
9527            s_curve_enabled: false,
9528            adoption_midpoint_months: 24,
9529            steepness: 0.15,
9530        }
9531    }
9532}
9533
9534// =============================================================================
9535// Market Drift Configuration
9536// =============================================================================
9537
/// Configuration for market drift (economic cycles, commodities, price shocks).
///
/// **Deprecated (v4.1.2):** validated-but-inert. Use
/// `distributions.regime_changes.economic_cycle` +
/// `distributions.regime_changes.parameter_drifts` for the
/// equivalent runtime behaviour (shipped in v3.5.2). The schema
/// type remains for backward-compatible YAML loading; will be
/// removed in v5.0.
///
/// Every field is optional; omitted sub-sections take their own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct MarketDriftSchemaConfig {
    /// Enable market drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Economic cycle configuration.
    #[serde(default)]
    pub economic_cycle: MarketEconomicCycleSchemaConfig,

    /// Industry-specific cycles, keyed by a string (empty when omitted).
    #[serde(default)]
    pub industry_cycles: std::collections::HashMap<String, IndustryCycleSchemaConfig>,

    /// Commodity drift configuration.
    #[serde(default)]
    pub commodities: CommoditiesSchemaConfig,
}
9564
/// Market economic cycle configuration.
///
/// All fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarketEconomicCycleSchemaConfig {
    /// Enable economic cycle (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Cycle type (default: `Sinusoidal`).
    #[serde(default)]
    pub cycle_type: CycleTypeSchemaConfig,

    /// Cycle period in months (default: 48).
    #[serde(default = "default_market_cycle_period")]
    pub period_months: u32,

    /// Amplitude (default: 0.15).
    #[serde(default = "default_market_amplitude")]
    pub amplitude: f64,

    /// Recession configuration.
    #[serde(default)]
    pub recession: RecessionSchemaConfig,
}
9588
/// Serde fallback for `MarketEconomicCycleSchemaConfig::period_months`.
fn default_market_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}
9592
/// Serde fallback for `MarketEconomicCycleSchemaConfig::amplitude`.
fn default_market_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
9596
9597impl Default for MarketEconomicCycleSchemaConfig {
9598    fn default() -> Self {
9599        Self {
9600            enabled: false,
9601            cycle_type: CycleTypeSchemaConfig::Sinusoidal,
9602            period_months: 48,
9603            amplitude: 0.15,
9604            recession: RecessionSchemaConfig::default(),
9605        }
9606    }
9607}
9608
/// Cycle type configuration.
///
/// Serialized in snake_case (`sinusoidal`, `asymmetric`, `mean_reverting`).
/// `Sinusoidal` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CycleTypeSchemaConfig {
    /// Sinusoidal cycle.
    #[default]
    Sinusoidal,
    /// Asymmetric cycle.
    Asymmetric,
    /// Mean-reverting cycle.
    MeanReverting,
}
9621
/// Recession configuration.
///
/// All fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionSchemaConfig {
    /// Enable recession simulation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Probability per year (default: 0.10).
    #[serde(default = "default_recession_prob")]
    pub probability_per_year: f64,

    /// Severity (default: `Moderate`).
    #[serde(default)]
    pub severity: RecessionSeveritySchemaConfig,

    /// Specific recession periods (empty when omitted).
    #[serde(default)]
    pub recession_periods: Vec<RecessionPeriodSchemaConfig>,
}
9641
/// Serde fallback for `RecessionSchemaConfig::probability_per_year`.
fn default_recession_prob() -> f64 {
    const ANNUAL_PROBABILITY: f64 = 0.10;
    ANNUAL_PROBABILITY
}
9645
9646impl Default for RecessionSchemaConfig {
9647    fn default() -> Self {
9648        Self {
9649            enabled: false,
9650            probability_per_year: 0.10,
9651            severity: RecessionSeveritySchemaConfig::Moderate,
9652            recession_periods: Vec::new(),
9653        }
9654    }
9655}
9656
/// Recession severity configuration.
///
/// Serialized in snake_case (`mild`, `moderate`, `severe`). `Moderate` is the
/// default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RecessionSeveritySchemaConfig {
    /// Mild recession.
    Mild,
    /// Moderate recession.
    #[default]
    Moderate,
    /// Severe recession.
    Severe,
}
9669
/// Recession period configuration.
///
/// Both fields are required; neither carries a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodSchemaConfig {
    /// Start month.
    // NOTE(review): the origin of the month index (0- or 1-based, relative to
    // what) is not visible from this type — confirm against the consumer.
    pub start_month: u32,
    /// Duration in months.
    pub duration_months: u32,
}
9678
/// Industry cycle configuration.
///
/// Used as the value type of `MarketDriftSchemaConfig::industry_cycles`.
/// Both fields are optional and fall back to their serde helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryCycleSchemaConfig {
    /// Period in months (default: 36).
    #[serde(default = "default_industry_period")]
    pub period_months: u32,

    /// Amplitude (default: 0.20).
    #[serde(default = "default_industry_amp")]
    pub amplitude: f64,
}
9690
/// Serde fallback for `IndustryCycleSchemaConfig::period_months`.
fn default_industry_period() -> u32 {
    const PERIOD_MONTHS: u32 = 36;
    PERIOD_MONTHS
}
9694
/// Serde fallback for `IndustryCycleSchemaConfig::amplitude`.
fn default_industry_amp() -> f64 {
    const AMPLITUDE: f64 = 0.20;
    AMPLITUDE
}
9698
/// Commodities drift configuration.
///
/// Both fields are optional: disabled with no items when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommoditiesSchemaConfig {
    /// Enable commodity drift (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Commodity items (empty when omitted).
    #[serde(default)]
    pub items: Vec<CommodityItemSchemaConfig>,
}
9710
/// Commodity item configuration.
///
/// Only `name` is required; the numeric fields have serde fallbacks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommodityItemSchemaConfig {
    /// Commodity name.
    pub name: String,

    /// Volatility (default: 0.20).
    #[serde(default = "default_volatility")]
    pub volatility: f64,

    /// COGS pass-through (default: 0.0, the `f64` default).
    #[serde(default)]
    pub cogs_pass_through: f64,

    /// Overhead pass-through (default: 0.0, the `f64` default).
    #[serde(default)]
    pub overhead_pass_through: f64,
}
9729
/// Serde fallback for `CommodityItemSchemaConfig::volatility`.
fn default_volatility() -> f64 {
    const VOLATILITY: f64 = 0.20;
    VOLATILITY
}
9733
9734// =============================================================================
9735// Drift Labeling Configuration
9736// =============================================================================
9737
/// Configuration for drift ground truth labeling.
///
/// **Deprecated (v4.1.2):** validated-but-inert. The v3.3.0
/// analytics-metadata phase (`DriftEventGenerator` +
/// `AnalyticsMetadataSnapshot.drift_events`) produces drift labels
/// at runtime — configure it via `analytics_metadata.drift_events`
/// instead. The schema type remains for backward-compatible YAML
/// loading; will be removed in v5.0.
///
/// Note that the master switch defaults to `false` while the three
/// per-category sub-sections default to `enabled: true`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DriftLabelingSchemaConfig {
    /// Enable drift labeling (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Statistical drift labeling.
    #[serde(default)]
    pub statistical: StatisticalDriftLabelingSchemaConfig,

    /// Categorical drift labeling.
    #[serde(default)]
    pub categorical: CategoricalDriftLabelingSchemaConfig,

    /// Temporal drift labeling.
    #[serde(default)]
    pub temporal: TemporalDriftLabelingSchemaConfig,

    /// Regulatory calendar preset (`None` when omitted).
    #[serde(default)]
    pub regulatory_calendar_preset: Option<String>,
}
9768
/// Statistical drift labeling configuration.
///
/// Both fields are optional; the serde fallbacks and the `Default` impl use
/// the same values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalDriftLabelingSchemaConfig {
    /// Enable statistical drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,

    /// Minimum magnitude threshold (default: 0.05).
    #[serde(default = "default_min_magnitude")]
    pub min_magnitude_threshold: f64,
}
9780
/// Serde fallback for `StatisticalDriftLabelingSchemaConfig::min_magnitude_threshold`.
fn default_min_magnitude() -> f64 {
    const MIN_MAGNITUDE: f64 = 0.05;
    MIN_MAGNITUDE
}
9784
9785impl Default for StatisticalDriftLabelingSchemaConfig {
9786    fn default() -> Self {
9787        Self {
9788            enabled: true,
9789            min_magnitude_threshold: 0.05,
9790        }
9791    }
9792}
9793
/// Categorical drift labeling configuration.
///
/// On by default: `enabled` falls back to `true` when omitted, matching the
/// `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoricalDriftLabelingSchemaConfig {
    /// Enable categorical drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9801
9802impl Default for CategoricalDriftLabelingSchemaConfig {
9803    fn default() -> Self {
9804        Self { enabled: true }
9805    }
9806}
9807
/// Temporal drift labeling configuration.
///
/// On by default: `enabled` falls back to `true` when omitted, matching the
/// `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalDriftLabelingSchemaConfig {
    /// Enable temporal drift labeling (default: `true`).
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9815
9816impl Default for TemporalDriftLabelingSchemaConfig {
9817    fn default() -> Self {
9818        Self { enabled: true }
9819    }
9820}
9821
9822// =============================================================================
9823// Enhanced Anomaly Injection Configuration
9824// =============================================================================
9825
/// Enhanced anomaly injection configuration.
///
/// Provides comprehensive anomaly injection capabilities including:
/// - Multi-stage fraud schemes (embezzlement, revenue manipulation, kickbacks)
/// - Correlated anomaly injection (co-occurrence patterns, error cascades)
/// - Near-miss generation for false positive reduction
/// - Detection difficulty classification
/// - Context-aware injection based on entity behavior
///
/// Every field is optional; omitted sub-sections take their own `Default`
/// and the master switch defaults to `false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EnhancedAnomalyConfig {
    /// Enable enhanced anomaly injection (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Base anomaly rates.
    #[serde(default)]
    pub rates: AnomalyRateConfig,

    /// Multi-stage fraud scheme configuration.
    #[serde(default)]
    pub multi_stage_schemes: MultiStageSchemeConfig,

    /// Correlated anomaly injection configuration.
    #[serde(default)]
    pub correlated_injection: CorrelatedInjectionConfig,

    /// Near-miss generation configuration.
    #[serde(default)]
    pub near_miss: NearMissConfig,

    /// Detection difficulty classification configuration.
    #[serde(default)]
    pub difficulty_classification: DifficultyClassificationConfig,

    /// Context-aware injection configuration.
    #[serde(default)]
    pub context_aware: ContextAwareConfig,

    /// Enhanced labeling configuration.
    #[serde(default)]
    pub labeling: EnhancedLabelingConfig,

    /// SOTA-12 (#140, FINDINGS §13): post-process tagger that tags the top
    /// `rate × n_jes` JEs whose `(source, gl_account)` is rare under the
    /// per-source empirical PMF as
    /// `RelationalAnomalyType::SourceConditionalRarity`. `None` = disabled
    /// (default); typical value `0.01` matches the audit-packet hot-list size.
    /// Runs AFTER per-entry strategies — additive, doesn't replace them.
    ///
    /// **Phase 1 deprecation note:** this key remains the source of truth for
    /// back-compat. If `concentration.source_conditional_rarity.rate` is also
    /// set, that value wins (it's an opt-in to the unified DSL).
    #[serde(default)]
    pub source_conditional_rarity_rate: Option<f64>,
}
9881
9882// ---------------------------------------------------------------------------
9883// ConcentrationConfig — central post-process pass pipeline (#143, Phase 1).
9884//
9885// Design reference:
9886//   docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md
9887//
// Phase 1 fields: SourceConditionalRarityPass (wrapping shipped SOTA-12) +
// TradingPartnerPoolPass (closes SOTA-11.1 / #142).
// Phase 1.5 adds: source_blanking (closes SOTA-7 / #132).
// Phase 2 will add: account_pair_substitution (closes SOTA-8.1 / #141).
// v5.30 B2 adds: consolidation_outlier (#154).
9891// ---------------------------------------------------------------------------
9892
/// Top-level configuration for the post-generation concentration pipeline.
///
/// Each pass sub-field is `Option<_>`; presence enables the corresponding
/// pass. `enabled = false` (default) disables the pipeline regardless of
/// sub-fields, matching the parent proposal's "opt-in" guidance.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ConcentrationConfig {
    /// Master switch. `false` (default) → pipeline is no-op.
    #[serde(default)]
    pub enabled: bool,

    /// Phase 1: source-conditional rarity tagger (wraps shipped SOTA-12).
    /// If `anomaly_injection.source_conditional_rarity_rate` is also set, this
    /// field wins.
    #[serde(default)]
    pub source_conditional_rarity: Option<SourceConditionalRarityPassConfig>,

    /// Phase 1: trading-partner pool resizing (closes SOTA-11.1 / #142).
    #[serde(default)]
    pub trading_partner_pool: Option<TradingPartnerPoolPassConfig>,

    /// Phase 2: account-pair substitution against a corpus-derived PMF
    /// (closes SOTA-8.1 / #141). Defers to Phase 2 when wired.
    #[serde(default)]
    pub account_pair_substitution: Option<AccountPairSubstitutionPassConfig>,

    /// Phase 1.5: blank-source post-process (closes SOTA-7 / #132). Nulls
    /// `sap_source_code` on a configurable fraction of JEs to match the
    /// corpus's ~21% blank-source rate. Runs LAST in the pipeline so
    /// earlier passes (`SourceConditionalRarityPass`,
    /// `AccountPairSubstitutionPass`) see full source coverage.
    #[serde(default)]
    pub source_blanking: Option<SourceBlankingPassConfig>,

    /// v5.30 B2 (#154) — heavy-tail consolidation outlier emission.
    /// Reshapes a small fraction of JEs (~0.001 typical) into
    /// multi-100-line postings touching bridge / suspense / clearing
    /// accounts. Lifts the synthetic relational_score p99/max
    /// percentiles toward the corpus's heavy tail without distorting
    /// the median. Honors `anomaly_injection.consolidation_outlier_rate`
    /// as a back-compat alias — if both are set, this DSL field wins.
    #[serde(default)]
    pub consolidation_outlier: Option<ConsolidationOutlierPassConfig>,
}
9937
9938/// Per-pass config for SourceConditionalRarityPass.
9939#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9940pub struct SourceConditionalRarityPassConfig {
9941    /// Fraction of input JEs to tag (typically `0.01`).
9942    pub rate: f64,
9943    /// Optional min surprise floor (Σ -log P(account|source)). Default `5.0`.
9944    #[serde(default)]
9945    pub min_surprise: Option<f64>,
9946    /// Per-source line-count floor (sources below have unreliable PMFs).
9947    /// Default `5`.
9948    #[serde(default)]
9949    pub min_per_source_lines: Option<u32>,
9950}
9951
9952/// Per-pass config for TradingPartnerPoolPass.
9953#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9954pub struct TradingPartnerPoolPassConfig {
9955    /// Target distinct trading-partner pool size. `0` is clamped to `1` at
9956    /// runtime. Typical corpus value `~12`; synthetic default `~40`.
9957    pub target_size: usize,
9958}
9959
9960/// Per-pass config for SourceBlankingPass (Phase 1.5 / SOTA-7).
9961#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9962pub struct SourceBlankingPassConfig {
9963    /// Fraction of JEs whose `sap_source_code` should be nulled. Typical
9964    /// corpus-matching value `0.21`. Clamped to `[0.0, 1.0]` at runtime.
9965    pub rate: f64,
9966}
9967
9968/// Per-pass config for ConsolidationOutlierPass (v5.30 B2 / #154).
9969///
9970/// Amounts are stored as `f64` here (schema layer) and converted to
9971/// `rust_decimal::Decimal` in the pass constructor. The synthetic
9972/// bridge-line amounts are log-uniformly distributed and the pp99
9973/// metric reads scale rather than exact value, so the f64 → Decimal
9974/// rounding is irrelevant for the heavy-tail signal we're trying to
9975/// emit. Keeping `rust_decimal` out of `datasynth-config`'s direct
9976/// dependency graph avoids a downstream crate-pull.
9977#[derive(Debug, Clone, Serialize, Deserialize)]
9978pub struct ConsolidationOutlierPassConfig {
9979    /// Fraction of JEs to reshape into multi-line bridge-account
9980    /// postings. Typical baseline `0.001` (one in a thousand).
9981    /// Clamped to `[0.0, 1.0]` at runtime.
9982    pub rate: f64,
9983    /// Minimum number of extra lines to append (always rounded up to
9984    /// an even number — lines are added in balanced DR/CR pairs).
9985    /// Default `50`.
9986    #[serde(default = "default_consolidation_outlier_min_lines")]
9987    pub min_extra_lines: usize,
9988    /// Maximum number of extra lines to append. Default `200`.
9989    #[serde(default = "default_consolidation_outlier_max_lines")]
9990    pub max_extra_lines: usize,
9991    /// Bridge / suspense / clearing accounts the appended lines use.
9992    /// Empty (default) → use the pass's built-in default list.
9993    #[serde(default)]
9994    pub bridge_accounts: Vec<String>,
9995    /// Minimum bridge-line amount (log-uniform draw). Default `100.0`.
9996    #[serde(default = "default_consolidation_outlier_min_amount")]
9997    pub line_amount_min: f64,
9998    /// Maximum bridge-line amount (log-uniform draw). Default `50_000.0`.
9999    #[serde(default = "default_consolidation_outlier_max_amount")]
10000    pub line_amount_max: f64,
10001}
10002
10003impl Default for ConsolidationOutlierPassConfig {
10004    fn default() -> Self {
10005        Self {
10006            rate: 0.0,
10007            min_extra_lines: default_consolidation_outlier_min_lines(),
10008            max_extra_lines: default_consolidation_outlier_max_lines(),
10009            bridge_accounts: Vec::new(),
10010            line_amount_min: default_consolidation_outlier_min_amount(),
10011            line_amount_max: default_consolidation_outlier_max_amount(),
10012        }
10013    }
10014}
10015
/// Fallback for `ConsolidationOutlierPassConfig::min_extra_lines`.
fn default_consolidation_outlier_min_lines() -> usize {
    const MIN_EXTRA_LINES: usize = 50;
    MIN_EXTRA_LINES
}
/// Fallback for `ConsolidationOutlierPassConfig::max_extra_lines`.
fn default_consolidation_outlier_max_lines() -> usize {
    const MAX_EXTRA_LINES: usize = 200;
    MAX_EXTRA_LINES
}
/// Fallback for `ConsolidationOutlierPassConfig::line_amount_min`.
fn default_consolidation_outlier_min_amount() -> f64 {
    const LINE_AMOUNT_MIN: f64 = 100.0;
    LINE_AMOUNT_MIN
}
/// Fallback for `ConsolidationOutlierPassConfig::line_amount_max`.
fn default_consolidation_outlier_max_amount() -> f64 {
    const LINE_AMOUNT_MAX: f64 = 50_000.0;
    LINE_AMOUNT_MAX
}
10028
10029/// Per-pass config for AccountPairSubstitutionPass (Phase 2).
10030#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10031pub struct AccountPairSubstitutionPassConfig {
10032    /// Path to a per-source pair-PMF JSON (produced by
10033    /// `corpus_vs_synth_gap.py --emit-pair-pmf`). Aggregate-only; never
10034    /// contains row content or client identifiers.
10035    pub pmf_path: String,
10036    /// JEs whose dominant (debit, credit) pair has corpus probability ≥ this
10037    /// threshold are left alone (they're already plausible). Default `0.005`.
10038    #[serde(default)]
10039    pub rarity_threshold: Option<f64>,
10040    /// When substituting, draw from the top-K corpus pairs (weighted by
10041    /// probability). Default `10`.
10042    #[serde(default)]
10043    pub top_k: Option<usize>,
10044}
10045
10046/// Base anomaly rate configuration.
10047#[derive(Debug, Clone, Serialize, Deserialize)]
10048pub struct AnomalyRateConfig {
10049    /// Total anomaly rate (0.0 to 1.0).
10050    #[serde(default = "default_total_anomaly_rate")]
10051    pub total_rate: f64,
10052
10053    /// Fraud anomaly rate.
10054    #[serde(default = "default_fraud_anomaly_rate")]
10055    pub fraud_rate: f64,
10056
10057    /// Error anomaly rate.
10058    #[serde(default = "default_error_anomaly_rate")]
10059    pub error_rate: f64,
10060
10061    /// Process issue rate.
10062    #[serde(default = "default_process_anomaly_rate")]
10063    pub process_rate: f64,
10064
10065    /// v5.30 B2 (#154) — heavy-tail outlier JE rate. Fraction of
10066    /// emitted JEs that get re-shaped into multi-100-line postings
10067    /// touching bridge accounts. Models real consolidation entries,
10068    /// period-end accruals, and manual reclasses. Default `0.0`
10069    /// preserves v5.29 byte-identical output; opt in (e.g. `0.001`)
10070    /// to lift synth p99/max relational_score percentiles toward the
10071    /// reference shard's heavy tail (~20× vs synth's default ~12×).
10072    #[serde(
10073        default = "default_consolidation_outlier_rate",
10074        alias = "consolidationOutlierRate"
10075    )]
10076    pub consolidation_outlier_rate: f64,
10077}
10078
/// Fallback for `AnomalyRateConfig::total_rate`.
fn default_total_anomaly_rate() -> f64 {
    const TOTAL_RATE: f64 = 0.03;
    TOTAL_RATE
}
/// Fallback for `AnomalyRateConfig::fraud_rate`.
fn default_fraud_anomaly_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.01;
    FRAUD_RATE
}
/// Fallback for `AnomalyRateConfig::error_rate`.
fn default_error_anomaly_rate() -> f64 {
    const ERROR_RATE: f64 = 0.015;
    ERROR_RATE
}
/// Fallback for `AnomalyRateConfig::process_rate`.
fn default_process_anomaly_rate() -> f64 {
    const PROCESS_RATE: f64 = 0.005;
    PROCESS_RATE
}
/// Fallback for `AnomalyRateConfig::consolidation_outlier_rate`.
///
/// v5.30 B2 (#154): a small baseline nudges the synth heavy tail toward the
/// reference shard's p99 / max relational_score (~20×) without overpowering
/// downstream metrics. At `0.001`, about one JE in a thousand becomes a
/// multi-100-line bridge-account posting, in line with the observed corpus
/// frequency of period-close / manual reclass / consolidation entries.
fn default_consolidation_outlier_rate() -> f64 {
    const ONE_IN_A_THOUSAND: f64 = 0.001;
    ONE_IN_A_THOUSAND
}
10100
10101impl Default for AnomalyRateConfig {
10102    fn default() -> Self {
10103        Self {
10104            total_rate: default_total_anomaly_rate(),
10105            fraud_rate: default_fraud_anomaly_rate(),
10106            error_rate: default_error_anomaly_rate(),
10107            process_rate: default_process_anomaly_rate(),
10108            consolidation_outlier_rate: default_consolidation_outlier_rate(),
10109        }
10110    }
10111}
10112
10113/// Multi-stage fraud scheme configuration.
10114#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10115pub struct MultiStageSchemeConfig {
10116    /// Enable multi-stage fraud schemes.
10117    #[serde(default)]
10118    pub enabled: bool,
10119
10120    /// Embezzlement scheme configuration.
10121    #[serde(default)]
10122    pub embezzlement: EmbezzlementSchemeConfig,
10123
10124    /// Revenue manipulation scheme configuration.
10125    #[serde(default)]
10126    pub revenue_manipulation: RevenueManipulationSchemeConfig,
10127
10128    /// Vendor kickback scheme configuration.
10129    #[serde(default)]
10130    pub kickback: KickbackSchemeConfig,
10131}
10132
10133/// Embezzlement scheme configuration.
10134#[derive(Debug, Clone, Serialize, Deserialize)]
10135pub struct EmbezzlementSchemeConfig {
10136    /// Probability of starting an embezzlement scheme per perpetrator per year.
10137    #[serde(default = "default_embezzlement_probability")]
10138    pub probability: f64,
10139
10140    /// Testing stage configuration.
10141    #[serde(default)]
10142    pub testing_stage: SchemeStageConfig,
10143
10144    /// Escalation stage configuration.
10145    #[serde(default)]
10146    pub escalation_stage: SchemeStageConfig,
10147
10148    /// Acceleration stage configuration.
10149    #[serde(default)]
10150    pub acceleration_stage: SchemeStageConfig,
10151
10152    /// Desperation stage configuration.
10153    #[serde(default)]
10154    pub desperation_stage: SchemeStageConfig,
10155}
10156
10157fn default_embezzlement_probability() -> f64 {
10158    0.02
10159}
10160
10161impl Default for EmbezzlementSchemeConfig {
10162    fn default() -> Self {
10163        Self {
10164            probability: default_embezzlement_probability(),
10165            testing_stage: SchemeStageConfig {
10166                duration_months: 2,
10167                amount_min: 100.0,
10168                amount_max: 500.0,
10169                transaction_count_min: 2,
10170                transaction_count_max: 5,
10171                difficulty: "hard".to_string(),
10172            },
10173            escalation_stage: SchemeStageConfig {
10174                duration_months: 6,
10175                amount_min: 500.0,
10176                amount_max: 2000.0,
10177                transaction_count_min: 3,
10178                transaction_count_max: 8,
10179                difficulty: "moderate".to_string(),
10180            },
10181            acceleration_stage: SchemeStageConfig {
10182                duration_months: 3,
10183                amount_min: 2000.0,
10184                amount_max: 10000.0,
10185                transaction_count_min: 5,
10186                transaction_count_max: 12,
10187                difficulty: "easy".to_string(),
10188            },
10189            desperation_stage: SchemeStageConfig {
10190                duration_months: 1,
10191                amount_min: 10000.0,
10192                amount_max: 50000.0,
10193                transaction_count_min: 3,
10194                transaction_count_max: 6,
10195                difficulty: "trivial".to_string(),
10196            },
10197        }
10198    }
10199}
10200
10201/// Revenue manipulation scheme configuration.
10202#[derive(Debug, Clone, Serialize, Deserialize)]
10203pub struct RevenueManipulationSchemeConfig {
10204    /// Probability of starting a revenue manipulation scheme per period.
10205    #[serde(default = "default_revenue_manipulation_probability")]
10206    pub probability: f64,
10207
10208    /// Early revenue recognition inflation target (Q4).
10209    #[serde(default = "default_early_recognition_target")]
10210    pub early_recognition_target: f64,
10211
10212    /// Expense deferral inflation target (Q1).
10213    #[serde(default = "default_expense_deferral_target")]
10214    pub expense_deferral_target: f64,
10215
10216    /// Reserve release inflation target (Q2).
10217    #[serde(default = "default_reserve_release_target")]
10218    pub reserve_release_target: f64,
10219
10220    /// Channel stuffing inflation target (Q4).
10221    #[serde(default = "default_channel_stuffing_target")]
10222    pub channel_stuffing_target: f64,
10223}
10224
/// Fallback for `RevenueManipulationSchemeConfig::probability`.
fn default_revenue_manipulation_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}
/// Fallback for `RevenueManipulationSchemeConfig::early_recognition_target`.
fn default_early_recognition_target() -> f64 {
    const TARGET: f64 = 0.02;
    TARGET
}
/// Fallback for `RevenueManipulationSchemeConfig::expense_deferral_target`.
fn default_expense_deferral_target() -> f64 {
    const TARGET: f64 = 0.03;
    TARGET
}
/// Fallback for `RevenueManipulationSchemeConfig::reserve_release_target`.
fn default_reserve_release_target() -> f64 {
    const TARGET: f64 = 0.02;
    TARGET
}
/// Fallback for `RevenueManipulationSchemeConfig::channel_stuffing_target`.
fn default_channel_stuffing_target() -> f64 {
    const TARGET: f64 = 0.05;
    TARGET
}
10240
10241impl Default for RevenueManipulationSchemeConfig {
10242    fn default() -> Self {
10243        Self {
10244            probability: default_revenue_manipulation_probability(),
10245            early_recognition_target: default_early_recognition_target(),
10246            expense_deferral_target: default_expense_deferral_target(),
10247            reserve_release_target: default_reserve_release_target(),
10248            channel_stuffing_target: default_channel_stuffing_target(),
10249        }
10250    }
10251}
10252
10253/// Vendor kickback scheme configuration.
10254#[derive(Debug, Clone, Serialize, Deserialize)]
10255pub struct KickbackSchemeConfig {
10256    /// Probability of starting a kickback scheme.
10257    #[serde(default = "default_kickback_probability")]
10258    pub probability: f64,
10259
10260    /// Minimum price inflation percentage.
10261    #[serde(default = "default_kickback_inflation_min")]
10262    pub inflation_min: f64,
10263
10264    /// Maximum price inflation percentage.
10265    #[serde(default = "default_kickback_inflation_max")]
10266    pub inflation_max: f64,
10267
10268    /// Kickback percentage (of inflation).
10269    #[serde(default = "default_kickback_percent")]
10270    pub kickback_percent: f64,
10271
10272    /// Setup duration in months.
10273    #[serde(default = "default_kickback_setup_months")]
10274    pub setup_months: u32,
10275
10276    /// Main operation duration in months.
10277    #[serde(default = "default_kickback_operation_months")]
10278    pub operation_months: u32,
10279}
10280
/// Fallback for `KickbackSchemeConfig::probability`.
fn default_kickback_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}
/// Fallback for `KickbackSchemeConfig::inflation_min`.
fn default_kickback_inflation_min() -> f64 {
    const INFLATION_MIN: f64 = 0.10;
    INFLATION_MIN
}
/// Fallback for `KickbackSchemeConfig::inflation_max`.
fn default_kickback_inflation_max() -> f64 {
    const INFLATION_MAX: f64 = 0.25;
    INFLATION_MAX
}
/// Fallback for `KickbackSchemeConfig::kickback_percent`.
fn default_kickback_percent() -> f64 {
    const KICKBACK_PERCENT: f64 = 0.50;
    KICKBACK_PERCENT
}
/// Fallback for `KickbackSchemeConfig::setup_months`.
fn default_kickback_setup_months() -> u32 {
    const SETUP_MONTHS: u32 = 3;
    SETUP_MONTHS
}
/// Fallback for `KickbackSchemeConfig::operation_months`.
fn default_kickback_operation_months() -> u32 {
    const OPERATION_MONTHS: u32 = 12;
    OPERATION_MONTHS
}
10299
10300impl Default for KickbackSchemeConfig {
10301    fn default() -> Self {
10302        Self {
10303            probability: default_kickback_probability(),
10304            inflation_min: default_kickback_inflation_min(),
10305            inflation_max: default_kickback_inflation_max(),
10306            kickback_percent: default_kickback_percent(),
10307            setup_months: default_kickback_setup_months(),
10308            operation_months: default_kickback_operation_months(),
10309        }
10310    }
10311}
10312
10313/// Individual scheme stage configuration.
10314#[derive(Debug, Clone, Serialize, Deserialize)]
10315pub struct SchemeStageConfig {
10316    /// Duration in months.
10317    pub duration_months: u32,
10318
10319    /// Minimum transaction amount.
10320    pub amount_min: f64,
10321
10322    /// Maximum transaction amount.
10323    pub amount_max: f64,
10324
10325    /// Minimum number of transactions.
10326    pub transaction_count_min: u32,
10327
10328    /// Maximum number of transactions.
10329    pub transaction_count_max: u32,
10330
10331    /// Detection difficulty level (trivial, easy, moderate, hard, expert).
10332    pub difficulty: String,
10333}
10334
10335impl Default for SchemeStageConfig {
10336    fn default() -> Self {
10337        Self {
10338            duration_months: 3,
10339            amount_min: 100.0,
10340            amount_max: 1000.0,
10341            transaction_count_min: 2,
10342            transaction_count_max: 10,
10343            difficulty: "moderate".to_string(),
10344        }
10345    }
10346}
10347
10348/// Correlated anomaly injection configuration.
10349#[derive(Debug, Clone, Serialize, Deserialize)]
10350pub struct CorrelatedInjectionConfig {
10351    /// Enable correlated anomaly injection.
10352    #[serde(default)]
10353    pub enabled: bool,
10354
10355    /// Enable fraud concealment co-occurrence patterns.
10356    #[serde(default = "default_true_val")]
10357    pub fraud_concealment: bool,
10358
10359    /// Enable error cascade patterns.
10360    #[serde(default = "default_true_val")]
10361    pub error_cascade: bool,
10362
10363    /// Enable temporal clustering (period-end spikes).
10364    #[serde(default = "default_true_val")]
10365    pub temporal_clustering: bool,
10366
10367    /// Temporal clustering configuration.
10368    #[serde(default)]
10369    pub temporal_clustering_config: TemporalClusteringConfig,
10370
10371    /// Co-occurrence patterns.
10372    #[serde(default)]
10373    pub co_occurrence_patterns: Vec<CoOccurrencePatternConfig>,
10374}
10375
10376impl Default for CorrelatedInjectionConfig {
10377    fn default() -> Self {
10378        Self {
10379            enabled: false,
10380            fraud_concealment: true,
10381            error_cascade: true,
10382            temporal_clustering: true,
10383            temporal_clustering_config: TemporalClusteringConfig::default(),
10384            co_occurrence_patterns: Vec::new(),
10385        }
10386    }
10387}
10388
10389/// Temporal clustering configuration.
10390#[derive(Debug, Clone, Serialize, Deserialize)]
10391pub struct TemporalClusteringConfig {
10392    /// Period-end error multiplier.
10393    #[serde(default = "default_period_end_multiplier")]
10394    pub period_end_multiplier: f64,
10395
10396    /// Number of business days before period end to apply multiplier.
10397    #[serde(default = "default_period_end_days")]
10398    pub period_end_days: u32,
10399
10400    /// Quarter-end additional multiplier.
10401    #[serde(default = "default_quarter_end_multiplier")]
10402    pub quarter_end_multiplier: f64,
10403
10404    /// Year-end additional multiplier.
10405    #[serde(default = "default_year_end_multiplier")]
10406    pub year_end_multiplier: f64,
10407}
10408
/// Fallback for `TemporalClusteringConfig::period_end_multiplier`.
fn default_period_end_multiplier() -> f64 {
    const PERIOD_END_MULTIPLIER: f64 = 2.5;
    PERIOD_END_MULTIPLIER
}
/// Fallback for `TemporalClusteringConfig::period_end_days`.
fn default_period_end_days() -> u32 {
    const PERIOD_END_DAYS: u32 = 5;
    PERIOD_END_DAYS
}
/// Fallback for `TemporalClusteringConfig::quarter_end_multiplier`.
fn default_quarter_end_multiplier() -> f64 {
    const QUARTER_END_MULTIPLIER: f64 = 1.5;
    QUARTER_END_MULTIPLIER
}
/// Fallback for `TemporalClusteringConfig::year_end_multiplier`.
fn default_year_end_multiplier() -> f64 {
    const YEAR_END_MULTIPLIER: f64 = 2.0;
    YEAR_END_MULTIPLIER
}
10421
10422impl Default for TemporalClusteringConfig {
10423    fn default() -> Self {
10424        Self {
10425            period_end_multiplier: default_period_end_multiplier(),
10426            period_end_days: default_period_end_days(),
10427            quarter_end_multiplier: default_quarter_end_multiplier(),
10428            year_end_multiplier: default_year_end_multiplier(),
10429        }
10430    }
10431}
10432
10433/// Co-occurrence pattern configuration.
10434#[derive(Debug, Clone, Serialize, Deserialize)]
10435pub struct CoOccurrencePatternConfig {
10436    /// Pattern name.
10437    pub name: String,
10438
10439    /// Primary anomaly type that triggers the pattern.
10440    pub primary_type: String,
10441
10442    /// Correlated anomalies.
10443    pub correlated: Vec<CorrelatedAnomalyConfig>,
10444}
10445
10446/// Correlated anomaly configuration.
10447#[derive(Debug, Clone, Serialize, Deserialize)]
10448pub struct CorrelatedAnomalyConfig {
10449    /// Anomaly type.
10450    pub anomaly_type: String,
10451
10452    /// Probability of occurrence (0.0 to 1.0).
10453    pub probability: f64,
10454
10455    /// Minimum lag in days.
10456    pub lag_days_min: i32,
10457
10458    /// Maximum lag in days.
10459    pub lag_days_max: i32,
10460}
10461
10462/// Near-miss generation configuration.
10463#[derive(Debug, Clone, Serialize, Deserialize)]
10464pub struct NearMissConfig {
10465    /// Enable near-miss generation.
10466    #[serde(default)]
10467    pub enabled: bool,
10468
10469    /// Proportion of "anomalies" that are actually near-misses (0.0 to 1.0).
10470    #[serde(default = "default_near_miss_proportion")]
10471    pub proportion: f64,
10472
10473    /// Enable near-duplicate pattern.
10474    #[serde(default = "default_true_val")]
10475    pub near_duplicate: bool,
10476
10477    /// Near-duplicate date difference range in days.
10478    #[serde(default)]
10479    pub near_duplicate_days: NearDuplicateDaysConfig,
10480
10481    /// Enable threshold proximity pattern.
10482    #[serde(default = "default_true_val")]
10483    pub threshold_proximity: bool,
10484
10485    /// Threshold proximity range (e.g., 0.90-0.99 of threshold).
10486    #[serde(default)]
10487    pub threshold_proximity_range: ThresholdProximityRangeConfig,
10488
10489    /// Enable unusual but legitimate patterns.
10490    #[serde(default = "default_true_val")]
10491    pub unusual_legitimate: bool,
10492
10493    /// Types of unusual legitimate patterns to generate.
10494    #[serde(default = "default_unusual_legitimate_types")]
10495    pub unusual_legitimate_types: Vec<String>,
10496
10497    /// Enable corrected error patterns.
10498    #[serde(default = "default_true_val")]
10499    pub corrected_errors: bool,
10500
10501    /// Corrected error correction lag range in days.
10502    #[serde(default)]
10503    pub corrected_error_lag: CorrectedErrorLagConfig,
10504}
10505
/// Fallback for `NearMissConfig::proportion`.
fn default_near_miss_proportion() -> f64 {
    const PROPORTION: f64 = 0.30;
    PROPORTION
}
10509
/// Fallback for `NearMissConfig::unusual_legitimate_types`: the four
/// built-in pattern labels, in a fixed order.
fn default_unusual_legitimate_types() -> Vec<String> {
    [
        "year_end_bonus",
        "contract_prepayment",
        "insurance_claim",
        "settlement_payment",
    ]
    .iter()
    .map(|label| String::from(*label))
    .collect()
}
10518
10519impl Default for NearMissConfig {
10520    fn default() -> Self {
10521        Self {
10522            enabled: false,
10523            proportion: default_near_miss_proportion(),
10524            near_duplicate: true,
10525            near_duplicate_days: NearDuplicateDaysConfig::default(),
10526            threshold_proximity: true,
10527            threshold_proximity_range: ThresholdProximityRangeConfig::default(),
10528            unusual_legitimate: true,
10529            unusual_legitimate_types: default_unusual_legitimate_types(),
10530            corrected_errors: true,
10531            corrected_error_lag: CorrectedErrorLagConfig::default(),
10532        }
10533    }
10534}
10535
10536/// Near-duplicate days configuration.
10537#[derive(Debug, Clone, Serialize, Deserialize)]
10538pub struct NearDuplicateDaysConfig {
10539    /// Minimum days apart.
10540    #[serde(default = "default_near_duplicate_min")]
10541    pub min: u32,
10542
10543    /// Maximum days apart.
10544    #[serde(default = "default_near_duplicate_max")]
10545    pub max: u32,
10546}
10547
/// Fallback for `NearDuplicateDaysConfig::min`.
fn default_near_duplicate_min() -> u32 {
    const MIN_DAYS: u32 = 1;
    MIN_DAYS
}
/// Fallback for `NearDuplicateDaysConfig::max`.
fn default_near_duplicate_max() -> u32 {
    const MAX_DAYS: u32 = 3;
    MAX_DAYS
}
10554
10555impl Default for NearDuplicateDaysConfig {
10556    fn default() -> Self {
10557        Self {
10558            min: default_near_duplicate_min(),
10559            max: default_near_duplicate_max(),
10560        }
10561    }
10562}
10563
10564/// Threshold proximity range configuration.
10565#[derive(Debug, Clone, Serialize, Deserialize)]
10566pub struct ThresholdProximityRangeConfig {
10567    /// Minimum proximity (e.g., 0.90 = 90% of threshold).
10568    #[serde(default = "default_threshold_proximity_min")]
10569    pub min: f64,
10570
10571    /// Maximum proximity (e.g., 0.99 = 99% of threshold).
10572    #[serde(default = "default_threshold_proximity_max")]
10573    pub max: f64,
10574}
10575
/// Fallback for `ThresholdProximityRangeConfig::min`.
fn default_threshold_proximity_min() -> f64 {
    const MIN_PROXIMITY: f64 = 0.90;
    MIN_PROXIMITY
}
/// Fallback for `ThresholdProximityRangeConfig::max`.
fn default_threshold_proximity_max() -> f64 {
    const MAX_PROXIMITY: f64 = 0.99;
    MAX_PROXIMITY
}
10582
10583impl Default for ThresholdProximityRangeConfig {
10584    fn default() -> Self {
10585        Self {
10586            min: default_threshold_proximity_min(),
10587            max: default_threshold_proximity_max(),
10588        }
10589    }
10590}
10591
10592/// Corrected error lag configuration.
10593#[derive(Debug, Clone, Serialize, Deserialize)]
10594pub struct CorrectedErrorLagConfig {
10595    /// Minimum correction lag in days.
10596    #[serde(default = "default_corrected_error_lag_min")]
10597    pub min: u32,
10598
10599    /// Maximum correction lag in days.
10600    #[serde(default = "default_corrected_error_lag_max")]
10601    pub max: u32,
10602}
10603
/// Fallback for `CorrectedErrorLagConfig::min`.
fn default_corrected_error_lag_min() -> u32 {
    const MIN_LAG_DAYS: u32 = 1;
    MIN_LAG_DAYS
}
/// Fallback for `CorrectedErrorLagConfig::max`.
fn default_corrected_error_lag_max() -> u32 {
    const MAX_LAG_DAYS: u32 = 5;
    MAX_LAG_DAYS
}
10610
10611impl Default for CorrectedErrorLagConfig {
10612    fn default() -> Self {
10613        Self {
10614            min: default_corrected_error_lag_min(),
10615            max: default_corrected_error_lag_max(),
10616        }
10617    }
10618}
10619
10620/// Detection difficulty classification configuration.
10621#[derive(Debug, Clone, Serialize, Deserialize)]
10622pub struct DifficultyClassificationConfig {
10623    /// Enable detection difficulty classification.
10624    #[serde(default)]
10625    pub enabled: bool,
10626
10627    /// Target distribution of difficulty levels.
10628    #[serde(default)]
10629    pub target_distribution: DifficultyDistributionConfig,
10630}
10631
10632impl Default for DifficultyClassificationConfig {
10633    fn default() -> Self {
10634        Self {
10635            enabled: true,
10636            target_distribution: DifficultyDistributionConfig::default(),
10637        }
10638    }
10639}
10640
10641/// Target distribution of detection difficulty levels.
10642#[derive(Debug, Clone, Serialize, Deserialize)]
10643pub struct DifficultyDistributionConfig {
10644    /// Proportion of trivial anomalies (expected 99% detection).
10645    #[serde(default = "default_difficulty_trivial")]
10646    pub trivial: f64,
10647
10648    /// Proportion of easy anomalies (expected 90% detection).
10649    #[serde(default = "default_difficulty_easy")]
10650    pub easy: f64,
10651
10652    /// Proportion of moderate anomalies (expected 70% detection).
10653    #[serde(default = "default_difficulty_moderate")]
10654    pub moderate: f64,
10655
10656    /// Proportion of hard anomalies (expected 40% detection).
10657    #[serde(default = "default_difficulty_hard")]
10658    pub hard: f64,
10659
10660    /// Proportion of expert anomalies (expected 15% detection).
10661    #[serde(default = "default_difficulty_expert")]
10662    pub expert: f64,
10663}
10664
/// Fallback for `DifficultyDistributionConfig::trivial`.
fn default_difficulty_trivial() -> f64 {
    const TRIVIAL_SHARE: f64 = 0.15;
    TRIVIAL_SHARE
}
/// Fallback for `DifficultyDistributionConfig::easy`.
fn default_difficulty_easy() -> f64 {
    const EASY_SHARE: f64 = 0.25;
    EASY_SHARE
}
/// Fallback for `DifficultyDistributionConfig::moderate`.
fn default_difficulty_moderate() -> f64 {
    const MODERATE_SHARE: f64 = 0.30;
    MODERATE_SHARE
}
/// Fallback for `DifficultyDistributionConfig::hard`.
fn default_difficulty_hard() -> f64 {
    const HARD_SHARE: f64 = 0.20;
    HARD_SHARE
}
/// Fallback for `DifficultyDistributionConfig::expert`.
fn default_difficulty_expert() -> f64 {
    const EXPERT_SHARE: f64 = 0.10;
    EXPERT_SHARE
}
10680
10681impl Default for DifficultyDistributionConfig {
10682    fn default() -> Self {
10683        Self {
10684            trivial: default_difficulty_trivial(),
10685            easy: default_difficulty_easy(),
10686            moderate: default_difficulty_moderate(),
10687            hard: default_difficulty_hard(),
10688            expert: default_difficulty_expert(),
10689        }
10690    }
10691}
10692
10693/// Context-aware injection configuration.
10694#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10695pub struct ContextAwareConfig {
10696    /// Enable context-aware injection.
10697    #[serde(default)]
10698    pub enabled: bool,
10699
10700    /// Vendor-specific anomaly rules.
10701    #[serde(default)]
10702    pub vendor_rules: VendorAnomalyRulesConfig,
10703
10704    /// Employee-specific anomaly rules.
10705    #[serde(default)]
10706    pub employee_rules: EmployeeAnomalyRulesConfig,
10707
10708    /// Account-specific anomaly rules.
10709    #[serde(default)]
10710    pub account_rules: AccountAnomalyRulesConfig,
10711
10712    /// Behavioral baseline configuration.
10713    #[serde(default)]
10714    pub behavioral_baseline: BehavioralBaselineConfig,
10715}
10716
10717/// Vendor-specific anomaly rules configuration.
10718#[derive(Debug, Clone, Serialize, Deserialize)]
10719pub struct VendorAnomalyRulesConfig {
10720    /// Error rate multiplier for new vendors (< threshold days).
10721    #[serde(default = "default_new_vendor_multiplier")]
10722    pub new_vendor_error_multiplier: f64,
10723
10724    /// Days threshold for "new" vendor classification.
10725    #[serde(default = "default_new_vendor_threshold")]
10726    pub new_vendor_threshold_days: u32,
10727
10728    /// Error rate multiplier for international vendors.
10729    #[serde(default = "default_international_multiplier")]
10730    pub international_error_multiplier: f64,
10731
10732    /// Strategic vendor anomaly types (may differ from general vendors).
10733    #[serde(default = "default_strategic_vendor_types")]
10734    pub strategic_vendor_anomaly_types: Vec<String>,
10735}
10736
/// Fallback for `VendorAnomalyRulesConfig::new_vendor_error_multiplier`.
fn default_new_vendor_multiplier() -> f64 {
    const NEW_VENDOR_MULTIPLIER: f64 = 2.5;
    NEW_VENDOR_MULTIPLIER
}
/// Fallback for `VendorAnomalyRulesConfig::new_vendor_threshold_days`.
fn default_new_vendor_threshold() -> u32 {
    const NEW_VENDOR_THRESHOLD_DAYS: u32 = 90;
    NEW_VENDOR_THRESHOLD_DAYS
}
/// Fallback for `VendorAnomalyRulesConfig::international_error_multiplier`.
fn default_international_multiplier() -> f64 {
    const INTERNATIONAL_MULTIPLIER: f64 = 1.5;
    INTERNATIONAL_MULTIPLIER
}
/// Fallback for `VendorAnomalyRulesConfig::strategic_vendor_anomaly_types`:
/// the two built-in labels, in a fixed order.
fn default_strategic_vendor_types() -> Vec<String> {
    ["pricing_dispute", "contract_violation"]
        .iter()
        .map(|label| String::from(*label))
        .collect()
}
10752
10753impl Default for VendorAnomalyRulesConfig {
10754    fn default() -> Self {
10755        Self {
10756            new_vendor_error_multiplier: default_new_vendor_multiplier(),
10757            new_vendor_threshold_days: default_new_vendor_threshold(),
10758            international_error_multiplier: default_international_multiplier(),
10759            strategic_vendor_anomaly_types: default_strategic_vendor_types(),
10760        }
10761    }
10762}
10763
10764/// Employee-specific anomaly rules configuration.
10765#[derive(Debug, Clone, Serialize, Deserialize)]
10766pub struct EmployeeAnomalyRulesConfig {
10767    /// Error rate for new employees (< threshold days).
10768    #[serde(default = "default_new_employee_rate")]
10769    pub new_employee_error_rate: f64,
10770
10771    /// Days threshold for "new" employee classification.
10772    #[serde(default = "default_new_employee_threshold")]
10773    pub new_employee_threshold_days: u32,
10774
10775    /// Transaction volume threshold for fatigue errors.
10776    #[serde(default = "default_volume_fatigue_threshold")]
10777    pub volume_fatigue_threshold: u32,
10778
10779    /// Error rate multiplier when primary approver is absent.
10780    #[serde(default = "default_coverage_multiplier")]
10781    pub coverage_error_multiplier: f64,
10782}
10783
/// Fallback for `EmployeeAnomalyRulesConfig::new_employee_error_rate`.
fn default_new_employee_rate() -> f64 {
    const NEW_EMPLOYEE_ERROR_RATE: f64 = 0.05;
    NEW_EMPLOYEE_ERROR_RATE
}
/// Fallback for `EmployeeAnomalyRulesConfig::new_employee_threshold_days`.
fn default_new_employee_threshold() -> u32 {
    const NEW_EMPLOYEE_THRESHOLD_DAYS: u32 = 180;
    NEW_EMPLOYEE_THRESHOLD_DAYS
}
/// Fallback for `EmployeeAnomalyRulesConfig::volume_fatigue_threshold`.
fn default_volume_fatigue_threshold() -> u32 {
    const VOLUME_FATIGUE_THRESHOLD: u32 = 50;
    VOLUME_FATIGUE_THRESHOLD
}
/// Fallback for `EmployeeAnomalyRulesConfig::coverage_error_multiplier`.
fn default_coverage_multiplier() -> f64 {
    const COVERAGE_MULTIPLIER: f64 = 1.8;
    COVERAGE_MULTIPLIER
}
10796
10797impl Default for EmployeeAnomalyRulesConfig {
10798    fn default() -> Self {
10799        Self {
10800            new_employee_error_rate: default_new_employee_rate(),
10801            new_employee_threshold_days: default_new_employee_threshold(),
10802            volume_fatigue_threshold: default_volume_fatigue_threshold(),
10803            coverage_error_multiplier: default_coverage_multiplier(),
10804        }
10805    }
10806}
10807
/// Account-specific anomaly rules configuration.
///
/// Every field is optional on input: a missing key is filled in by the
/// `default_*` helper named in that field's `#[serde(default = "...")]`
/// attribute.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccountAnomalyRulesConfig {
    /// Error rate multiplier for high-risk accounts (default: 2.0).
    #[serde(default = "default_high_risk_multiplier")]
    pub high_risk_account_multiplier: f64,

    /// Account codes considered high-risk
    /// (default: `"1100"`, `"2000"`, `"3000"`).
    #[serde(default = "default_high_risk_accounts")]
    pub high_risk_accounts: Vec<String>,

    /// Error rate multiplier for suspense accounts (default: 3.0).
    #[serde(default = "default_suspense_multiplier")]
    pub suspense_account_multiplier: f64,

    /// Account codes considered suspense accounts
    /// (default: `"9999"`, `"9998"`).
    #[serde(default = "default_suspense_accounts")]
    pub suspense_accounts: Vec<String>,

    /// Error rate multiplier for intercompany accounts (default: 1.5).
    #[serde(default = "default_intercompany_multiplier")]
    pub intercompany_account_multiplier: f64,
}
10831
/// Fallback for `high_risk_account_multiplier`: 2.0.
fn default_high_risk_multiplier() -> f64 {
    2.0_f64
}

/// Fallback for `high_risk_accounts`: three account codes.
fn default_high_risk_accounts() -> Vec<String> {
    [
        "1100", // AR Control
        "2000", // AP Control
        "3000", // Cash
    ]
    .iter()
    .map(|code| code.to_string())
    .collect()
}

/// Fallback for `suspense_account_multiplier`: 3.0.
fn default_suspense_multiplier() -> f64 {
    3.0_f64
}

/// Fallback for `suspense_accounts`: two account codes.
fn default_suspense_accounts() -> Vec<String> {
    ["9999", "9998"]
        .iter()
        .map(|code| code.to_string())
        .collect()
}

/// Fallback for `intercompany_account_multiplier`: 1.5.
fn default_intercompany_multiplier() -> f64 {
    1.5_f64
}
10851
10852impl Default for AccountAnomalyRulesConfig {
10853    fn default() -> Self {
10854        Self {
10855            high_risk_account_multiplier: default_high_risk_multiplier(),
10856            high_risk_accounts: default_high_risk_accounts(),
10857            suspense_account_multiplier: default_suspense_multiplier(),
10858            suspense_accounts: default_suspense_accounts(),
10859            intercompany_account_multiplier: default_intercompany_multiplier(),
10860        }
10861    }
10862}
10863
/// Behavioral baseline configuration.
///
/// All fields are optional on input; missing keys take the defaults noted on
/// each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BehavioralBaselineConfig {
    /// Enable behavioral baseline tracking (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Number of days to build baseline from (default: 90).
    #[serde(default = "default_baseline_period")]
    pub baseline_period_days: u32,

    /// Standard deviation threshold for amount anomalies (default: 3.0).
    #[serde(default = "default_deviation_threshold")]
    pub deviation_threshold_std: f64,

    /// Standard deviation threshold for frequency anomalies (default: 2.0).
    #[serde(default = "default_frequency_deviation")]
    pub frequency_deviation_threshold: f64,
}
10883
/// Fallback for `baseline_period_days`: 90.
fn default_baseline_period() -> u32 {
    90_u32
}

/// Fallback for `deviation_threshold_std`: 3.0.
fn default_deviation_threshold() -> f64 {
    3.0_f64
}

/// Fallback for `frequency_deviation_threshold`: 2.0.
fn default_frequency_deviation() -> f64 {
    2.0_f64
}
10893
10894impl Default for BehavioralBaselineConfig {
10895    fn default() -> Self {
10896        Self {
10897            enabled: false,
10898            baseline_period_days: default_baseline_period(),
10899            deviation_threshold_std: default_deviation_threshold(),
10900            frequency_deviation_threshold: default_frequency_deviation(),
10901        }
10902    }
10903}
10904
/// Enhanced labeling configuration.
///
/// `Default::default()` enables both flags and uses
/// `MaterialityThresholdsConfig::default()` for the thresholds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedLabelingConfig {
    /// Enable severity scoring.
    ///
    /// A missing key is filled by `default_true_val` (defined outside this
    /// section; presumably returns `true` — confirm).
    #[serde(default = "default_true_val")]
    pub severity_scoring: bool,

    /// Enable difficulty classification.
    ///
    /// A missing key is filled by `default_true_val`, as above.
    #[serde(default = "default_true_val")]
    pub difficulty_classification: bool,

    /// Materiality thresholds for severity classification.
    ///
    /// A missing key falls back to `MaterialityThresholdsConfig::default()`.
    #[serde(default)]
    pub materiality_thresholds: MaterialityThresholdsConfig,
}
10920
10921impl Default for EnhancedLabelingConfig {
10922    fn default() -> Self {
10923        Self {
10924            severity_scoring: true,
10925            difficulty_classification: true,
10926            materiality_thresholds: MaterialityThresholdsConfig::default(),
10927        }
10928    }
10929}
10930
/// Materiality thresholds configuration.
///
/// NOTE(review): the defaults (0.001, 0.01, 0.05, 0.10) look like fractions
/// of the total rather than percent values, despite the "percentage" wording
/// below — confirm against the consumer of these thresholds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaterialityThresholdsConfig {
    /// Threshold for trivial impact (as percentage of total). Default: 0.001.
    #[serde(default = "default_materiality_trivial")]
    pub trivial: f64,

    /// Threshold for immaterial impact. Default: 0.01.
    #[serde(default = "default_materiality_immaterial")]
    pub immaterial: f64,

    /// Threshold for material impact. Default: 0.05.
    #[serde(default = "default_materiality_material")]
    pub material: f64,

    /// Threshold for highly material impact. Default: 0.10.
    #[serde(default = "default_materiality_highly_material")]
    pub highly_material: f64,
}
10950
/// Fallback for `MaterialityThresholdsConfig::trivial`: 0.001.
fn default_materiality_trivial() -> f64 {
    0.001_f64
}

/// Fallback for `MaterialityThresholdsConfig::immaterial`: 0.01.
fn default_materiality_immaterial() -> f64 {
    0.01_f64
}

/// Fallback for `MaterialityThresholdsConfig::material`: 0.05.
fn default_materiality_material() -> f64 {
    0.05_f64
}

/// Fallback for `MaterialityThresholdsConfig::highly_material`: 0.10.
fn default_materiality_highly_material() -> f64 {
    0.10_f64
}
10963
10964impl Default for MaterialityThresholdsConfig {
10965    fn default() -> Self {
10966        Self {
10967            trivial: default_materiality_trivial(),
10968            immaterial: default_materiality_immaterial(),
10969            material: default_materiality_material(),
10970            highly_material: default_materiality_highly_material(),
10971        }
10972    }
10973}
10974
10975// =============================================================================
10976// Industry-Specific Configuration
10977// =============================================================================
10978
/// Industry-specific transaction and anomaly generation configuration.
///
/// This configuration enables generation of industry-authentic:
/// - Transaction types with appropriate terminology
/// - Master data (BOM, routings, clinical codes, etc.)
/// - Industry-specific anomaly patterns
/// - Regulatory framework compliance
///
/// `Default` is derived, so `enabled` starts as `false` and every industry
/// section starts from that section type's own `Default` impl. Each field is
/// also `#[serde(default)]`, so any key may be omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct IndustrySpecificConfig {
    /// Enable industry-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Manufacturing industry settings.
    #[serde(default)]
    pub manufacturing: ManufacturingConfig,

    /// Retail industry settings.
    #[serde(default)]
    pub retail: RetailConfig,

    /// Healthcare industry settings.
    #[serde(default)]
    pub healthcare: HealthcareConfig,

    /// Technology industry settings.
    #[serde(default)]
    pub technology: TechnologyConfig,

    /// Financial services industry settings.
    #[serde(default)]
    pub financial_services: FinancialServicesConfig,

    /// Professional services industry settings.
    #[serde(default)]
    pub professional_services: ProfessionalServicesConfig,
}
11016
/// Manufacturing industry configuration.
///
/// All fields are optional on input; missing keys take the defaults noted on
/// each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManufacturingConfig {
    /// Enable manufacturing-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Bill of Materials depth (typical: 3-7; default: 4).
    #[serde(default = "default_bom_depth")]
    pub bom_depth: u32,

    /// Whether to use just-in-time inventory (default: `false`).
    #[serde(default)]
    pub just_in_time: bool,

    /// Production order types to generate
    /// (default: `"standard"`, `"rework"`, `"prototype"`).
    #[serde(default = "default_production_order_types")]
    pub production_order_types: Vec<String>,

    /// Quality framework (ISO_9001, Six_Sigma, etc.).
    ///
    /// When this key is omitted from a present `manufacturing` section the
    /// value is `None`, whereas `ManufacturingConfig::default()` sets
    /// `Some("ISO_9001")`.
    /// NOTE(review): confirm that this difference is intended.
    #[serde(default)]
    pub quality_framework: Option<String>,

    /// Number of supplier tiers to model (1-3; default: 2).
    #[serde(default = "default_supplier_tiers")]
    pub supplier_tiers: u32,

    /// Standard cost update frequency (default: `"quarterly"`).
    #[serde(default = "default_cost_frequency")]
    pub standard_cost_frequency: String,

    /// Target yield rate (0.95-0.99 typical; default: 0.97).
    #[serde(default = "default_yield_rate")]
    pub target_yield_rate: f64,

    /// Scrap percentage threshold for alerts (default: 0.03).
    #[serde(default = "default_scrap_threshold")]
    pub scrap_alert_threshold: f64,

    /// Manufacturing anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: ManufacturingAnomalyRates,

    /// Cost accounting configuration (WIP → FG → COGS pipeline).
    #[serde(default)]
    pub cost_accounting: ManufacturingCostAccountingConfig,
}
11064
/// Configuration for manufacturing cost accounting JE generation.
///
/// `Default::default()` turns all three switches on and sets the warranty
/// defect threshold to 0.01. The boolean serde defaults come from
/// `default_true`, which is defined outside this section (presumably returns
/// `true` — confirm).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManufacturingCostAccountingConfig {
    /// Enable multi-stage cost flow (WIP → FG → COGS) instead of flat JEs.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Generate standard cost variance JEs.
    #[serde(default = "default_true")]
    pub variance_accounts_enabled: bool,

    /// Generate warranty provisions from quality inspection failures.
    #[serde(default = "default_true")]
    pub warranty_provisions_enabled: bool,

    /// Minimum defect rate (0.0-1.0) to trigger warranty provision generation.
    /// Default: 0.01.
    #[serde(default = "default_warranty_defect_threshold")]
    pub warranty_defect_threshold: f64,
}
11084
/// Fallback for `warranty_defect_threshold` when the key is absent: 0.01.
fn default_warranty_defect_threshold() -> f64 {
    0.01_f64
}
11088
11089impl Default for ManufacturingCostAccountingConfig {
11090    fn default() -> Self {
11091        Self {
11092            enabled: true,
11093            variance_accounts_enabled: true,
11094            warranty_provisions_enabled: true,
11095            warranty_defect_threshold: 0.01,
11096        }
11097    }
11098}
11099
/// Fallback for `ManufacturingConfig::bom_depth`: 4.
fn default_bom_depth() -> u32 {
    4_u32
}

/// Fallback for `ManufacturingConfig::production_order_types`.
fn default_production_order_types() -> Vec<String> {
    ["standard", "rework", "prototype"]
        .iter()
        .map(|kind| kind.to_string())
        .collect()
}

/// Fallback for `ManufacturingConfig::supplier_tiers`: 2.
fn default_supplier_tiers() -> u32 {
    2_u32
}

/// Fallback for `ManufacturingConfig::standard_cost_frequency`.
fn default_cost_frequency() -> String {
    String::from("quarterly")
}

/// Fallback for `ManufacturingConfig::target_yield_rate`: 0.97.
fn default_yield_rate() -> f64 {
    0.97_f64
}

/// Fallback for `ManufacturingConfig::scrap_alert_threshold`: 0.03.
fn default_scrap_threshold() -> f64 {
    0.03_f64
}
11127
11128impl Default for ManufacturingConfig {
11129    fn default() -> Self {
11130        Self {
11131            enabled: false,
11132            bom_depth: default_bom_depth(),
11133            just_in_time: false,
11134            production_order_types: default_production_order_types(),
11135            quality_framework: Some("ISO_9001".to_string()),
11136            supplier_tiers: default_supplier_tiers(),
11137            standard_cost_frequency: default_cost_frequency(),
11138            target_yield_rate: default_yield_rate(),
11139            scrap_alert_threshold: default_scrap_threshold(),
11140            anomaly_rates: ManufacturingAnomalyRates::default(),
11141            cost_accounting: ManufacturingCostAccountingConfig::default(),
11142        }
11143    }
11144}
11145
/// Manufacturing anomaly injection rates.
///
/// Each rate is optional on input and falls back to the default noted on the
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManufacturingAnomalyRates {
    /// Yield manipulation rate (default: 0.015).
    #[serde(default = "default_mfg_yield_rate")]
    pub yield_manipulation: f64,

    /// Labor misallocation rate (default: 0.02).
    #[serde(default = "default_mfg_labor_rate")]
    pub labor_misallocation: f64,

    /// Phantom production rate (default: 0.005).
    #[serde(default = "default_mfg_phantom_rate")]
    pub phantom_production: f64,

    /// Standard cost manipulation rate (default: 0.01).
    #[serde(default = "default_mfg_cost_rate")]
    pub standard_cost_manipulation: f64,

    /// Inventory fraud rate (default: 0.008).
    #[serde(default = "default_mfg_inventory_rate")]
    pub inventory_fraud: f64,
}
11169
/// Fallback for `ManufacturingAnomalyRates::yield_manipulation`: 0.015.
fn default_mfg_yield_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ManufacturingAnomalyRates::labor_misallocation`: 0.02.
fn default_mfg_labor_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ManufacturingAnomalyRates::phantom_production`: 0.005.
fn default_mfg_phantom_rate() -> f64 {
    0.005_f64
}

/// Fallback for `ManufacturingAnomalyRates::standard_cost_manipulation`: 0.01.
fn default_mfg_cost_rate() -> f64 {
    0.01_f64
}

/// Fallback for `ManufacturingAnomalyRates::inventory_fraud`: 0.008.
fn default_mfg_inventory_rate() -> f64 {
    0.008_f64
}
11189
11190impl Default for ManufacturingAnomalyRates {
11191    fn default() -> Self {
11192        Self {
11193            yield_manipulation: default_mfg_yield_rate(),
11194            labor_misallocation: default_mfg_labor_rate(),
11195            phantom_production: default_mfg_phantom_rate(),
11196            standard_cost_manipulation: default_mfg_cost_rate(),
11197            inventory_fraud: default_mfg_inventory_rate(),
11198        }
11199    }
11200}
11201
/// Retail industry configuration.
///
/// All fields are optional on input; missing keys take the defaults noted on
/// each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetailConfig {
    /// Enable retail-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Store type distribution.
    #[serde(default)]
    pub store_types: RetailStoreTypeConfig,

    /// Average daily transactions per store (default: 500).
    #[serde(default = "default_retail_daily_txns")]
    pub avg_daily_transactions: u32,

    /// Enable loss prevention tracking.
    ///
    /// `RetailConfig::default()` sets this to `true`; the serde fallback is
    /// `default_true`, defined outside this section.
    #[serde(default = "default_true")]
    pub loss_prevention: bool,

    /// Shrinkage rate (0.01-0.03 typical; default: 0.015).
    #[serde(default = "default_shrinkage_rate")]
    pub shrinkage_rate: f64,

    /// Retail anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: RetailAnomalyRates,
}
11229
/// Fallback for `RetailConfig::avg_daily_transactions`: 500.
fn default_retail_daily_txns() -> u32 {
    500_u32
}

/// Fallback for `RetailConfig::shrinkage_rate`: 0.015.
fn default_shrinkage_rate() -> f64 {
    0.015_f64
}
11237
11238impl Default for RetailConfig {
11239    fn default() -> Self {
11240        Self {
11241            enabled: false,
11242            store_types: RetailStoreTypeConfig::default(),
11243            avg_daily_transactions: default_retail_daily_txns(),
11244            loss_prevention: true,
11245            shrinkage_rate: default_shrinkage_rate(),
11246            anomaly_rates: RetailAnomalyRates::default(),
11247        }
11248    }
11249}
11250
/// Retail store type distribution.
///
/// The defaults are 0.10 / 0.50 / 0.25 / 0.15, which sum to 1.0; nothing in
/// this struct definition checks that supplied values do the same.
/// NOTE(review): the values appear to be fractions rather than percent
/// figures, despite the "percentage" wording — confirm with the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetailStoreTypeConfig {
    /// Percentage of flagship stores (default: 0.10).
    #[serde(default = "default_flagship_pct")]
    pub flagship: f64,

    /// Percentage of regional stores (default: 0.50).
    #[serde(default = "default_regional_pct")]
    pub regional: f64,

    /// Percentage of outlet stores (default: 0.25).
    #[serde(default = "default_outlet_pct")]
    pub outlet: f64,

    /// Percentage of e-commerce (default: 0.15).
    #[serde(default = "default_ecommerce_pct")]
    pub ecommerce: f64,
}
11270
/// Fallback for `RetailStoreTypeConfig::flagship`: 0.10.
fn default_flagship_pct() -> f64 {
    0.10_f64
}

/// Fallback for `RetailStoreTypeConfig::regional`: 0.50.
fn default_regional_pct() -> f64 {
    0.50_f64
}

/// Fallback for `RetailStoreTypeConfig::outlet`: 0.25.
fn default_outlet_pct() -> f64 {
    0.25_f64
}

/// Fallback for `RetailStoreTypeConfig::ecommerce`: 0.15.
fn default_ecommerce_pct() -> f64 {
    0.15_f64
}
11286
11287impl Default for RetailStoreTypeConfig {
11288    fn default() -> Self {
11289        Self {
11290            flagship: default_flagship_pct(),
11291            regional: default_regional_pct(),
11292            outlet: default_outlet_pct(),
11293            ecommerce: default_ecommerce_pct(),
11294        }
11295    }
11296}
11297
/// Retail anomaly injection rates.
///
/// Each rate is optional on input and falls back to the default noted on the
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetailAnomalyRates {
    /// Sweethearting rate (default: 0.02).
    #[serde(default = "default_sweethearting_rate")]
    pub sweethearting: f64,

    /// Skimming rate (default: 0.005).
    #[serde(default = "default_skimming_rate")]
    pub skimming: f64,

    /// Refund fraud rate (default: 0.015).
    #[serde(default = "default_refund_fraud_rate")]
    pub refund_fraud: f64,

    /// Void abuse rate (default: 0.01).
    #[serde(default = "default_void_abuse_rate")]
    pub void_abuse: f64,

    /// Gift card fraud rate (default: 0.008).
    #[serde(default = "default_gift_card_rate")]
    pub gift_card_fraud: f64,

    /// Vendor kickback rate (default: 0.003).
    #[serde(default = "default_retail_kickback_rate")]
    pub vendor_kickback: f64,
}
11325
/// Fallback for `RetailAnomalyRates::sweethearting`: 0.02.
fn default_sweethearting_rate() -> f64 {
    0.02_f64
}

/// Fallback for `RetailAnomalyRates::skimming`: 0.005.
fn default_skimming_rate() -> f64 {
    0.005_f64
}

/// Fallback for `RetailAnomalyRates::refund_fraud`: 0.015.
fn default_refund_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `RetailAnomalyRates::void_abuse`: 0.01.
fn default_void_abuse_rate() -> f64 {
    0.01_f64
}

/// Fallback for `RetailAnomalyRates::gift_card_fraud`: 0.008.
fn default_gift_card_rate() -> f64 {
    0.008_f64
}

/// Fallback for `RetailAnomalyRates::vendor_kickback`: 0.003.
fn default_retail_kickback_rate() -> f64 {
    0.003_f64
}
11349
11350impl Default for RetailAnomalyRates {
11351    fn default() -> Self {
11352        Self {
11353            sweethearting: default_sweethearting_rate(),
11354            skimming: default_skimming_rate(),
11355            refund_fraud: default_refund_fraud_rate(),
11356            void_abuse: default_void_abuse_rate(),
11357            gift_card_fraud: default_gift_card_rate(),
11358            vendor_kickback: default_retail_kickback_rate(),
11359        }
11360    }
11361}
11362
/// Healthcare industry configuration.
///
/// All fields are optional on input; missing keys take the defaults noted on
/// each field, and nested sections use their own `Default` impls.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthcareConfig {
    /// Enable healthcare-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Healthcare facility type (default: `"hospital"`).
    #[serde(default = "default_facility_type")]
    pub facility_type: String,

    /// Payer mix distribution.
    #[serde(default)]
    pub payer_mix: HealthcarePayerMix,

    /// Coding systems enabled.
    #[serde(default)]
    pub coding_systems: HealthcareCodingSystems,

    /// Healthcare compliance settings.
    #[serde(default)]
    pub compliance: HealthcareComplianceConfig,

    /// Average daily encounters (default: 150).
    #[serde(default = "default_daily_encounters")]
    pub avg_daily_encounters: u32,

    /// Average charges per encounter (default: 8).
    #[serde(default = "default_charges_per_encounter")]
    pub avg_charges_per_encounter: u32,

    /// Denial rate (0.0-1.0; default: 0.05).
    #[serde(default = "default_hc_denial_rate")]
    pub denial_rate: f64,

    /// Bad debt rate (0.0-1.0; default: 0.03).
    #[serde(default = "default_hc_bad_debt_rate")]
    pub bad_debt_rate: f64,

    /// Charity care rate (0.0-1.0; default: 0.02).
    #[serde(default = "default_hc_charity_care_rate")]
    pub charity_care_rate: f64,

    /// Healthcare anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: HealthcareAnomalyRates,
}
11410
/// Fallback for `HealthcareConfig::facility_type`.
fn default_facility_type() -> String {
    String::from("hospital")
}

/// Fallback for `HealthcareConfig::avg_daily_encounters`: 150.
fn default_daily_encounters() -> u32 {
    150_u32
}

/// Fallback for `HealthcareConfig::avg_charges_per_encounter`: 8.
fn default_charges_per_encounter() -> u32 {
    8_u32
}

/// Fallback for `HealthcareConfig::denial_rate`: 0.05.
fn default_hc_denial_rate() -> f64 {
    0.05_f64
}

/// Fallback for `HealthcareConfig::bad_debt_rate`: 0.03.
fn default_hc_bad_debt_rate() -> f64 {
    0.03_f64
}

/// Fallback for `HealthcareConfig::charity_care_rate`: 0.02.
fn default_hc_charity_care_rate() -> f64 {
    0.02_f64
}
11434
11435impl Default for HealthcareConfig {
11436    fn default() -> Self {
11437        Self {
11438            enabled: false,
11439            facility_type: default_facility_type(),
11440            payer_mix: HealthcarePayerMix::default(),
11441            coding_systems: HealthcareCodingSystems::default(),
11442            compliance: HealthcareComplianceConfig::default(),
11443            avg_daily_encounters: default_daily_encounters(),
11444            avg_charges_per_encounter: default_charges_per_encounter(),
11445            denial_rate: default_hc_denial_rate(),
11446            bad_debt_rate: default_hc_bad_debt_rate(),
11447            charity_care_rate: default_hc_charity_care_rate(),
11448            anomaly_rates: HealthcareAnomalyRates::default(),
11449        }
11450    }
11451}
11452
/// Healthcare payer mix distribution.
///
/// The defaults are 0.40 / 0.20 / 0.30 / 0.10, which sum to 1.0; nothing in
/// this struct definition checks that supplied values do the same.
/// NOTE(review): the values appear to be fractions rather than percent
/// figures, despite the "percentage" wording — confirm with the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthcarePayerMix {
    /// Medicare percentage (default: 0.40).
    #[serde(default = "default_medicare_pct")]
    pub medicare: f64,

    /// Medicaid percentage (default: 0.20).
    #[serde(default = "default_medicaid_pct")]
    pub medicaid: f64,

    /// Commercial insurance percentage (default: 0.30).
    #[serde(default = "default_commercial_pct")]
    pub commercial: f64,

    /// Self-pay percentage (default: 0.10).
    #[serde(default = "default_self_pay_pct")]
    pub self_pay: f64,
}
11472
/// Fallback for `HealthcarePayerMix::medicare`: 0.40.
fn default_medicare_pct() -> f64 {
    0.40_f64
}

/// Fallback for `HealthcarePayerMix::medicaid`: 0.20.
fn default_medicaid_pct() -> f64 {
    0.20_f64
}

/// Fallback for `HealthcarePayerMix::commercial`: 0.30.
fn default_commercial_pct() -> f64 {
    0.30_f64
}

/// Fallback for `HealthcarePayerMix::self_pay`: 0.10.
fn default_self_pay_pct() -> f64 {
    0.10_f64
}
11488
11489impl Default for HealthcarePayerMix {
11490    fn default() -> Self {
11491        Self {
11492            medicare: default_medicare_pct(),
11493            medicaid: default_medicaid_pct(),
11494            commercial: default_commercial_pct(),
11495            self_pay: default_self_pay_pct(),
11496        }
11497    }
11498}
11499
/// Healthcare coding systems configuration.
///
/// `Default::default()` enables every coding system. The serde fallback for
/// each flag is `default_true`, defined outside this section (presumably
/// returns `true` — confirm).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthcareCodingSystems {
    /// Enable ICD-10 diagnosis coding.
    #[serde(default = "default_true")]
    pub icd10: bool,

    /// Enable CPT procedure coding.
    #[serde(default = "default_true")]
    pub cpt: bool,

    /// Enable DRG grouping.
    #[serde(default = "default_true")]
    pub drg: bool,

    /// Enable HCPCS Level II coding.
    #[serde(default = "default_true")]
    pub hcpcs: bool,

    /// Enable revenue codes.
    #[serde(default = "default_true")]
    pub revenue_codes: bool,
}
11523
11524impl Default for HealthcareCodingSystems {
11525    fn default() -> Self {
11526        Self {
11527            icd10: true,
11528            cpt: true,
11529            drg: true,
11530            hcpcs: true,
11531            revenue_codes: true,
11532        }
11533    }
11534}
11535
/// Healthcare compliance configuration.
///
/// `Default::default()` enables every compliance regime. The serde fallback
/// for each flag is `default_true`, defined outside this section (presumably
/// returns `true` — confirm).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthcareComplianceConfig {
    /// Enable HIPAA compliance.
    #[serde(default = "default_true")]
    pub hipaa: bool,

    /// Enable Stark Law compliance.
    #[serde(default = "default_true")]
    pub stark_law: bool,

    /// Enable Anti-Kickback Statute compliance.
    #[serde(default = "default_true")]
    pub anti_kickback: bool,

    /// Enable False Claims Act compliance.
    #[serde(default = "default_true")]
    pub false_claims_act: bool,

    /// Enable EMTALA compliance (for hospitals).
    #[serde(default = "default_true")]
    pub emtala: bool,
}
11559
11560impl Default for HealthcareComplianceConfig {
11561    fn default() -> Self {
11562        Self {
11563            hipaa: true,
11564            stark_law: true,
11565            anti_kickback: true,
11566            false_claims_act: true,
11567            emtala: true,
11568        }
11569    }
11570}
11571
/// Healthcare anomaly injection rates.
///
/// Each rate is optional on input and falls back to the default noted on the
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthcareAnomalyRates {
    /// Upcoding rate (default: 0.02).
    #[serde(default = "default_upcoding_rate")]
    pub upcoding: f64,

    /// Unbundling rate (default: 0.015).
    #[serde(default = "default_unbundling_rate")]
    pub unbundling: f64,

    /// Phantom billing rate (default: 0.005).
    #[serde(default = "default_phantom_billing_rate")]
    pub phantom_billing: f64,

    /// Kickback rate (default: 0.003).
    #[serde(default = "default_healthcare_kickback_rate")]
    pub kickbacks: f64,

    /// Duplicate billing rate (default: 0.008).
    #[serde(default = "default_duplicate_billing_rate")]
    pub duplicate_billing: f64,

    /// Medical necessity abuse rate (default: 0.01).
    #[serde(default = "default_med_necessity_rate")]
    pub medical_necessity_abuse: f64,
}
11599
/// Fallback for `HealthcareAnomalyRates::upcoding`: 0.02.
fn default_upcoding_rate() -> f64 {
    0.02_f64
}

/// Fallback for `HealthcareAnomalyRates::unbundling`: 0.015.
fn default_unbundling_rate() -> f64 {
    0.015_f64
}

/// Fallback for `HealthcareAnomalyRates::phantom_billing`: 0.005.
fn default_phantom_billing_rate() -> f64 {
    0.005_f64
}

/// Fallback for `HealthcareAnomalyRates::kickbacks`: 0.003.
fn default_healthcare_kickback_rate() -> f64 {
    0.003_f64
}

/// Fallback for `HealthcareAnomalyRates::duplicate_billing`: 0.008.
fn default_duplicate_billing_rate() -> f64 {
    0.008_f64
}

/// Fallback for `HealthcareAnomalyRates::medical_necessity_abuse`: 0.01.
fn default_med_necessity_rate() -> f64 {
    0.01_f64
}
11623
11624impl Default for HealthcareAnomalyRates {
11625    fn default() -> Self {
11626        Self {
11627            upcoding: default_upcoding_rate(),
11628            unbundling: default_unbundling_rate(),
11629            phantom_billing: default_phantom_billing_rate(),
11630            kickbacks: default_healthcare_kickback_rate(),
11631            duplicate_billing: default_duplicate_billing_rate(),
11632            medical_necessity_abuse: default_med_necessity_rate(),
11633        }
11634    }
11635}
11636
/// Technology industry configuration.
///
/// The three revenue-share defaults (0.60 / 0.25 / 0.15) sum to 1.0; nothing
/// in this struct definition checks that supplied values do the same.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnologyConfig {
    /// Enable technology-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Revenue model type (default: `"saas"`).
    #[serde(default = "default_revenue_model")]
    pub revenue_model: String,

    /// Subscription revenue percentage (for SaaS). Default: 0.60.
    #[serde(default = "default_subscription_pct")]
    pub subscription_revenue_pct: f64,

    /// License revenue percentage. Default: 0.25.
    #[serde(default = "default_license_pct")]
    pub license_revenue_pct: f64,

    /// Services revenue percentage. Default: 0.15.
    #[serde(default = "default_services_pct")]
    pub services_revenue_pct: f64,

    /// R&D capitalization settings.
    #[serde(default)]
    pub rd_capitalization: RdCapitalizationConfig,

    /// Technology anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: TechnologyAnomalyRates,
}
11668
/// Fallback for `TechnologyConfig::revenue_model`.
fn default_revenue_model() -> String {
    String::from("saas")
}

/// Fallback for `TechnologyConfig::subscription_revenue_pct`: 0.60.
fn default_subscription_pct() -> f64 {
    0.60_f64
}

/// Fallback for `TechnologyConfig::license_revenue_pct`: 0.25.
fn default_license_pct() -> f64 {
    0.25_f64
}

/// Fallback for `TechnologyConfig::services_revenue_pct`: 0.15.
fn default_services_pct() -> f64 {
    0.15_f64
}
11684
11685impl Default for TechnologyConfig {
11686    fn default() -> Self {
11687        Self {
11688            enabled: false,
11689            revenue_model: default_revenue_model(),
11690            subscription_revenue_pct: default_subscription_pct(),
11691            license_revenue_pct: default_license_pct(),
11692            services_revenue_pct: default_services_pct(),
11693            rd_capitalization: RdCapitalizationConfig::default(),
11694            anomaly_rates: TechnologyAnomalyRates::default(),
11695        }
11696    }
11697}
11698
/// R&D capitalization configuration.
///
/// `Default::default()` enables capitalization with a 0.30 rate and a 3-year
/// useful life.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RdCapitalizationConfig {
    /// Enable R&D capitalization.
    ///
    /// The serde fallback is `default_true`, defined outside this section
    /// (presumably returns `true` — confirm).
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Capitalization rate (0.0-1.0; default: 0.30).
    #[serde(default = "default_cap_rate")]
    pub capitalization_rate: f64,

    /// Useful life in years (default: 3).
    #[serde(default = "default_useful_life")]
    pub useful_life_years: u32,
}
11714
/// Fallback for `RdCapitalizationConfig::capitalization_rate`: 0.30.
fn default_cap_rate() -> f64 {
    0.30_f64
}

/// Fallback for `RdCapitalizationConfig::useful_life_years`: 3.
fn default_useful_life() -> u32 {
    3_u32
}
11722
11723impl Default for RdCapitalizationConfig {
11724    fn default() -> Self {
11725        Self {
11726            enabled: true,
11727            capitalization_rate: default_cap_rate(),
11728            useful_life_years: default_useful_life(),
11729        }
11730    }
11731}
11732
/// Technology anomaly injection rates.
///
/// Each rate is optional on input and falls back to the default noted on the
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnologyAnomalyRates {
    /// Premature revenue recognition rate (default: 0.015).
    #[serde(default = "default_premature_rev_rate")]
    pub premature_revenue: f64,

    /// Side letter abuse rate (default: 0.008).
    #[serde(default = "default_side_letter_rate")]
    pub side_letter_abuse: f64,

    /// Channel stuffing rate (default: 0.01).
    #[serde(default = "default_channel_stuffing_rate")]
    pub channel_stuffing: f64,

    /// Improper capitalization rate (default: 0.012).
    #[serde(default = "default_improper_cap_rate")]
    pub improper_capitalization: f64,
}
11752
/// Fallback for `TechnologyAnomalyRates::premature_revenue`: 0.015.
fn default_premature_rev_rate() -> f64 {
    0.015_f64
}

/// Fallback for `TechnologyAnomalyRates::side_letter_abuse`: 0.008.
fn default_side_letter_rate() -> f64 {
    0.008_f64
}

/// Fallback for `TechnologyAnomalyRates::channel_stuffing`: 0.01.
fn default_channel_stuffing_rate() -> f64 {
    0.01_f64
}

/// Fallback for `TechnologyAnomalyRates::improper_capitalization`: 0.012.
fn default_improper_cap_rate() -> f64 {
    0.012_f64
}
11768
11769impl Default for TechnologyAnomalyRates {
11770    fn default() -> Self {
11771        Self {
11772            premature_revenue: default_premature_rev_rate(),
11773            side_letter_abuse: default_side_letter_rate(),
11774            channel_stuffing: default_channel_stuffing_rate(),
11775            improper_capitalization: default_improper_cap_rate(),
11776        }
11777    }
11778}
11779
/// Financial services industry configuration.
///
/// All fields are optional on input; missing keys take the defaults noted on
/// each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinancialServicesConfig {
    /// Enable financial services-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Financial institution type (default: `"commercial_bank"`).
    #[serde(default = "default_fi_type")]
    pub institution_type: String,

    /// Regulatory framework (default: `"us_banking"`).
    #[serde(default = "default_fi_regulatory")]
    pub regulatory_framework: String,

    /// Financial services anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: FinancialServicesAnomalyRates,
}
11799
/// Fallback for `FinancialServicesConfig::institution_type`.
fn default_fi_type() -> String {
    String::from("commercial_bank")
}

/// Fallback for `FinancialServicesConfig::regulatory_framework`.
fn default_fi_regulatory() -> String {
    String::from("us_banking")
}
11807
11808impl Default for FinancialServicesConfig {
11809    fn default() -> Self {
11810        Self {
11811            enabled: false,
11812            institution_type: default_fi_type(),
11813            regulatory_framework: default_fi_regulatory(),
11814            anomaly_rates: FinancialServicesAnomalyRates::default(),
11815        }
11816    }
11817}
11818
/// Financial services anomaly injection rates.
///
/// Each rate is optional on input and falls back to the default noted on the
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinancialServicesAnomalyRates {
    /// Loan fraud rate (default: 0.01).
    #[serde(default = "default_loan_fraud_rate")]
    pub loan_fraud: f64,

    /// Trading fraud rate (default: 0.008).
    #[serde(default = "default_trading_fraud_rate")]
    pub trading_fraud: f64,

    /// Insurance fraud rate (default: 0.012).
    #[serde(default = "default_insurance_fraud_rate")]
    pub insurance_fraud: f64,

    /// Account manipulation rate (default: 0.005).
    #[serde(default = "default_account_manip_rate")]
    pub account_manipulation: f64,
}
11838
/// Fallback for `FinancialServicesAnomalyRates::loan_fraud`: 0.01.
fn default_loan_fraud_rate() -> f64 {
    0.01_f64
}

/// Fallback for `FinancialServicesAnomalyRates::trading_fraud`: 0.008.
fn default_trading_fraud_rate() -> f64 {
    0.008_f64
}

/// Fallback for `FinancialServicesAnomalyRates::insurance_fraud`: 0.012.
fn default_insurance_fraud_rate() -> f64 {
    0.012_f64
}

/// Fallback for `FinancialServicesAnomalyRates::account_manipulation`: 0.005.
fn default_account_manip_rate() -> f64 {
    0.005_f64
}
11854
11855impl Default for FinancialServicesAnomalyRates {
11856    fn default() -> Self {
11857        Self {
11858            loan_fraud: default_loan_fraud_rate(),
11859            trading_fraud: default_trading_fraud_rate(),
11860            insurance_fraud: default_insurance_fraud_rate(),
11861            account_manipulation: default_account_manip_rate(),
11862        }
11863    }
11864}
11865
11866/// Professional services industry configuration.
11867#[derive(Debug, Clone, Serialize, Deserialize)]
11868pub struct ProfessionalServicesConfig {
11869    /// Enable professional services-specific generation.
11870    #[serde(default)]
11871    pub enabled: bool,
11872
11873    /// Firm type.
11874    #[serde(default = "default_firm_type")]
11875    pub firm_type: String,
11876
11877    /// Billing model.
11878    #[serde(default = "default_billing_model")]
11879    pub billing_model: String,
11880
11881    /// Average hourly rate.
11882    #[serde(default = "default_hourly_rate")]
11883    pub avg_hourly_rate: f64,
11884
11885    /// Trust account settings (for law firms).
11886    #[serde(default)]
11887    pub trust_accounting: TrustAccountingConfig,
11888
11889    /// Professional services anomaly injection rates.
11890    #[serde(default)]
11891    pub anomaly_rates: ProfessionalServicesAnomalyRates,
11892}
11893
/// Fallback for `ProfessionalServicesConfig::firm_type`.
fn default_firm_type() -> String {
    String::from("consulting")
}

/// Fallback for `ProfessionalServicesConfig::billing_model`.
fn default_billing_model() -> String {
    String::from("time_and_materials")
}

/// Fallback for `ProfessionalServicesConfig::avg_hourly_rate`.
fn default_hourly_rate() -> f64 {
    250.0_f64
}
11905
11906impl Default for ProfessionalServicesConfig {
11907    fn default() -> Self {
11908        Self {
11909            enabled: false,
11910            firm_type: default_firm_type(),
11911            billing_model: default_billing_model(),
11912            avg_hourly_rate: default_hourly_rate(),
11913            trust_accounting: TrustAccountingConfig::default(),
11914            anomaly_rates: ProfessionalServicesAnomalyRates::default(),
11915        }
11916    }
11917}
11918
11919/// Trust accounting configuration for law firms.
11920#[derive(Debug, Clone, Serialize, Deserialize)]
11921pub struct TrustAccountingConfig {
11922    /// Enable trust accounting.
11923    #[serde(default)]
11924    pub enabled: bool,
11925
11926    /// Require three-way reconciliation.
11927    #[serde(default = "default_true")]
11928    pub require_three_way_reconciliation: bool,
11929}
11930
11931impl Default for TrustAccountingConfig {
11932    fn default() -> Self {
11933        Self {
11934            enabled: false,
11935            require_three_way_reconciliation: true,
11936        }
11937    }
11938}
11939
11940/// Professional services anomaly injection rates.
11941#[derive(Debug, Clone, Serialize, Deserialize)]
11942pub struct ProfessionalServicesAnomalyRates {
11943    /// Time billing fraud rate.
11944    #[serde(default = "default_time_fraud_rate")]
11945    pub time_billing_fraud: f64,
11946
11947    /// Expense report fraud rate.
11948    #[serde(default = "default_expense_fraud_rate")]
11949    pub expense_fraud: f64,
11950
11951    /// Trust misappropriation rate.
11952    #[serde(default = "default_trust_misappropriation_rate")]
11953    pub trust_misappropriation: f64,
11954}
11955
/// Fallback for `ProfessionalServicesAnomalyRates::time_billing_fraud`.
fn default_time_fraud_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::expense_fraud`.
fn default_expense_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::trust_misappropriation`.
fn default_trust_misappropriation_rate() -> f64 {
    0.003_f64
}
11967
11968impl Default for ProfessionalServicesAnomalyRates {
11969    fn default() -> Self {
11970        Self {
11971            time_billing_fraud: default_time_fraud_rate(),
11972            expense_fraud: default_expense_fraud_rate(),
11973            trust_misappropriation: default_trust_misappropriation_rate(),
11974        }
11975    }
11976}
11977
11978/// Fingerprint privacy configuration for extraction and synthesis.
11979///
11980/// Controls the privacy parameters used when extracting fingerprints
11981/// from sensitive data. Supports predefined levels or custom (epsilon, delta) tuples.
11982///
11983/// ```yaml
11984/// fingerprint_privacy:
11985///   level: custom
11986///   epsilon: 0.5
11987///   delta: 1.0e-5
11988///   k_anonymity: 10
11989///   composition_method: renyi_dp
11990/// ```
11991#[derive(Debug, Clone, Serialize, Deserialize)]
11992pub struct FingerprintPrivacyConfig {
11993    /// Privacy level preset. Use "custom" for user-specified epsilon/delta.
11994    #[serde(default)]
11995    pub level: String,
11996    /// Custom epsilon value (only used when level = "custom").
11997    #[serde(default = "default_epsilon")]
11998    pub epsilon: f64,
11999    /// Custom delta value for (epsilon, delta)-DP (only used with RDP/zCDP).
12000    #[serde(default = "default_delta")]
12001    pub delta: f64,
12002    /// K-anonymity threshold.
12003    #[serde(default = "default_k_anonymity")]
12004    pub k_anonymity: u32,
12005    /// Composition method: "naive", "advanced", "renyi_dp", "zcdp".
12006    #[serde(default)]
12007    pub composition_method: String,
12008}
12009
12010fn default_epsilon() -> f64 {
12011    1.0
12012}
12013
12014fn default_delta() -> f64 {
12015    1e-5
12016}
12017
12018fn default_k_anonymity() -> u32 {
12019    5
12020}
12021
12022impl Default for FingerprintPrivacyConfig {
12023    fn default() -> Self {
12024        Self {
12025            level: "standard".to_string(),
12026            epsilon: default_epsilon(),
12027            delta: default_delta(),
12028            k_anonymity: default_k_anonymity(),
12029            composition_method: "naive".to_string(),
12030        }
12031    }
12032}
12033
12034/// Quality gates configuration for pass/fail thresholds on generation runs.
12035///
12036/// ```yaml
12037/// quality_gates:
12038///   enabled: true
12039///   profile: strict  # strict, default, lenient, custom
12040///   fail_on_violation: true
12041///   custom_gates:
12042///     - name: benford_compliance
12043///       metric: benford_mad
12044///       threshold: 0.015
12045///       comparison: lte
12046/// ```
12047#[derive(Debug, Clone, Serialize, Deserialize)]
12048pub struct QualityGatesSchemaConfig {
12049    /// Enable quality gate evaluation.
12050    #[serde(default)]
12051    pub enabled: bool,
12052    /// Gate profile: "strict", "default", "lenient", or "custom".
12053    #[serde(default = "default_gate_profile_name")]
12054    pub profile: String,
12055    /// Whether to fail the generation on gate violations.
12056    #[serde(default)]
12057    pub fail_on_violation: bool,
12058    /// Custom gate definitions (used when profile = "custom").
12059    #[serde(default)]
12060    pub custom_gates: Vec<QualityGateEntry>,
12061}
12062
/// Fallback for `QualityGatesSchemaConfig::profile`.
fn default_gate_profile_name() -> String {
    String::from("default")
}
12066
12067impl Default for QualityGatesSchemaConfig {
12068    fn default() -> Self {
12069        Self {
12070            enabled: false,
12071            profile: default_gate_profile_name(),
12072            fail_on_violation: false,
12073            custom_gates: Vec::new(),
12074        }
12075    }
12076}
12077
12078/// A single quality gate entry in configuration.
12079#[derive(Debug, Clone, Serialize, Deserialize)]
12080pub struct QualityGateEntry {
12081    /// Gate name.
12082    pub name: String,
12083    /// Metric to check: benford_mad, balance_coherence, document_chain_integrity,
12084    /// correlation_preservation, temporal_consistency, privacy_mia_auc,
12085    /// completion_rate, duplicate_rate, referential_integrity, ic_match_rate.
12086    pub metric: String,
12087    /// Threshold value.
12088    pub threshold: f64,
12089    /// Upper threshold for "between" comparison.
12090    #[serde(default)]
12091    pub upper_threshold: Option<f64>,
12092    /// Comparison operator: "gte", "lte", "eq", "between".
12093    #[serde(default = "default_gate_comparison")]
12094    pub comparison: String,
12095}
12096
/// Fallback for `QualityGateEntry::comparison`.
fn default_gate_comparison() -> String {
    String::from("gte")
}
12100
12101/// Compliance configuration for regulatory requirements.
12102///
12103/// ```yaml
12104/// compliance:
12105///   content_marking:
12106///     enabled: true
12107///     format: embedded  # embedded, sidecar, both
12108///   article10_report: true
12109/// ```
12110#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12111pub struct ComplianceSchemaConfig {
12112    /// Synthetic content marking configuration (EU AI Act Article 50).
12113    #[serde(default)]
12114    pub content_marking: ContentMarkingSchemaConfig,
12115    /// Generate Article 10 data governance report.
12116    #[serde(default)]
12117    pub article10_report: bool,
12118    /// Certificate configuration for proving DP guarantees.
12119    #[serde(default)]
12120    pub certificates: CertificateSchemaConfig,
12121}
12122
12123/// Configuration for synthetic data certificates.
12124#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12125pub struct CertificateSchemaConfig {
12126    /// Whether certificate generation is enabled.
12127    #[serde(default)]
12128    pub enabled: bool,
12129    /// Environment variable name for the signing key.
12130    #[serde(default)]
12131    pub signing_key_env: Option<String>,
12132    /// Whether to include quality metrics in the certificate.
12133    #[serde(default)]
12134    pub include_quality_metrics: bool,
12135}
12136
/// Content marking configuration for synthetic data output.
///
/// `Default::default()` enables marking with the `"embedded"` format. When deserializing,
/// a missing `enabled` key is filled by `default_true` (defined elsewhere in this file).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentMarkingSchemaConfig {
    /// Whether content marking is enabled.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Marking format: "embedded", "sidecar", or "both".
    /// Falls back to `default_marking_format()` when omitted.
    #[serde(default = "default_marking_format")]
    pub format: String,
}
12147
/// Fallback for `ContentMarkingSchemaConfig::format`.
fn default_marking_format() -> String {
    String::from("embedded")
}
12151
12152impl Default for ContentMarkingSchemaConfig {
12153    fn default() -> Self {
12154        Self {
12155            enabled: true,
12156            format: default_marking_format(),
12157        }
12158    }
12159}
12160
/// Webhook notification configuration.
///
/// Both fields may be omitted when deserializing; the derived `Default` is a disabled
/// configuration with an empty endpoint list.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WebhookSchemaConfig {
    /// Whether webhooks are enabled.
    #[serde(default)]
    pub enabled: bool,
    /// Webhook endpoint configurations.
    #[serde(default)]
    pub endpoints: Vec<WebhookEndpointConfig>,
}
12171
12172/// Configuration for a single webhook endpoint.
12173#[derive(Debug, Clone, Serialize, Deserialize)]
12174pub struct WebhookEndpointConfig {
12175    /// Target URL for the webhook.
12176    pub url: String,
12177    /// Event types this endpoint subscribes to.
12178    #[serde(default)]
12179    pub events: Vec<String>,
12180    /// Optional secret for HMAC-SHA256 signature.
12181    #[serde(default)]
12182    pub secret: Option<String>,
12183    /// Maximum retry attempts (default: 3).
12184    #[serde(default = "default_webhook_retries")]
12185    pub max_retries: u32,
12186    /// Timeout in seconds (default: 10).
12187    #[serde(default = "default_webhook_timeout")]
12188    pub timeout_secs: u64,
12189}
12190
/// Fallback for `WebhookEndpointConfig::max_retries`.
fn default_webhook_retries() -> u32 {
    3_u32
}

/// Fallback for `WebhookEndpointConfig::timeout_secs`.
fn default_webhook_timeout() -> u64 {
    10_u64
}
12197
12198// ===== Enterprise Process Chain Config Structs =====
12199
12200// ----- Source-to-Pay (S2C/S2P) -----
12201
12202/// Source-to-Pay configuration covering the entire sourcing lifecycle.
12203#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12204pub struct SourceToPayConfig {
12205    /// Enable source-to-pay generation
12206    #[serde(default)]
12207    pub enabled: bool,
12208    /// Spend analysis configuration
12209    #[serde(default)]
12210    pub spend_analysis: SpendAnalysisConfig,
12211    /// Sourcing project configuration
12212    #[serde(default)]
12213    pub sourcing: SourcingConfig,
12214    /// Supplier qualification configuration
12215    #[serde(default)]
12216    pub qualification: QualificationConfig,
12217    /// RFx event configuration
12218    #[serde(default)]
12219    pub rfx: RfxConfig,
12220    /// Contract configuration
12221    #[serde(default)]
12222    pub contracts: ContractConfig,
12223    /// Catalog configuration
12224    #[serde(default)]
12225    pub catalog: CatalogConfig,
12226    /// Scorecard configuration
12227    #[serde(default)]
12228    pub scorecards: ScorecardConfig,
12229    /// P2P integration settings
12230    #[serde(default)]
12231    pub p2p_integration: P2PIntegrationConfig,
12232}
12233
12234/// Spend analysis configuration.
12235#[derive(Debug, Clone, Serialize, Deserialize)]
12236pub struct SpendAnalysisConfig {
12237    /// HHI threshold for triggering sourcing project
12238    #[serde(default = "default_hhi_threshold")]
12239    pub hhi_threshold: f64,
12240    /// Target spend coverage under contracts
12241    #[serde(default = "default_contract_coverage_target")]
12242    pub contract_coverage_target: f64,
12243}
12244
12245impl Default for SpendAnalysisConfig {
12246    fn default() -> Self {
12247        Self {
12248            hhi_threshold: default_hhi_threshold(),
12249            contract_coverage_target: default_contract_coverage_target(),
12250        }
12251    }
12252}
12253
/// Fallback for `SpendAnalysisConfig::hhi_threshold`.
fn default_hhi_threshold() -> f64 {
    2500.0_f64
}

/// Fallback for `SpendAnalysisConfig::contract_coverage_target`.
fn default_contract_coverage_target() -> f64 {
    0.80_f64
}
12260
12261/// Sourcing project configuration.
12262#[derive(Debug, Clone, Serialize, Deserialize)]
12263pub struct SourcingConfig {
12264    /// Number of sourcing projects per year
12265    #[serde(default = "default_sourcing_projects_per_year")]
12266    pub projects_per_year: u32,
12267    /// Months before expiry to trigger renewal project
12268    #[serde(default = "default_renewal_horizon_months")]
12269    pub renewal_horizon_months: u32,
12270    /// Average project duration in months
12271    #[serde(default = "default_project_duration_months")]
12272    pub project_duration_months: u32,
12273}
12274
12275impl Default for SourcingConfig {
12276    fn default() -> Self {
12277        Self {
12278            projects_per_year: default_sourcing_projects_per_year(),
12279            renewal_horizon_months: default_renewal_horizon_months(),
12280            project_duration_months: default_project_duration_months(),
12281        }
12282    }
12283}
12284
/// Fallback for `SourcingConfig::projects_per_year`.
fn default_sourcing_projects_per_year() -> u32 {
    10_u32
}

/// Fallback for `SourcingConfig::renewal_horizon_months`.
fn default_renewal_horizon_months() -> u32 {
    3_u32
}

/// Fallback for `SourcingConfig::project_duration_months`.
fn default_project_duration_months() -> u32 {
    4_u32
}
12294
12295/// Supplier qualification configuration.
12296#[derive(Debug, Clone, Serialize, Deserialize)]
12297pub struct QualificationConfig {
12298    /// Pass rate for qualification
12299    #[serde(default = "default_qualification_pass_rate")]
12300    pub pass_rate: f64,
12301    /// Qualification validity in days
12302    #[serde(default = "default_qualification_validity_days")]
12303    pub validity_days: u32,
12304    /// Financial stability weight
12305    #[serde(default = "default_financial_weight")]
12306    pub financial_weight: f64,
12307    /// Quality management weight
12308    #[serde(default = "default_quality_weight")]
12309    pub quality_weight: f64,
12310    /// Delivery performance weight
12311    #[serde(default = "default_delivery_weight")]
12312    pub delivery_weight: f64,
12313    /// Compliance weight
12314    #[serde(default = "default_compliance_weight")]
12315    pub compliance_weight: f64,
12316}
12317
12318impl Default for QualificationConfig {
12319    fn default() -> Self {
12320        Self {
12321            pass_rate: default_qualification_pass_rate(),
12322            validity_days: default_qualification_validity_days(),
12323            financial_weight: default_financial_weight(),
12324            quality_weight: default_quality_weight(),
12325            delivery_weight: default_delivery_weight(),
12326            compliance_weight: default_compliance_weight(),
12327        }
12328    }
12329}
12330
/// Fallback for `QualificationConfig::pass_rate`.
fn default_qualification_pass_rate() -> f64 {
    0.75_f64
}

/// Fallback for `QualificationConfig::validity_days`.
fn default_qualification_validity_days() -> u32 {
    365_u32
}

/// Fallback for `QualificationConfig::financial_weight`.
fn default_financial_weight() -> f64 {
    0.25_f64
}

/// Fallback for `QualificationConfig::quality_weight`.
fn default_quality_weight() -> f64 {
    0.30_f64
}

/// Fallback for `QualificationConfig::delivery_weight`.
fn default_delivery_weight() -> f64 {
    0.25_f64
}

/// Fallback for `QualificationConfig::compliance_weight`.
fn default_compliance_weight() -> f64 {
    0.20_f64
}
12349
12350/// RFx event configuration.
12351#[derive(Debug, Clone, Serialize, Deserialize)]
12352pub struct RfxConfig {
12353    /// Spend threshold above which RFI is required before RFP
12354    #[serde(default = "default_rfi_threshold")]
12355    pub rfi_threshold: f64,
12356    /// Minimum vendors invited per RFx
12357    #[serde(default = "default_min_invited_vendors")]
12358    pub min_invited_vendors: u32,
12359    /// Maximum vendors invited per RFx
12360    #[serde(default = "default_max_invited_vendors")]
12361    pub max_invited_vendors: u32,
12362    /// Response rate (% of invited vendors that submit bids)
12363    #[serde(default = "default_response_rate")]
12364    pub response_rate: f64,
12365    /// Default price weight in evaluation
12366    #[serde(default = "default_price_weight")]
12367    pub default_price_weight: f64,
12368    /// Default quality weight in evaluation
12369    #[serde(default = "default_rfx_quality_weight")]
12370    pub default_quality_weight: f64,
12371    /// Default delivery weight in evaluation
12372    #[serde(default = "default_rfx_delivery_weight")]
12373    pub default_delivery_weight: f64,
12374}
12375
12376impl Default for RfxConfig {
12377    fn default() -> Self {
12378        Self {
12379            rfi_threshold: default_rfi_threshold(),
12380            min_invited_vendors: default_min_invited_vendors(),
12381            max_invited_vendors: default_max_invited_vendors(),
12382            response_rate: default_response_rate(),
12383            default_price_weight: default_price_weight(),
12384            default_quality_weight: default_rfx_quality_weight(),
12385            default_delivery_weight: default_rfx_delivery_weight(),
12386        }
12387    }
12388}
12389
/// Fallback for `RfxConfig::rfi_threshold`.
fn default_rfi_threshold() -> f64 {
    100_000.0_f64
}

/// Fallback for `RfxConfig::min_invited_vendors`.
fn default_min_invited_vendors() -> u32 {
    3_u32
}

/// Fallback for `RfxConfig::max_invited_vendors`.
fn default_max_invited_vendors() -> u32 {
    8_u32
}

/// Fallback for `RfxConfig::response_rate`.
fn default_response_rate() -> f64 {
    0.70_f64
}

/// Fallback for `RfxConfig::default_price_weight`.
fn default_price_weight() -> f64 {
    0.40_f64
}

/// Fallback for `RfxConfig::default_quality_weight`.
fn default_rfx_quality_weight() -> f64 {
    0.35_f64
}

/// Fallback for `RfxConfig::default_delivery_weight`.
fn default_rfx_delivery_weight() -> f64 {
    0.25_f64
}
12411
12412/// Contract configuration.
12413#[derive(Debug, Clone, Serialize, Deserialize)]
12414pub struct ContractConfig {
12415    /// Minimum contract duration in months
12416    #[serde(default = "default_min_contract_months")]
12417    pub min_duration_months: u32,
12418    /// Maximum contract duration in months
12419    #[serde(default = "default_max_contract_months")]
12420    pub max_duration_months: u32,
12421    /// Auto-renewal rate
12422    #[serde(default = "default_auto_renewal_rate")]
12423    pub auto_renewal_rate: f64,
12424    /// Amendment rate (% of contracts with at least one amendment)
12425    #[serde(default = "default_amendment_rate")]
12426    pub amendment_rate: f64,
12427    /// Distribution of contract types
12428    #[serde(default)]
12429    pub type_distribution: ContractTypeDistribution,
12430}
12431
12432impl Default for ContractConfig {
12433    fn default() -> Self {
12434        Self {
12435            min_duration_months: default_min_contract_months(),
12436            max_duration_months: default_max_contract_months(),
12437            auto_renewal_rate: default_auto_renewal_rate(),
12438            amendment_rate: default_amendment_rate(),
12439            type_distribution: ContractTypeDistribution::default(),
12440        }
12441    }
12442}
12443
/// Fallback for `ContractConfig::min_duration_months`.
fn default_min_contract_months() -> u32 {
    12_u32
}

/// Fallback for `ContractConfig::max_duration_months`.
fn default_max_contract_months() -> u32 {
    36_u32
}

/// Fallback for `ContractConfig::auto_renewal_rate`.
fn default_auto_renewal_rate() -> f64 {
    0.40_f64
}

/// Fallback for `ContractConfig::amendment_rate`.
fn default_amendment_rate() -> f64 {
    0.20_f64
}
12456
12457/// Distribution of contract types.
12458#[derive(Debug, Clone, Serialize, Deserialize)]
12459pub struct ContractTypeDistribution {
12460    /// Fixed price percentage
12461    #[serde(default = "default_fixed_price_pct")]
12462    pub fixed_price: f64,
12463    /// Blanket/framework percentage
12464    #[serde(default = "default_blanket_pct")]
12465    pub blanket: f64,
12466    /// Time and materials percentage
12467    #[serde(default = "default_time_materials_pct")]
12468    pub time_and_materials: f64,
12469    /// Service agreement percentage
12470    #[serde(default = "default_service_agreement_pct")]
12471    pub service_agreement: f64,
12472}
12473
12474impl Default for ContractTypeDistribution {
12475    fn default() -> Self {
12476        Self {
12477            fixed_price: default_fixed_price_pct(),
12478            blanket: default_blanket_pct(),
12479            time_and_materials: default_time_materials_pct(),
12480            service_agreement: default_service_agreement_pct(),
12481        }
12482    }
12483}
12484
/// Fallback for `ContractTypeDistribution::fixed_price`.
fn default_fixed_price_pct() -> f64 {
    0.40_f64
}

/// Fallback for `ContractTypeDistribution::blanket`.
fn default_blanket_pct() -> f64 {
    0.30_f64
}

/// Fallback for `ContractTypeDistribution::time_and_materials`.
fn default_time_materials_pct() -> f64 {
    0.15_f64
}

/// Fallback for `ContractTypeDistribution::service_agreement`.
fn default_service_agreement_pct() -> f64 {
    0.15_f64
}
12497
12498/// Catalog configuration.
12499#[derive(Debug, Clone, Serialize, Deserialize)]
12500pub struct CatalogConfig {
12501    /// Percentage of catalog items marked as preferred
12502    #[serde(default = "default_preferred_vendor_flag_rate")]
12503    pub preferred_vendor_flag_rate: f64,
12504    /// Rate of materials with multiple sources in catalog
12505    #[serde(default = "default_multi_source_rate")]
12506    pub multi_source_rate: f64,
12507}
12508
12509impl Default for CatalogConfig {
12510    fn default() -> Self {
12511        Self {
12512            preferred_vendor_flag_rate: default_preferred_vendor_flag_rate(),
12513            multi_source_rate: default_multi_source_rate(),
12514        }
12515    }
12516}
12517
/// Fallback for `CatalogConfig::preferred_vendor_flag_rate`.
fn default_preferred_vendor_flag_rate() -> f64 {
    0.70_f64
}

/// Fallback for `CatalogConfig::multi_source_rate`.
fn default_multi_source_rate() -> f64 {
    0.25_f64
}
12524
12525/// Scorecard configuration.
12526#[derive(Debug, Clone, Serialize, Deserialize)]
12527pub struct ScorecardConfig {
12528    /// Scorecard review frequency (quarterly, monthly)
12529    #[serde(default = "default_scorecard_frequency")]
12530    pub frequency: String,
12531    /// On-time delivery weight in overall score
12532    #[serde(default = "default_otd_weight")]
12533    pub on_time_delivery_weight: f64,
12534    /// Quality weight in overall score
12535    #[serde(default = "default_quality_score_weight")]
12536    pub quality_weight: f64,
12537    /// Price competitiveness weight
12538    #[serde(default = "default_price_score_weight")]
12539    pub price_weight: f64,
12540    /// Responsiveness weight
12541    #[serde(default = "default_responsiveness_weight")]
12542    pub responsiveness_weight: f64,
12543    /// Grade A threshold (score >= this)
12544    #[serde(default = "default_grade_a_threshold")]
12545    pub grade_a_threshold: f64,
12546    /// Grade B threshold
12547    #[serde(default = "default_grade_b_threshold")]
12548    pub grade_b_threshold: f64,
12549    /// Grade C threshold
12550    #[serde(default = "default_grade_c_threshold")]
12551    pub grade_c_threshold: f64,
12552}
12553
12554impl Default for ScorecardConfig {
12555    fn default() -> Self {
12556        Self {
12557            frequency: default_scorecard_frequency(),
12558            on_time_delivery_weight: default_otd_weight(),
12559            quality_weight: default_quality_score_weight(),
12560            price_weight: default_price_score_weight(),
12561            responsiveness_weight: default_responsiveness_weight(),
12562            grade_a_threshold: default_grade_a_threshold(),
12563            grade_b_threshold: default_grade_b_threshold(),
12564            grade_c_threshold: default_grade_c_threshold(),
12565        }
12566    }
12567}
12568
/// Fallback for `ScorecardConfig::frequency`.
fn default_scorecard_frequency() -> String {
    String::from("quarterly")
}

/// Fallback for `ScorecardConfig::on_time_delivery_weight`.
fn default_otd_weight() -> f64 {
    0.30_f64
}

/// Fallback for `ScorecardConfig::quality_weight`.
fn default_quality_score_weight() -> f64 {
    0.30_f64
}

/// Fallback for `ScorecardConfig::price_weight`.
fn default_price_score_weight() -> f64 {
    0.25_f64
}

/// Fallback for `ScorecardConfig::responsiveness_weight`.
fn default_responsiveness_weight() -> f64 {
    0.15_f64
}

/// Fallback for `ScorecardConfig::grade_a_threshold`.
fn default_grade_a_threshold() -> f64 {
    90.0_f64
}

/// Fallback for `ScorecardConfig::grade_b_threshold`.
fn default_grade_b_threshold() -> f64 {
    75.0_f64
}

/// Fallback for `ScorecardConfig::grade_c_threshold`.
fn default_grade_c_threshold() -> f64 {
    60.0_f64
}
12593
12594/// P2P integration settings for contract enforcement.
12595#[derive(Debug, Clone, Serialize, Deserialize)]
12596pub struct P2PIntegrationConfig {
12597    /// Rate of off-contract (maverick) purchases
12598    #[serde(default = "default_off_contract_rate")]
12599    pub off_contract_rate: f64,
12600    /// Price tolerance for contract price validation
12601    #[serde(default = "default_price_tolerance")]
12602    pub price_tolerance: f64,
12603    /// Whether to enforce catalog ordering
12604    #[serde(default)]
12605    pub catalog_enforcement: bool,
12606}
12607
12608impl Default for P2PIntegrationConfig {
12609    fn default() -> Self {
12610        Self {
12611            off_contract_rate: default_off_contract_rate(),
12612            price_tolerance: default_price_tolerance(),
12613            catalog_enforcement: false,
12614        }
12615    }
12616}
12617
/// Fallback for `P2PIntegrationConfig::off_contract_rate`.
fn default_off_contract_rate() -> f64 {
    0.15_f64
}

/// Fallback for `P2PIntegrationConfig::price_tolerance`.
fn default_price_tolerance() -> f64 {
    0.02_f64
}
12624
12625// ----- Financial Reporting -----
12626
12627/// Financial reporting configuration.
12628#[derive(Debug, Clone, Serialize, Deserialize)]
12629pub struct FinancialReportingConfig {
12630    /// Enable financial reporting generation
12631    #[serde(default)]
12632    pub enabled: bool,
12633    /// Generate balance sheet
12634    #[serde(default = "default_true")]
12635    pub generate_balance_sheet: bool,
12636    /// Generate income statement
12637    #[serde(default = "default_true")]
12638    pub generate_income_statement: bool,
12639    /// Generate cash flow statement
12640    #[serde(default = "default_true")]
12641    pub generate_cash_flow: bool,
12642    /// Generate changes in equity statement
12643    #[serde(default = "default_true")]
12644    pub generate_changes_in_equity: bool,
12645    /// Number of comparative periods
12646    #[serde(default = "default_comparative_periods")]
12647    pub comparative_periods: u32,
12648    /// Management KPIs configuration
12649    #[serde(default)]
12650    pub management_kpis: ManagementKpisConfig,
12651    /// Budget configuration
12652    #[serde(default)]
12653    pub budgets: BudgetConfig,
12654    /// External-expectation (ISA-520 substantive-analytics) configuration
12655    #[serde(default, alias = "externalExpectations")]
12656    pub external_expectations: ExternalExpectationsConfig,
12657    /// Evidence-anchor (ISA-505 external-corroboration) configuration
12658    #[serde(default, alias = "evidenceAnchors")]
12659    pub evidence_anchors: EvidenceAnchorsConfig,
12660}
12661
12662impl Default for FinancialReportingConfig {
12663    fn default() -> Self {
12664        Self {
12665            enabled: false,
12666            generate_balance_sheet: true,
12667            generate_income_statement: true,
12668            generate_cash_flow: true,
12669            generate_changes_in_equity: true,
12670            comparative_periods: default_comparative_periods(),
12671            management_kpis: ManagementKpisConfig::default(),
12672            budgets: BudgetConfig::default(),
12673            external_expectations: ExternalExpectationsConfig::default(),
12674            evidence_anchors: EvidenceAnchorsConfig::default(),
12675        }
12676    }
12677}
12678
/// Fallback for `FinancialReportingConfig::comparative_periods`.
fn default_comparative_periods() -> u32 {
    1_u32
}
12682
12683/// Management KPIs configuration.
12684#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12685pub struct ManagementKpisConfig {
12686    /// Enable KPI generation
12687    #[serde(default)]
12688    pub enabled: bool,
12689    /// KPI calculation frequency (monthly, quarterly)
12690    #[serde(default = "default_kpi_frequency")]
12691    pub frequency: String,
12692}
12693
12694fn default_kpi_frequency() -> String {
12695    "monthly".to_string()
12696}
12697
12698/// Budget configuration.
12699#[derive(Debug, Clone, Serialize, Deserialize)]
12700pub struct BudgetConfig {
12701    /// Enable budget generation
12702    #[serde(default)]
12703    pub enabled: bool,
12704    /// Expected revenue growth rate for budgeting
12705    #[serde(default = "default_revenue_growth_rate")]
12706    pub revenue_growth_rate: f64,
12707    /// Expected expense inflation rate
12708    #[serde(default = "default_expense_inflation_rate")]
12709    pub expense_inflation_rate: f64,
12710    /// Random noise to add to budget vs actual
12711    #[serde(default = "default_variance_noise")]
12712    pub variance_noise: f64,
12713}
12714
12715impl Default for BudgetConfig {
12716    fn default() -> Self {
12717        Self {
12718            enabled: false,
12719            revenue_growth_rate: default_revenue_growth_rate(),
12720            expense_inflation_rate: default_expense_inflation_rate(),
12721            variance_noise: default_variance_noise(),
12722        }
12723    }
12724}
12725
/// Fallback for `BudgetConfig::revenue_growth_rate`.
fn default_revenue_growth_rate() -> f64 {
    0.05_f64
}

/// Fallback for `BudgetConfig::expense_inflation_rate`.
fn default_expense_inflation_rate() -> f64 {
    0.03_f64
}

/// Fallback for `BudgetConfig::variance_noise`.
fn default_variance_noise() -> f64 {
    0.10_f64
}
12735
/// External-expectation (ISA-520 substantive-analytics) configuration.
///
/// When enabled, the engine emits, per material GL account, an expected period total derived from an
/// exogenous driver (prior-year / market / macro / budget) plus a materiality tolerance band, with the
/// realized deviation and the ground-truth fraud contribution. This is the Phase-2 substantive-
/// analytics layer — the engine-side counterpart to the perfect-crime countermeasure (see
/// `docs/phase2-ledger-evidence-assurance.md`).
///
/// Every field is optional when deserializing, and each multi-word field also accepts the
/// camelCase key given in its `alias`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalExpectationsConfig {
    /// Enable external-expectation generation.
    #[serde(default)]
    pub enabled: bool,
    /// Exogenous driver the expectation is built on.
    /// Also accepted under the key `primaryDriver`.
    #[serde(default, alias = "primaryDriver")]
    pub driver: ExpectationDriver,
    /// Materiality tolerance band as a fraction of the expectation (the ISA-520 investigate threshold).
    /// Falls back to `default_expectation_tolerance_pct()` (0.10) when omitted.
    #[serde(default = "default_expectation_tolerance_pct", alias = "tolerancePct")]
    pub tolerance_pct: f64,
    /// Forecast-error std (fraction) of the auditor's expectation around the legitimate level —
    /// models imperfect expectations, producing realistic false positives on volatile accounts.
    /// Falls back to `default_forecast_noise()` (0.05) when omitted.
    #[serde(default = "default_forecast_noise", alias = "forecastNoise")]
    pub forecast_noise: f64,
    /// Expected period-over-period growth used to frame the driver (e.g. prior-year × (1 + growth)).
    /// Falls back to `default_expectation_growth_rate()` (0.05) when omitted.
    #[serde(default = "default_expectation_growth_rate", alias = "growthRate")]
    pub growth_rate: f64,
    /// Only accounts whose legitimate share of total activity is at least this fraction are scored —
    /// substantive analytics targets material balances.
    /// Falls back to `default_min_materiality_share()` (0.005) when omitted.
    #[serde(
        default = "default_min_materiality_share",
        alias = "minMaterialityShare"
    )]
    pub min_materiality_share: f64,
}
12769
12770impl Default for ExternalExpectationsConfig {
12771    fn default() -> Self {
12772        Self {
12773            enabled: false,
12774            driver: ExpectationDriver::default(),
12775            tolerance_pct: default_expectation_tolerance_pct(),
12776            forecast_noise: default_forecast_noise(),
12777            growth_rate: default_expectation_growth_rate(),
12778            min_materiality_share: default_min_materiality_share(),
12779        }
12780    }
12781}
12782
/// Serde default for `ExternalExpectationsConfig::tolerance_pct`: `0.10`.
fn default_expectation_tolerance_pct() -> f64 {
    const TOLERANCE_PCT: f64 = 0.10;
    TOLERANCE_PCT
}

/// Serde default for `ExternalExpectationsConfig::forecast_noise`: `0.05`.
fn default_forecast_noise() -> f64 {
    const FORECAST_NOISE: f64 = 0.05;
    FORECAST_NOISE
}

/// Serde default for `ExternalExpectationsConfig::growth_rate`: `0.05`.
fn default_expectation_growth_rate() -> f64 {
    const GROWTH_RATE: f64 = 0.05;
    GROWTH_RATE
}

/// Serde default for the `min_materiality_share` field of both
/// `ExternalExpectationsConfig` and `EvidenceAnchorsConfig`: `0.005`.
fn default_min_materiality_share() -> f64 {
    const MIN_MATERIALITY_SHARE: f64 = 0.005;
    MIN_MATERIALITY_SHARE
}
12795
12796/// Evidence-anchor (ISA-505 external-corroboration) configuration.
12797///
12798/// When enabled, the engine emits, per material GL account, whether the account's activity is
12799/// corroborated by evidence exogenous to the ledger; a material, uncorroborated account is a
12800/// **dangling node** — the ISA-505 existence/occurrence lead. Genuine accounts are corroborated at
12801/// `corroboration_rate`; fraud-linked accounts are corroborated only at `fabrication_evade_rate`
12802/// (the adversary who forged external evidence — the expensive "perfect audit crime"). Phase-2
12803/// evidence layer (see `docs/phase2-ledger-evidence-assurance.md`).
12804#[derive(Debug, Clone, Serialize, Deserialize)]
12805pub struct EvidenceAnchorsConfig {
12806    /// Enable evidence-anchor generation.
12807    #[serde(default)]
12808    pub enabled: bool,
12809    /// Only accounts whose share of total activity is at least this fraction are scored.
12810    #[serde(
12811        default = "default_min_materiality_share",
12812        alias = "minMaterialityShare"
12813    )]
12814    pub min_materiality_share: f64,
12815    /// Rate at which a genuine account's activity is externally corroborated (1 − this = false-positive
12816    /// dangling rate on clean accounts, modelling unconfirmed-but-legitimate balances).
12817    #[serde(default = "default_corroboration_rate", alias = "corroborationRate")]
12818    pub corroboration_rate: f64,
12819    /// Rate at which a fraud-linked account is nonetheless corroborated — the adversary who forged the
12820    /// external evidence (a false negative; the expensive, fragile perfect-audit-crime, `prop:counter`).
12821    #[serde(
12822        default = "default_fabrication_evade_rate",
12823        alias = "fabricationEvadeRate"
12824    )]
12825    pub fabrication_evade_rate: f64,
12826}
12827
12828impl Default for EvidenceAnchorsConfig {
12829    fn default() -> Self {
12830        Self {
12831            enabled: false,
12832            min_materiality_share: default_min_materiality_share(),
12833            corroboration_rate: default_corroboration_rate(),
12834            fabrication_evade_rate: default_fabrication_evade_rate(),
12835        }
12836    }
12837}
12838
/// Serde default for `EvidenceAnchorsConfig::corroboration_rate`: `0.92`.
fn default_corroboration_rate() -> f64 {
    const CORROBORATION_RATE: f64 = 0.92;
    CORROBORATION_RATE
}

/// Serde default for `EvidenceAnchorsConfig::fabrication_evade_rate`: `0.10`.
fn default_fabrication_evade_rate() -> f64 {
    const FABRICATION_EVADE_RATE: f64 = 0.10;
    FABRICATION_EVADE_RATE
}
12845
12846// ----- HR Configuration -----
12847
12848/// HR (Hire-to-Retire) process configuration.
12849#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12850pub struct HrConfig {
12851    /// Enable HR generation
12852    #[serde(default)]
12853    pub enabled: bool,
12854    /// Payroll configuration
12855    #[serde(default)]
12856    pub payroll: PayrollConfig,
12857    /// Time and attendance configuration
12858    #[serde(default)]
12859    pub time_attendance: TimeAttendanceConfig,
12860    /// Expense management configuration
12861    #[serde(default)]
12862    pub expenses: ExpenseConfig,
12863}
12864
12865/// Payroll configuration.
12866#[derive(Debug, Clone, Serialize, Deserialize)]
12867pub struct PayrollConfig {
12868    /// Enable payroll generation
12869    #[serde(default = "default_true")]
12870    pub enabled: bool,
12871    /// Pay frequency (monthly, biweekly, weekly)
12872    #[serde(default = "default_pay_frequency")]
12873    pub pay_frequency: String,
12874    /// Salary ranges by job level
12875    #[serde(default)]
12876    pub salary_ranges: PayrollSalaryRanges,
12877    /// Effective tax rates
12878    #[serde(default)]
12879    pub tax_rates: PayrollTaxRates,
12880    /// Benefits enrollment rate
12881    #[serde(default = "default_benefits_enrollment_rate")]
12882    pub benefits_enrollment_rate: f64,
12883    /// Retirement plan participation rate
12884    #[serde(default = "default_retirement_participation_rate")]
12885    pub retirement_participation_rate: f64,
12886}
12887
12888impl Default for PayrollConfig {
12889    fn default() -> Self {
12890        Self {
12891            enabled: true,
12892            pay_frequency: default_pay_frequency(),
12893            salary_ranges: PayrollSalaryRanges::default(),
12894            tax_rates: PayrollTaxRates::default(),
12895            benefits_enrollment_rate: default_benefits_enrollment_rate(),
12896            retirement_participation_rate: default_retirement_participation_rate(),
12897        }
12898    }
12899}
12900
/// Serde default for `PayrollConfig::pay_frequency`: `"monthly"`.
fn default_pay_frequency() -> String {
    String::from("monthly")
}

/// Serde default for `PayrollConfig::benefits_enrollment_rate`: `0.60`.
fn default_benefits_enrollment_rate() -> f64 {
    const BENEFITS_ENROLLMENT_RATE: f64 = 0.60;
    BENEFITS_ENROLLMENT_RATE
}

/// Serde default for `PayrollConfig::retirement_participation_rate`: `0.45`.
fn default_retirement_participation_rate() -> f64 {
    const RETIREMENT_PARTICIPATION_RATE: f64 = 0.45;
    RETIREMENT_PARTICIPATION_RATE
}
12910
12911/// Salary ranges by job level.
12912#[derive(Debug, Clone, Serialize, Deserialize)]
12913pub struct PayrollSalaryRanges {
12914    /// Staff level min/max
12915    #[serde(default = "default_staff_min")]
12916    pub staff_min: f64,
12917    #[serde(default = "default_staff_max")]
12918    pub staff_max: f64,
12919    /// Manager level min/max
12920    #[serde(default = "default_manager_min")]
12921    pub manager_min: f64,
12922    #[serde(default = "default_manager_max")]
12923    pub manager_max: f64,
12924    /// Director level min/max
12925    #[serde(default = "default_director_min")]
12926    pub director_min: f64,
12927    #[serde(default = "default_director_max")]
12928    pub director_max: f64,
12929    /// Executive level min/max
12930    #[serde(default = "default_executive_min")]
12931    pub executive_min: f64,
12932    #[serde(default = "default_executive_max")]
12933    pub executive_max: f64,
12934}
12935
12936impl Default for PayrollSalaryRanges {
12937    fn default() -> Self {
12938        Self {
12939            staff_min: default_staff_min(),
12940            staff_max: default_staff_max(),
12941            manager_min: default_manager_min(),
12942            manager_max: default_manager_max(),
12943            director_min: default_director_min(),
12944            director_max: default_director_max(),
12945            executive_min: default_executive_min(),
12946            executive_max: default_executive_max(),
12947        }
12948    }
12949}
12950
/// Serde default for `PayrollSalaryRanges::staff_min`: `50_000.0`.
fn default_staff_min() -> f64 {
    const STAFF_MIN: f64 = 50_000.0;
    STAFF_MIN
}

/// Serde default for `PayrollSalaryRanges::staff_max`: `70_000.0`.
fn default_staff_max() -> f64 {
    const STAFF_MAX: f64 = 70_000.0;
    STAFF_MAX
}

/// Serde default for `PayrollSalaryRanges::manager_min`: `80_000.0`.
fn default_manager_min() -> f64 {
    const MANAGER_MIN: f64 = 80_000.0;
    MANAGER_MIN
}

/// Serde default for `PayrollSalaryRanges::manager_max`: `120_000.0`.
fn default_manager_max() -> f64 {
    const MANAGER_MAX: f64 = 120_000.0;
    MANAGER_MAX
}

/// Serde default for `PayrollSalaryRanges::director_min`: `120_000.0`.
fn default_director_min() -> f64 {
    const DIRECTOR_MIN: f64 = 120_000.0;
    DIRECTOR_MIN
}

/// Serde default for `PayrollSalaryRanges::director_max`: `180_000.0`.
fn default_director_max() -> f64 {
    const DIRECTOR_MAX: f64 = 180_000.0;
    DIRECTOR_MAX
}

/// Serde default for `PayrollSalaryRanges::executive_min`: `180_000.0`.
fn default_executive_min() -> f64 {
    const EXECUTIVE_MIN: f64 = 180_000.0;
    EXECUTIVE_MIN
}

/// Serde default for `PayrollSalaryRanges::executive_max`: `350_000.0`.
fn default_executive_max() -> f64 {
    const EXECUTIVE_MAX: f64 = 350_000.0;
    EXECUTIVE_MAX
}
12975
12976/// Effective tax rates for payroll.
12977#[derive(Debug, Clone, Serialize, Deserialize)]
12978pub struct PayrollTaxRates {
12979    /// Federal effective tax rate
12980    #[serde(default = "default_federal_rate")]
12981    pub federal_effective: f64,
12982    /// State effective tax rate
12983    #[serde(default = "default_state_rate")]
12984    pub state_effective: f64,
12985    /// FICA/social security rate
12986    #[serde(default = "default_fica_rate")]
12987    pub fica: f64,
12988}
12989
12990impl Default for PayrollTaxRates {
12991    fn default() -> Self {
12992        Self {
12993            federal_effective: default_federal_rate(),
12994            state_effective: default_state_rate(),
12995            fica: default_fica_rate(),
12996        }
12997    }
12998}
12999
/// Serde default for `PayrollTaxRates::federal_effective`: `0.22`.
fn default_federal_rate() -> f64 {
    const FEDERAL_RATE: f64 = 0.22;
    FEDERAL_RATE
}

/// Serde default for `PayrollTaxRates::state_effective`: `0.05`.
fn default_state_rate() -> f64 {
    const STATE_RATE: f64 = 0.05;
    STATE_RATE
}

/// Serde default for `PayrollTaxRates::fica`: `0.0765`.
fn default_fica_rate() -> f64 {
    const FICA_RATE: f64 = 0.0765;
    FICA_RATE
}
13009
13010/// Time and attendance configuration.
13011#[derive(Debug, Clone, Serialize, Deserialize)]
13012pub struct TimeAttendanceConfig {
13013    /// Enable time tracking
13014    #[serde(default = "default_true")]
13015    pub enabled: bool,
13016    /// Overtime rate (% of employees with overtime in a period)
13017    #[serde(default = "default_overtime_rate")]
13018    pub overtime_rate: f64,
13019}
13020
13021impl Default for TimeAttendanceConfig {
13022    fn default() -> Self {
13023        Self {
13024            enabled: true,
13025            overtime_rate: default_overtime_rate(),
13026        }
13027    }
13028}
13029
/// Serde default for `TimeAttendanceConfig::overtime_rate`: `0.10`.
fn default_overtime_rate() -> f64 {
    const OVERTIME_RATE: f64 = 0.10;
    OVERTIME_RATE
}
13033
13034/// Expense management configuration.
13035#[derive(Debug, Clone, Serialize, Deserialize)]
13036pub struct ExpenseConfig {
13037    /// Enable expense report generation
13038    #[serde(default = "default_true")]
13039    pub enabled: bool,
13040    /// Rate of employees submitting expenses per month
13041    #[serde(default = "default_expense_submission_rate")]
13042    pub submission_rate: f64,
13043    /// Rate of policy violations
13044    #[serde(default = "default_policy_violation_rate")]
13045    pub policy_violation_rate: f64,
13046}
13047
13048impl Default for ExpenseConfig {
13049    fn default() -> Self {
13050        Self {
13051            enabled: true,
13052            submission_rate: default_expense_submission_rate(),
13053            policy_violation_rate: default_policy_violation_rate(),
13054        }
13055    }
13056}
13057
/// Serde default for `ExpenseConfig::submission_rate`: `0.30`.
fn default_expense_submission_rate() -> f64 {
    const SUBMISSION_RATE: f64 = 0.30;
    SUBMISSION_RATE
}

/// Serde default for `ExpenseConfig::policy_violation_rate`: `0.08`.
fn default_policy_violation_rate() -> f64 {
    const POLICY_VIOLATION_RATE: f64 = 0.08;
    POLICY_VIOLATION_RATE
}
13064
13065// ----- Manufacturing Configuration -----
13066
13067/// Manufacturing process configuration (production orders, WIP, routing).
13068#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13069pub struct ManufacturingProcessConfig {
13070    /// Enable manufacturing generation
13071    #[serde(default)]
13072    pub enabled: bool,
13073    /// Production order configuration
13074    #[serde(default)]
13075    pub production_orders: ProductionOrderConfig,
13076    /// Costing configuration
13077    #[serde(default)]
13078    pub costing: ManufacturingCostingConfig,
13079    /// Routing configuration
13080    #[serde(default)]
13081    pub routing: RoutingConfig,
13082}
13083
13084/// Production order configuration.
13085#[derive(Debug, Clone, Serialize, Deserialize)]
13086pub struct ProductionOrderConfig {
13087    /// Orders per month
13088    #[serde(default = "default_prod_orders_per_month")]
13089    pub orders_per_month: u32,
13090    /// Average batch size
13091    #[serde(default = "default_prod_avg_batch_size")]
13092    pub avg_batch_size: u32,
13093    /// Yield rate
13094    #[serde(default = "default_prod_yield_rate")]
13095    pub yield_rate: f64,
13096    /// Make-to-order rate (vs make-to-stock)
13097    #[serde(default = "default_prod_make_to_order_rate")]
13098    pub make_to_order_rate: f64,
13099    /// Rework rate
13100    #[serde(default = "default_prod_rework_rate")]
13101    pub rework_rate: f64,
13102}
13103
13104impl Default for ProductionOrderConfig {
13105    fn default() -> Self {
13106        Self {
13107            orders_per_month: default_prod_orders_per_month(),
13108            avg_batch_size: default_prod_avg_batch_size(),
13109            yield_rate: default_prod_yield_rate(),
13110            make_to_order_rate: default_prod_make_to_order_rate(),
13111            rework_rate: default_prod_rework_rate(),
13112        }
13113    }
13114}
13115
/// Serde default for `ProductionOrderConfig::orders_per_month`: `50`.
fn default_prod_orders_per_month() -> u32 {
    const ORDERS_PER_MONTH: u32 = 50;
    ORDERS_PER_MONTH
}

/// Serde default for `ProductionOrderConfig::avg_batch_size`: `100`.
fn default_prod_avg_batch_size() -> u32 {
    const AVG_BATCH_SIZE: u32 = 100;
    AVG_BATCH_SIZE
}

/// Serde default for `ProductionOrderConfig::yield_rate`: `0.97`.
fn default_prod_yield_rate() -> f64 {
    const YIELD_RATE: f64 = 0.97;
    YIELD_RATE
}

/// Serde default for `ProductionOrderConfig::make_to_order_rate`: `0.20`.
fn default_prod_make_to_order_rate() -> f64 {
    const MAKE_TO_ORDER_RATE: f64 = 0.20;
    MAKE_TO_ORDER_RATE
}

/// Serde default for `ProductionOrderConfig::rework_rate`: `0.03`.
fn default_prod_rework_rate() -> f64 {
    const REWORK_RATE: f64 = 0.03;
    REWORK_RATE
}
13131
13132/// Manufacturing costing configuration.
13133#[derive(Debug, Clone, Serialize, Deserialize)]
13134pub struct ManufacturingCostingConfig {
13135    /// Labor rate per hour
13136    #[serde(default = "default_labor_rate")]
13137    pub labor_rate_per_hour: f64,
13138    /// Overhead application rate (multiplier on direct labor)
13139    #[serde(default = "default_overhead_rate")]
13140    pub overhead_rate: f64,
13141    /// Standard cost update frequency
13142    #[serde(default = "default_cost_update_frequency")]
13143    pub standard_cost_update_frequency: String,
13144}
13145
13146impl Default for ManufacturingCostingConfig {
13147    fn default() -> Self {
13148        Self {
13149            labor_rate_per_hour: default_labor_rate(),
13150            overhead_rate: default_overhead_rate(),
13151            standard_cost_update_frequency: default_cost_update_frequency(),
13152        }
13153    }
13154}
13155
/// Serde default for `ManufacturingCostingConfig::labor_rate_per_hour`: `35.0`.
fn default_labor_rate() -> f64 {
    const LABOR_RATE_PER_HOUR: f64 = 35.0;
    LABOR_RATE_PER_HOUR
}

/// Serde default for `ManufacturingCostingConfig::overhead_rate`: `1.50`.
fn default_overhead_rate() -> f64 {
    const OVERHEAD_RATE: f64 = 1.50;
    OVERHEAD_RATE
}

/// Serde default for `ManufacturingCostingConfig::standard_cost_update_frequency`:
/// `"quarterly"`.
fn default_cost_update_frequency() -> String {
    String::from("quarterly")
}
13165
13166/// Routing configuration for production operations.
13167#[derive(Debug, Clone, Serialize, Deserialize)]
13168pub struct RoutingConfig {
13169    /// Average number of operations per routing
13170    #[serde(default = "default_avg_operations")]
13171    pub avg_operations: u32,
13172    /// Average setup time in hours
13173    #[serde(default = "default_setup_time")]
13174    pub setup_time_hours: f64,
13175    /// Run time variation coefficient
13176    #[serde(default = "default_run_time_variation")]
13177    pub run_time_variation: f64,
13178}
13179
13180impl Default for RoutingConfig {
13181    fn default() -> Self {
13182        Self {
13183            avg_operations: default_avg_operations(),
13184            setup_time_hours: default_setup_time(),
13185            run_time_variation: default_run_time_variation(),
13186        }
13187    }
13188}
13189
/// Serde default for `RoutingConfig::avg_operations`: `4`.
fn default_avg_operations() -> u32 {
    const AVG_OPERATIONS: u32 = 4;
    AVG_OPERATIONS
}

/// Serde default for `RoutingConfig::setup_time_hours`: `1.5`.
fn default_setup_time() -> f64 {
    const SETUP_TIME_HOURS: f64 = 1.5;
    SETUP_TIME_HOURS
}

/// Serde default for `RoutingConfig::run_time_variation`: `0.15`.
fn default_run_time_variation() -> f64 {
    const RUN_TIME_VARIATION: f64 = 0.15;
    RUN_TIME_VARIATION
}
13199
13200// ----- Sales Quote Configuration -----
13201
13202/// Sales quote (quote-to-order) pipeline configuration.
13203#[derive(Debug, Clone, Serialize, Deserialize)]
13204pub struct SalesQuoteConfig {
13205    /// Enable sales quote generation
13206    #[serde(default)]
13207    pub enabled: bool,
13208    /// Quotes per month
13209    #[serde(default = "default_quotes_per_month")]
13210    pub quotes_per_month: u32,
13211    /// Win rate (fraction of quotes that convert to orders)
13212    #[serde(default = "default_quote_win_rate")]
13213    pub win_rate: f64,
13214    /// Average quote validity in days
13215    #[serde(default = "default_quote_validity_days")]
13216    pub validity_days: u32,
13217}
13218
13219impl Default for SalesQuoteConfig {
13220    fn default() -> Self {
13221        Self {
13222            enabled: false,
13223            quotes_per_month: default_quotes_per_month(),
13224            win_rate: default_quote_win_rate(),
13225            validity_days: default_quote_validity_days(),
13226        }
13227    }
13228}
13229
/// Serde default for `SalesQuoteConfig::quotes_per_month`: `30`.
fn default_quotes_per_month() -> u32 {
    const QUOTES_PER_MONTH: u32 = 30;
    QUOTES_PER_MONTH
}

/// Serde default for `SalesQuoteConfig::win_rate`: `0.35`.
fn default_quote_win_rate() -> f64 {
    const WIN_RATE: f64 = 0.35;
    WIN_RATE
}

/// Serde default for `SalesQuoteConfig::validity_days`: `30`.
fn default_quote_validity_days() -> u32 {
    const VALIDITY_DAYS: u32 = 30;
    VALIDITY_DAYS
}
13239
13240// =============================================================================
13241// Tax Accounting Configuration
13242// =============================================================================
13243
13244/// Tax accounting configuration.
13245///
13246/// Controls generation of tax-related data including VAT/GST, sales tax,
13247/// withholding tax, tax provisions, and payroll tax across multiple jurisdictions.
13248#[derive(Debug, Clone, Serialize, Deserialize)]
13249pub struct TaxConfig {
13250    /// Whether tax generation is enabled.
13251    #[serde(default)]
13252    pub enabled: bool,
13253    /// Tax jurisdiction configuration.
13254    #[serde(default)]
13255    pub jurisdictions: TaxJurisdictionConfig,
13256    /// VAT/GST configuration.
13257    #[serde(default)]
13258    pub vat_gst: VatGstConfig,
13259    /// Sales tax configuration.
13260    #[serde(default)]
13261    pub sales_tax: SalesTaxConfig,
13262    /// Withholding tax configuration.
13263    #[serde(default)]
13264    pub withholding: WithholdingTaxSchemaConfig,
13265    /// Tax provision configuration.
13266    #[serde(default)]
13267    pub provisions: TaxProvisionSchemaConfig,
13268    /// Payroll tax configuration.
13269    #[serde(default)]
13270    pub payroll_tax: PayrollTaxSchemaConfig,
13271    /// Anomaly injection rate for tax data (0.0 to 1.0).
13272    #[serde(default = "default_tax_anomaly_rate")]
13273    pub anomaly_rate: f64,
13274}
13275
/// Serde default for `TaxConfig::anomaly_rate`: `0.03`.
fn default_tax_anomaly_rate() -> f64 {
    const TAX_ANOMALY_RATE: f64 = 0.03;
    TAX_ANOMALY_RATE
}
13279
13280impl Default for TaxConfig {
13281    fn default() -> Self {
13282        Self {
13283            enabled: false,
13284            jurisdictions: TaxJurisdictionConfig::default(),
13285            vat_gst: VatGstConfig::default(),
13286            sales_tax: SalesTaxConfig::default(),
13287            withholding: WithholdingTaxSchemaConfig::default(),
13288            provisions: TaxProvisionSchemaConfig::default(),
13289            payroll_tax: PayrollTaxSchemaConfig::default(),
13290            anomaly_rate: default_tax_anomaly_rate(),
13291        }
13292    }
13293}
13294
13295/// Tax jurisdiction configuration.
13296///
13297/// Specifies which countries and subnational jurisdictions to include
13298/// when generating tax data.
13299#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13300pub struct TaxJurisdictionConfig {
13301    /// List of country codes to include (e.g., ["US", "DE", "GB"]).
13302    #[serde(default)]
13303    pub countries: Vec<String>,
13304    /// Whether to include subnational jurisdictions (e.g., US states, Canadian provinces).
13305    #[serde(default)]
13306    pub include_subnational: bool,
13307}
13308
13309/// VAT/GST configuration.
13310///
13311/// Controls generation of Value Added Tax / Goods and Services Tax data,
13312/// including standard and reduced rates, exempt categories, and reverse charge.
13313#[derive(Debug, Clone, Serialize, Deserialize)]
13314pub struct VatGstConfig {
13315    /// Whether VAT/GST generation is enabled.
13316    #[serde(default)]
13317    pub enabled: bool,
13318    /// Standard VAT/GST rates by country code (e.g., {"DE": 0.19, "GB": 0.20}).
13319    #[serde(default)]
13320    pub standard_rates: std::collections::HashMap<String, f64>,
13321    /// Reduced VAT/GST rates by country code (e.g., {"DE": 0.07, "GB": 0.05}).
13322    #[serde(default)]
13323    pub reduced_rates: std::collections::HashMap<String, f64>,
13324    /// Categories exempt from VAT/GST (e.g., ["financial_services", "healthcare"]).
13325    #[serde(default)]
13326    pub exempt_categories: Vec<String>,
13327    /// Whether to apply reverse charge mechanism for cross-border B2B transactions.
13328    #[serde(default = "default_true")]
13329    pub reverse_charge: bool,
13330}
13331
13332impl Default for VatGstConfig {
13333    fn default() -> Self {
13334        Self {
13335            enabled: false,
13336            standard_rates: std::collections::HashMap::new(),
13337            reduced_rates: std::collections::HashMap::new(),
13338            exempt_categories: Vec::new(),
13339            reverse_charge: true,
13340        }
13341    }
13342}
13343
13344/// Sales tax configuration.
13345///
13346/// Controls generation of US-style sales tax data including nexus determination.
13347#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13348pub struct SalesTaxConfig {
13349    /// Whether sales tax generation is enabled.
13350    #[serde(default)]
13351    pub enabled: bool,
13352    /// US states where the company has nexus (e.g., ["CA", "NY", "TX"]).
13353    #[serde(default)]
13354    pub nexus_states: Vec<String>,
13355}
13356
13357/// Withholding tax configuration.
13358///
13359/// Controls generation of withholding tax data for cross-border payments,
13360/// including treaty network and rate overrides.
13361#[derive(Debug, Clone, Serialize, Deserialize)]
13362pub struct WithholdingTaxSchemaConfig {
13363    /// Whether withholding tax generation is enabled.
13364    #[serde(default)]
13365    pub enabled: bool,
13366    /// Whether to simulate a treaty network with reduced rates.
13367    #[serde(default = "default_true")]
13368    pub treaty_network: bool,
13369    /// Default withholding tax rate for non-treaty countries (0.0 to 1.0).
13370    #[serde(default = "default_withholding_rate")]
13371    pub default_rate: f64,
13372    /// Reduced withholding tax rate for treaty countries (0.0 to 1.0).
13373    #[serde(default = "default_treaty_reduced_rate")]
13374    pub treaty_reduced_rate: f64,
13375}
13376
/// Serde default for `WithholdingTaxSchemaConfig::default_rate`: `0.30`.
fn default_withholding_rate() -> f64 {
    const WITHHOLDING_RATE: f64 = 0.30;
    WITHHOLDING_RATE
}

/// Serde default for `WithholdingTaxSchemaConfig::treaty_reduced_rate`: `0.15`.
fn default_treaty_reduced_rate() -> f64 {
    const TREATY_REDUCED_RATE: f64 = 0.15;
    TREATY_REDUCED_RATE
}
13384
13385impl Default for WithholdingTaxSchemaConfig {
13386    fn default() -> Self {
13387        Self {
13388            enabled: false,
13389            treaty_network: true,
13390            default_rate: default_withholding_rate(),
13391            treaty_reduced_rate: default_treaty_reduced_rate(),
13392        }
13393    }
13394}
13395
13396/// Tax provision configuration.
13397///
13398/// Controls generation of tax provision data including statutory rates
13399/// and uncertain tax positions (ASC 740 / IAS 12).
13400#[derive(Debug, Clone, Serialize, Deserialize)]
13401pub struct TaxProvisionSchemaConfig {
13402    /// Whether tax provision generation is enabled.
13403    /// Defaults to true when tax is enabled, as provisions are typically required.
13404    #[serde(default = "default_true")]
13405    pub enabled: bool,
13406    /// Statutory corporate tax rate (0.0 to 1.0).
13407    #[serde(default = "default_statutory_rate")]
13408    pub statutory_rate: f64,
13409    /// Whether to generate uncertain tax positions (FIN 48 / IFRIC 23).
13410    #[serde(default = "default_true")]
13411    pub uncertain_positions: bool,
13412}
13413
/// Serde default for `TaxProvisionSchemaConfig::statutory_rate`: `0.21`.
fn default_statutory_rate() -> f64 {
    const STATUTORY_RATE: f64 = 0.21;
    STATUTORY_RATE
}
13417
13418impl Default for TaxProvisionSchemaConfig {
13419    fn default() -> Self {
13420        Self {
13421            enabled: true,
13422            statutory_rate: default_statutory_rate(),
13423            uncertain_positions: true,
13424        }
13425    }
13426}
13427
13428/// Payroll tax configuration.
13429///
13430/// Controls generation of payroll tax data (employer/employee contributions,
13431/// social security, Medicare, etc.).
13432#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13433pub struct PayrollTaxSchemaConfig {
13434    /// Whether payroll tax generation is enabled.
13435    #[serde(default)]
13436    pub enabled: bool,
13437}
13438
13439// ---------------------------------------------------------------------------
13440// Treasury & Cash Management Configuration
13441// ---------------------------------------------------------------------------
13442
13443/// Treasury and cash management configuration.
13444///
13445/// Controls generation of cash positions, forecasts, pooling, hedging
13446/// instruments (ASC 815 / IFRS 9), debt instruments with covenants,
13447/// bank guarantees, and intercompany netting runs.
13448#[derive(Debug, Clone, Serialize, Deserialize)]
13449pub struct TreasuryConfig {
13450    /// Whether treasury generation is enabled.
13451    #[serde(default)]
13452    pub enabled: bool,
13453    /// Cash positioning configuration.
13454    #[serde(default)]
13455    pub cash_positioning: CashPositioningConfig,
13456    /// Cash forecasting configuration.
13457    #[serde(default)]
13458    pub cash_forecasting: CashForecastingConfig,
13459    /// Cash pooling configuration.
13460    #[serde(default)]
13461    pub cash_pooling: CashPoolingConfig,
13462    /// Hedging configuration (FX forwards, IR swaps, etc.).
13463    #[serde(default)]
13464    pub hedging: HedgingSchemaConfig,
13465    /// Debt instrument and covenant configuration.
13466    #[serde(default)]
13467    pub debt: DebtSchemaConfig,
13468    /// Intercompany netting configuration.
13469    #[serde(default)]
13470    pub netting: NettingSchemaConfig,
13471    /// Bank guarantee / letter of credit configuration.
13472    #[serde(default)]
13473    pub bank_guarantees: BankGuaranteeSchemaConfig,
13474    /// Anomaly injection rate for treasury data (0.0 to 1.0).
13475    #[serde(default = "default_treasury_anomaly_rate")]
13476    pub anomaly_rate: f64,
13477}
13478
/// Serde default for `TreasuryConfig::anomaly_rate`: `0.02`.
fn default_treasury_anomaly_rate() -> f64 {
    const TREASURY_ANOMALY_RATE: f64 = 0.02;
    TREASURY_ANOMALY_RATE
}
13482
13483impl Default for TreasuryConfig {
13484    fn default() -> Self {
13485        Self {
13486            enabled: false,
13487            cash_positioning: CashPositioningConfig::default(),
13488            cash_forecasting: CashForecastingConfig::default(),
13489            cash_pooling: CashPoolingConfig::default(),
13490            hedging: HedgingSchemaConfig::default(),
13491            debt: DebtSchemaConfig::default(),
13492            netting: NettingSchemaConfig::default(),
13493            bank_guarantees: BankGuaranteeSchemaConfig::default(),
13494            anomaly_rate: default_treasury_anomaly_rate(),
13495        }
13496    }
13497}
13498
13499/// Cash positioning configuration.
13500///
13501/// Controls daily cash position generation per entity/bank account.
13502#[derive(Debug, Clone, Serialize, Deserialize)]
13503pub struct CashPositioningConfig {
13504    /// Whether cash positioning is enabled.
13505    #[serde(default = "default_true")]
13506    pub enabled: bool,
13507    /// Position generation frequency.
13508    #[serde(default = "default_cash_frequency")]
13509    pub frequency: String,
13510    /// Minimum cash balance policy threshold.
13511    #[serde(default = "default_minimum_balance_policy")]
13512    pub minimum_balance_policy: f64,
13513}
13514
/// Serde default for `CashPositioningConfig::frequency`: `"daily"`.
fn default_cash_frequency() -> String {
    String::from("daily")
}

/// Serde default for `CashPositioningConfig::minimum_balance_policy`: `100_000.0`.
fn default_minimum_balance_policy() -> f64 {
    const MINIMUM_BALANCE: f64 = 100_000.0;
    MINIMUM_BALANCE
}
13522
13523impl Default for CashPositioningConfig {
13524    fn default() -> Self {
13525        Self {
13526            enabled: true,
13527            frequency: default_cash_frequency(),
13528            minimum_balance_policy: default_minimum_balance_policy(),
13529        }
13530    }
13531}
13532
13533/// Cash forecasting configuration.
13534///
13535/// Controls forward-looking cash forecast generation with probability-weighted items.
13536#[derive(Debug, Clone, Serialize, Deserialize)]
13537pub struct CashForecastingConfig {
13538    /// Whether cash forecasting is enabled.
13539    #[serde(default = "default_true")]
13540    pub enabled: bool,
13541    /// Number of days to forecast into the future.
13542    #[serde(default = "default_horizon_days")]
13543    pub horizon_days: u32,
13544    /// AR collection probability curve type ("aging" or "flat").
13545    #[serde(default = "default_ar_probability_curve")]
13546    pub ar_collection_probability_curve: String,
13547    /// Confidence interval for the forecast (0.0 to 1.0).
13548    #[serde(default = "default_confidence_interval")]
13549    pub confidence_interval: f64,
13550}
13551
/// Serde default for `CashForecastingConfig::horizon_days`: `90`.
fn default_horizon_days() -> u32 {
    const HORIZON_DAYS: u32 = 90;
    HORIZON_DAYS
}

/// Serde default for `CashForecastingConfig::ar_collection_probability_curve`: `"aging"`.
fn default_ar_probability_curve() -> String {
    String::from("aging")
}

/// Serde default for `CashForecastingConfig::confidence_interval`: `0.90`.
fn default_confidence_interval() -> f64 {
    const CONFIDENCE_INTERVAL: f64 = 0.90;
    CONFIDENCE_INTERVAL
}
13563
13564impl Default for CashForecastingConfig {
13565    fn default() -> Self {
13566        Self {
13567            enabled: true,
13568            horizon_days: default_horizon_days(),
13569            ar_collection_probability_curve: default_ar_probability_curve(),
13570            confidence_interval: default_confidence_interval(),
13571        }
13572    }
13573}
13574
13575/// Cash pooling configuration.
13576///
13577/// Controls cash pool structure generation (physical, notional, zero-balancing).
13578#[derive(Debug, Clone, Serialize, Deserialize)]
13579pub struct CashPoolingConfig {
13580    /// Whether cash pooling is enabled.
13581    #[serde(default)]
13582    pub enabled: bool,
13583    /// Pool type: "physical_pooling", "notional_pooling", or "zero_balancing".
13584    #[serde(default = "default_pool_type")]
13585    pub pool_type: String,
13586    /// Time of day when sweeps occur (HH:MM format).
13587    #[serde(default = "default_sweep_time")]
13588    pub sweep_time: String,
13589}
13590
/// Serde default for `CashPoolingConfig::pool_type`: `"zero_balancing"`.
fn default_pool_type() -> String {
    String::from("zero_balancing")
}

/// Serde default for `CashPoolingConfig::sweep_time`: `"16:00"`.
fn default_sweep_time() -> String {
    String::from("16:00")
}
13598
13599impl Default for CashPoolingConfig {
13600    fn default() -> Self {
13601        Self {
13602            enabled: false,
13603            pool_type: default_pool_type(),
13604            sweep_time: default_sweep_time(),
13605        }
13606    }
13607}
13608
13609/// Hedging configuration.
13610///
13611/// Controls generation of hedging instruments and hedge relationship designations
13612/// under ASC 815 / IFRS 9.
13613#[derive(Debug, Clone, Serialize, Deserialize)]
13614pub struct HedgingSchemaConfig {
13615    /// Whether hedging generation is enabled.
13616    #[serde(default)]
13617    pub enabled: bool,
13618    /// Target hedge ratio (0.0 to 1.0). Proportion of FX exposure to hedge.
13619    #[serde(default = "default_hedge_ratio")]
13620    pub hedge_ratio: f64,
13621    /// Types of instruments to generate (e.g., ["fx_forward", "interest_rate_swap"]).
13622    #[serde(default = "default_hedge_instruments")]
13623    pub instruments: Vec<String>,
13624    /// Whether to designate formal hedge accounting relationships.
13625    #[serde(default = "default_true")]
13626    pub hedge_accounting: bool,
13627    /// Effectiveness testing method: "dollar_offset", "regression", or "critical_terms".
13628    #[serde(default = "default_effectiveness_method")]
13629    pub effectiveness_method: String,
13630}
13631
/// Serde default for `HedgingSchemaConfig::hedge_ratio`: `0.75`.
fn default_hedge_ratio() -> f64 {
    const HEDGE_RATIO: f64 = 0.75;
    HEDGE_RATIO
}

/// Serde default for `HedgingSchemaConfig::instruments`: `"fx_forward"` followed by
/// `"interest_rate_swap"`.
fn default_hedge_instruments() -> Vec<String> {
    const INSTRUMENTS: [&str; 2] = ["fx_forward", "interest_rate_swap"];
    INSTRUMENTS.iter().map(|name| String::from(*name)).collect()
}

/// Serde default for `HedgingSchemaConfig::effectiveness_method`: `"regression"`.
fn default_effectiveness_method() -> String {
    String::from("regression")
}
13643
13644impl Default for HedgingSchemaConfig {
13645    fn default() -> Self {
13646        Self {
13647            enabled: false,
13648            hedge_ratio: default_hedge_ratio(),
13649            instruments: default_hedge_instruments(),
13650            hedge_accounting: true,
13651            effectiveness_method: default_effectiveness_method(),
13652        }
13653    }
13654}
13655
13656/// Debt instrument configuration.
13657///
13658/// Controls generation of debt instruments (term loans, revolving credit, bonds)
13659/// with amortization schedules and financial covenants.
13660#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13661pub struct DebtSchemaConfig {
13662    /// Whether debt instrument generation is enabled.
13663    #[serde(default)]
13664    pub enabled: bool,
13665    /// Debt instrument definitions.
13666    #[serde(default)]
13667    pub instruments: Vec<DebtInstrumentDef>,
13668    /// Covenant definitions.
13669    #[serde(default)]
13670    pub covenants: Vec<CovenantDef>,
13671}
13672
13673/// Definition of a debt instrument in configuration.
13674#[derive(Debug, Clone, Serialize, Deserialize)]
13675pub struct DebtInstrumentDef {
13676    /// Instrument type: "term_loan", "revolving_credit", "bond", "commercial_paper", "bridge_loan".
13677    #[serde(rename = "type")]
13678    pub instrument_type: String,
13679    /// Principal amount (for term loans, bonds).
13680    #[serde(default)]
13681    pub principal: Option<f64>,
13682    /// Interest rate (annual, as decimal fraction).
13683    #[serde(default)]
13684    pub rate: Option<f64>,
13685    /// Maturity in months.
13686    #[serde(default)]
13687    pub maturity_months: Option<u32>,
13688    /// Facility limit (for revolving credit).
13689    #[serde(default)]
13690    pub facility: Option<f64>,
13691}
13692
13693/// Definition of a debt covenant in configuration.
13694#[derive(Debug, Clone, Serialize, Deserialize)]
13695pub struct CovenantDef {
13696    /// Covenant type: "debt_to_equity", "interest_coverage", "current_ratio",
13697    /// "net_worth", "debt_to_ebitda", "fixed_charge_coverage".
13698    #[serde(rename = "type")]
13699    pub covenant_type: String,
13700    /// Covenant threshold value.
13701    pub threshold: f64,
13702}
13703
13704/// Intercompany netting configuration.
13705///
13706/// Controls generation of multilateral netting runs.
13707#[derive(Debug, Clone, Serialize, Deserialize)]
13708pub struct NettingSchemaConfig {
13709    /// Whether netting generation is enabled.
13710    #[serde(default)]
13711    pub enabled: bool,
13712    /// Netting cycle: "daily", "weekly", or "monthly".
13713    #[serde(default = "default_netting_cycle")]
13714    pub cycle: String,
13715}
13716
/// Serde fallback for `NettingSchemaConfig::cycle`.
fn default_netting_cycle() -> String {
    String::from("monthly")
}
13720
13721impl Default for NettingSchemaConfig {
13722    fn default() -> Self {
13723        Self {
13724            enabled: false,
13725            cycle: default_netting_cycle(),
13726        }
13727    }
13728}
13729
13730/// Bank guarantee and letter of credit configuration.
13731///
13732/// Controls generation of bank guarantees, standby LCs, and performance bonds.
13733#[derive(Debug, Clone, Serialize, Deserialize)]
13734pub struct BankGuaranteeSchemaConfig {
13735    /// Whether bank guarantee generation is enabled.
13736    #[serde(default)]
13737    pub enabled: bool,
13738    /// Number of guarantees to generate.
13739    #[serde(default = "default_guarantee_count")]
13740    pub count: u32,
13741}
13742
/// Serde fallback for `BankGuaranteeSchemaConfig::count`.
fn default_guarantee_count() -> u32 {
    const GUARANTEE_COUNT: u32 = 5;
    GUARANTEE_COUNT
}
13746
13747impl Default for BankGuaranteeSchemaConfig {
13748    fn default() -> Self {
13749        Self {
13750            enabled: false,
13751            count: default_guarantee_count(),
13752        }
13753    }
13754}
13755
13756// ===========================================================================
13757// Project Accounting Configuration
13758// ===========================================================================
13759
13760/// Project accounting configuration.
13761///
13762/// Controls generation of project cost lines, revenue recognition,
13763/// milestones, change orders, retainage, and earned value metrics.
13764#[derive(Debug, Clone, Serialize, Deserialize)]
13765pub struct ProjectAccountingConfig {
13766    /// Whether project accounting is enabled.
13767    #[serde(default)]
13768    pub enabled: bool,
13769    /// Number of projects to generate.
13770    #[serde(default = "default_project_count")]
13771    pub project_count: u32,
13772    /// Distribution of project types (capital, internal, customer, r_and_d, maintenance, technology).
13773    #[serde(default)]
13774    pub project_types: ProjectTypeDistribution,
13775    /// WBS structure configuration.
13776    #[serde(default)]
13777    pub wbs: WbsSchemaConfig,
13778    /// Cost allocation rates (what % of source documents get project-tagged).
13779    #[serde(default)]
13780    pub cost_allocation: CostAllocationConfig,
13781    /// Revenue recognition configuration for project accounting.
13782    #[serde(default)]
13783    pub revenue_recognition: ProjectRevenueRecognitionConfig,
13784    /// Milestone configuration.
13785    #[serde(default)]
13786    pub milestones: MilestoneSchemaConfig,
13787    /// Change order configuration.
13788    #[serde(default)]
13789    pub change_orders: ChangeOrderSchemaConfig,
13790    /// Retainage configuration.
13791    #[serde(default)]
13792    pub retainage: RetainageSchemaConfig,
13793    /// Earned value management configuration.
13794    #[serde(default)]
13795    pub earned_value: EarnedValueSchemaConfig,
13796    /// Anomaly injection rate for project accounting data (0.0 to 1.0).
13797    #[serde(default = "default_project_anomaly_rate")]
13798    pub anomaly_rate: f64,
13799}
13800
/// Serde fallback for `ProjectAccountingConfig::project_count`.
fn default_project_count() -> u32 {
    const PROJECT_COUNT: u32 = 10;
    PROJECT_COUNT
}
13804
/// Serde fallback for `ProjectAccountingConfig::anomaly_rate` (3%).
fn default_project_anomaly_rate() -> f64 {
    const ANOMALY_RATE: f64 = 0.03;
    ANOMALY_RATE
}
13808
13809impl Default for ProjectAccountingConfig {
13810    fn default() -> Self {
13811        Self {
13812            enabled: false,
13813            project_count: default_project_count(),
13814            project_types: ProjectTypeDistribution::default(),
13815            wbs: WbsSchemaConfig::default(),
13816            cost_allocation: CostAllocationConfig::default(),
13817            revenue_recognition: ProjectRevenueRecognitionConfig::default(),
13818            milestones: MilestoneSchemaConfig::default(),
13819            change_orders: ChangeOrderSchemaConfig::default(),
13820            retainage: RetainageSchemaConfig::default(),
13821            earned_value: EarnedValueSchemaConfig::default(),
13822            anomaly_rate: default_project_anomaly_rate(),
13823        }
13824    }
13825}
13826
13827/// Distribution of project types by weight.
13828#[derive(Debug, Clone, Serialize, Deserialize)]
13829pub struct ProjectTypeDistribution {
13830    /// Weight for capital projects (default 0.25).
13831    #[serde(default = "default_capital_weight")]
13832    pub capital: f64,
13833    /// Weight for internal projects (default 0.20).
13834    #[serde(default = "default_internal_weight")]
13835    pub internal: f64,
13836    /// Weight for customer projects (default 0.30).
13837    #[serde(default = "default_customer_weight")]
13838    pub customer: f64,
13839    /// Weight for R&D projects (default 0.10).
13840    #[serde(default = "default_rnd_weight")]
13841    pub r_and_d: f64,
13842    /// Weight for maintenance projects (default 0.10).
13843    #[serde(default = "default_maintenance_weight")]
13844    pub maintenance: f64,
13845    /// Weight for technology projects (default 0.05).
13846    #[serde(default = "default_technology_weight")]
13847    pub technology: f64,
13848}
13849
/// Serde fallback for `ProjectTypeDistribution::capital`.
fn default_capital_weight() -> f64 {
    const CAPITAL_WEIGHT: f64 = 0.25;
    CAPITAL_WEIGHT
}
/// Serde fallback for `ProjectTypeDistribution::internal`.
fn default_internal_weight() -> f64 {
    const INTERNAL_WEIGHT: f64 = 0.20;
    INTERNAL_WEIGHT
}
/// Serde fallback for `ProjectTypeDistribution::customer`.
fn default_customer_weight() -> f64 {
    const CUSTOMER_WEIGHT: f64 = 0.30;
    CUSTOMER_WEIGHT
}
/// Serde fallback for `ProjectTypeDistribution::r_and_d`.
fn default_rnd_weight() -> f64 {
    const RND_WEIGHT: f64 = 0.10;
    RND_WEIGHT
}
/// Serde fallback for `ProjectTypeDistribution::maintenance`.
fn default_maintenance_weight() -> f64 {
    const MAINTENANCE_WEIGHT: f64 = 0.10;
    MAINTENANCE_WEIGHT
}
/// Serde fallback for `ProjectTypeDistribution::technology`.
fn default_technology_weight() -> f64 {
    const TECHNOLOGY_WEIGHT: f64 = 0.05;
    TECHNOLOGY_WEIGHT
}
13868
13869impl Default for ProjectTypeDistribution {
13870    fn default() -> Self {
13871        Self {
13872            capital: default_capital_weight(),
13873            internal: default_internal_weight(),
13874            customer: default_customer_weight(),
13875            r_and_d: default_rnd_weight(),
13876            maintenance: default_maintenance_weight(),
13877            technology: default_technology_weight(),
13878        }
13879    }
13880}
13881
13882/// WBS structure configuration.
13883#[derive(Debug, Clone, Serialize, Deserialize)]
13884pub struct WbsSchemaConfig {
13885    /// Maximum depth of WBS hierarchy (default 3).
13886    #[serde(default = "default_wbs_max_depth")]
13887    pub max_depth: u32,
13888    /// Minimum elements per level-1 WBS (default 2).
13889    #[serde(default = "default_wbs_min_elements")]
13890    pub min_elements_per_level: u32,
13891    /// Maximum elements per level-1 WBS (default 6).
13892    #[serde(default = "default_wbs_max_elements")]
13893    pub max_elements_per_level: u32,
13894}
13895
/// Serde fallback for `WbsSchemaConfig::max_depth`.
fn default_wbs_max_depth() -> u32 {
    const MAX_DEPTH: u32 = 3;
    MAX_DEPTH
}
/// Serde fallback for `WbsSchemaConfig::min_elements_per_level`.
fn default_wbs_min_elements() -> u32 {
    const MIN_ELEMENTS: u32 = 2;
    MIN_ELEMENTS
}
/// Serde fallback for `WbsSchemaConfig::max_elements_per_level`.
fn default_wbs_max_elements() -> u32 {
    const MAX_ELEMENTS: u32 = 6;
    MAX_ELEMENTS
}
13905
13906impl Default for WbsSchemaConfig {
13907    fn default() -> Self {
13908        Self {
13909            max_depth: default_wbs_max_depth(),
13910            min_elements_per_level: default_wbs_min_elements(),
13911            max_elements_per_level: default_wbs_max_elements(),
13912        }
13913    }
13914}
13915
13916/// Cost allocation rates — what fraction of each document type gets linked to a project.
13917#[derive(Debug, Clone, Serialize, Deserialize)]
13918pub struct CostAllocationConfig {
13919    /// Fraction of time entries assigned to projects (0.0 to 1.0).
13920    #[serde(default = "default_time_entry_rate")]
13921    pub time_entry_project_rate: f64,
13922    /// Fraction of expense reports assigned to projects (0.0 to 1.0).
13923    #[serde(default = "default_expense_rate")]
13924    pub expense_project_rate: f64,
13925    /// Fraction of purchase orders assigned to projects (0.0 to 1.0).
13926    #[serde(default = "default_po_rate")]
13927    pub purchase_order_project_rate: f64,
13928    /// Fraction of vendor invoices assigned to projects (0.0 to 1.0).
13929    #[serde(default = "default_vi_rate")]
13930    pub vendor_invoice_project_rate: f64,
13931}
13932
/// Serde fallback for `CostAllocationConfig::time_entry_project_rate`.
fn default_time_entry_rate() -> f64 {
    const TIME_ENTRY_RATE: f64 = 0.60;
    TIME_ENTRY_RATE
}
/// Serde fallback for `CostAllocationConfig::expense_project_rate`.
fn default_expense_rate() -> f64 {
    const EXPENSE_RATE: f64 = 0.30;
    EXPENSE_RATE
}
/// Serde fallback for `CostAllocationConfig::purchase_order_project_rate`.
fn default_po_rate() -> f64 {
    const PURCHASE_ORDER_RATE: f64 = 0.40;
    PURCHASE_ORDER_RATE
}
/// Serde fallback for `CostAllocationConfig::vendor_invoice_project_rate`.
fn default_vi_rate() -> f64 {
    const VENDOR_INVOICE_RATE: f64 = 0.35;
    VENDOR_INVOICE_RATE
}
13945
13946impl Default for CostAllocationConfig {
13947    fn default() -> Self {
13948        Self {
13949            time_entry_project_rate: default_time_entry_rate(),
13950            expense_project_rate: default_expense_rate(),
13951            purchase_order_project_rate: default_po_rate(),
13952            vendor_invoice_project_rate: default_vi_rate(),
13953        }
13954    }
13955}
13956
13957/// Revenue recognition configuration for project accounting.
13958#[derive(Debug, Clone, Serialize, Deserialize)]
13959pub struct ProjectRevenueRecognitionConfig {
13960    /// Whether revenue recognition is enabled for customer projects.
13961    #[serde(default = "default_true")]
13962    pub enabled: bool,
13963    /// Default method: "percentage_of_completion", "completed_contract", "milestone_based".
13964    #[serde(default = "default_revenue_method")]
13965    pub method: String,
13966    /// Default completion measure: "cost_to_cost", "labor_hours", "physical_completion".
13967    #[serde(default = "default_completion_measure")]
13968    pub completion_measure: String,
13969    /// Average contract value for customer projects.
13970    #[serde(default = "default_avg_contract_value")]
13971    pub avg_contract_value: f64,
13972}
13973
/// Serde fallback for `ProjectRevenueRecognitionConfig::method`.
fn default_revenue_method() -> String {
    String::from("percentage_of_completion")
}
/// Serde fallback for `ProjectRevenueRecognitionConfig::completion_measure`.
fn default_completion_measure() -> String {
    String::from("cost_to_cost")
}
/// Serde fallback for `ProjectRevenueRecognitionConfig::avg_contract_value`.
fn default_avg_contract_value() -> f64 {
    const AVG_CONTRACT_VALUE: f64 = 500_000.0;
    AVG_CONTRACT_VALUE
}
13983
13984impl Default for ProjectRevenueRecognitionConfig {
13985    fn default() -> Self {
13986        Self {
13987            enabled: true,
13988            method: default_revenue_method(),
13989            completion_measure: default_completion_measure(),
13990            avg_contract_value: default_avg_contract_value(),
13991        }
13992    }
13993}
13994
13995/// Milestone configuration.
13996#[derive(Debug, Clone, Serialize, Deserialize)]
13997pub struct MilestoneSchemaConfig {
13998    /// Whether milestone generation is enabled.
13999    #[serde(default = "default_true")]
14000    pub enabled: bool,
14001    /// Average number of milestones per project.
14002    #[serde(default = "default_milestones_per_project")]
14003    pub avg_per_project: u32,
14004    /// Fraction of milestones that are payment milestones (0.0 to 1.0).
14005    #[serde(default = "default_payment_milestone_rate")]
14006    pub payment_milestone_rate: f64,
14007}
14008
/// Serde fallback for `MilestoneSchemaConfig::avg_per_project`.
fn default_milestones_per_project() -> u32 {
    const MILESTONES_PER_PROJECT: u32 = 4;
    MILESTONES_PER_PROJECT
}
/// Serde fallback for `MilestoneSchemaConfig::payment_milestone_rate`.
fn default_payment_milestone_rate() -> f64 {
    const PAYMENT_MILESTONE_RATE: f64 = 0.50;
    PAYMENT_MILESTONE_RATE
}
14015
14016impl Default for MilestoneSchemaConfig {
14017    fn default() -> Self {
14018        Self {
14019            enabled: true,
14020            avg_per_project: default_milestones_per_project(),
14021            payment_milestone_rate: default_payment_milestone_rate(),
14022        }
14023    }
14024}
14025
14026/// Change order configuration.
14027#[derive(Debug, Clone, Serialize, Deserialize)]
14028pub struct ChangeOrderSchemaConfig {
14029    /// Whether change order generation is enabled.
14030    #[serde(default = "default_true")]
14031    pub enabled: bool,
14032    /// Probability that a project will have at least one change order (0.0 to 1.0).
14033    #[serde(default = "default_change_order_probability")]
14034    pub probability: f64,
14035    /// Maximum change orders per project.
14036    #[serde(default = "default_max_change_orders")]
14037    pub max_per_project: u32,
14038    /// Approval rate for change orders (0.0 to 1.0).
14039    #[serde(default = "default_change_order_approval_rate")]
14040    pub approval_rate: f64,
14041}
14042
/// Serde fallback for `ChangeOrderSchemaConfig::probability`.
fn default_change_order_probability() -> f64 {
    const CHANGE_ORDER_PROBABILITY: f64 = 0.40;
    CHANGE_ORDER_PROBABILITY
}
/// Serde fallback for `ChangeOrderSchemaConfig::max_per_project`.
fn default_max_change_orders() -> u32 {
    const MAX_CHANGE_ORDERS: u32 = 3;
    MAX_CHANGE_ORDERS
}
/// Serde fallback for `ChangeOrderSchemaConfig::approval_rate`.
fn default_change_order_approval_rate() -> f64 {
    const APPROVAL_RATE: f64 = 0.75;
    APPROVAL_RATE
}
14052
14053impl Default for ChangeOrderSchemaConfig {
14054    fn default() -> Self {
14055        Self {
14056            enabled: true,
14057            probability: default_change_order_probability(),
14058            max_per_project: default_max_change_orders(),
14059            approval_rate: default_change_order_approval_rate(),
14060        }
14061    }
14062}
14063
14064/// Retainage configuration.
14065#[derive(Debug, Clone, Serialize, Deserialize)]
14066pub struct RetainageSchemaConfig {
14067    /// Whether retainage is enabled.
14068    #[serde(default)]
14069    pub enabled: bool,
14070    /// Default retainage percentage (0.0 to 1.0, e.g., 0.10 for 10%).
14071    #[serde(default = "default_retainage_pct")]
14072    pub default_percentage: f64,
14073}
14074
/// Serde fallback for `RetainageSchemaConfig::default_percentage` (10%).
fn default_retainage_pct() -> f64 {
    const RETAINAGE_FRACTION: f64 = 0.10;
    RETAINAGE_FRACTION
}
14078
14079impl Default for RetainageSchemaConfig {
14080    fn default() -> Self {
14081        Self {
14082            enabled: false,
14083            default_percentage: default_retainage_pct(),
14084        }
14085    }
14086}
14087
14088/// Earned value management (EVM) configuration.
14089#[derive(Debug, Clone, Serialize, Deserialize)]
14090pub struct EarnedValueSchemaConfig {
14091    /// Whether EVM metrics are generated.
14092    #[serde(default = "default_true")]
14093    pub enabled: bool,
14094    /// Measurement frequency: "weekly", "biweekly", "monthly".
14095    #[serde(default = "default_evm_frequency")]
14096    pub frequency: String,
14097}
14098
/// Serde fallback for `EarnedValueSchemaConfig::frequency`.
fn default_evm_frequency() -> String {
    String::from("monthly")
}
14102
14103impl Default for EarnedValueSchemaConfig {
14104    fn default() -> Self {
14105        Self {
14106            enabled: true,
14107            frequency: default_evm_frequency(),
14108        }
14109    }
14110}
14111
14112// =============================================================================
14113// ESG / Sustainability Configuration
14114// =============================================================================
14115
14116/// Top-level ESG / sustainability reporting configuration.
14117#[derive(Debug, Clone, Serialize, Deserialize)]
14118pub struct EsgConfig {
14119    /// Whether ESG generation is enabled.
14120    #[serde(default)]
14121    pub enabled: bool,
14122    /// Environmental metrics (emissions, energy, water, waste).
14123    #[serde(default)]
14124    pub environmental: EnvironmentalConfig,
14125    /// Social metrics (diversity, pay equity, safety).
14126    #[serde(default)]
14127    pub social: SocialConfig,
14128    /// Governance metrics (board composition, ethics, compliance).
14129    #[serde(default)]
14130    pub governance: GovernanceSchemaConfig,
14131    /// Supply-chain ESG assessment settings.
14132    #[serde(default)]
14133    pub supply_chain_esg: SupplyChainEsgConfig,
14134    /// ESG reporting / disclosure framework settings.
14135    #[serde(default)]
14136    pub reporting: EsgReportingConfig,
14137    /// Climate scenario analysis settings.
14138    #[serde(default)]
14139    pub climate_scenarios: ClimateScenarioConfig,
14140    /// Anomaly injection rate for ESG data (0.0 to 1.0).
14141    #[serde(default = "default_esg_anomaly_rate")]
14142    pub anomaly_rate: f64,
14143}
14144
/// Serde fallback for `EsgConfig::anomaly_rate` (2%).
fn default_esg_anomaly_rate() -> f64 {
    const ESG_ANOMALY_RATE: f64 = 0.02;
    ESG_ANOMALY_RATE
}
14148
14149impl Default for EsgConfig {
14150    fn default() -> Self {
14151        Self {
14152            enabled: false,
14153            environmental: EnvironmentalConfig::default(),
14154            social: SocialConfig::default(),
14155            governance: GovernanceSchemaConfig::default(),
14156            supply_chain_esg: SupplyChainEsgConfig::default(),
14157            reporting: EsgReportingConfig::default(),
14158            climate_scenarios: ClimateScenarioConfig::default(),
14159            anomaly_rate: default_esg_anomaly_rate(),
14160        }
14161    }
14162}
14163
14164/// Country pack configuration.
14165///
14166/// Controls where to load additional country packs and per-country overrides.
14167/// When omitted, only the built-in packs (_default, US, DE, GB) are used.
14168#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14169pub struct CountryPacksSchemaConfig {
14170    /// Optional directory containing additional `*.json` country packs.
14171    #[serde(default)]
14172    pub external_dir: Option<PathBuf>,
14173    /// Per-country overrides applied after loading.
14174    /// Keys are ISO 3166-1 alpha-2 codes; values are partial JSON objects
14175    /// that are deep-merged on top of the loaded pack.
14176    #[serde(default)]
14177    pub overrides: std::collections::HashMap<String, serde_json::Value>,
14178}
14179
14180/// Environmental metrics configuration.
14181#[derive(Debug, Clone, Serialize, Deserialize)]
14182pub struct EnvironmentalConfig {
14183    /// Whether environmental metrics are generated.
14184    #[serde(default = "default_true")]
14185    pub enabled: bool,
14186    /// Scope 1 (direct) emission generation settings.
14187    #[serde(default)]
14188    pub scope1: EmissionScopeConfig,
14189    /// Scope 2 (purchased energy) emission generation settings.
14190    #[serde(default)]
14191    pub scope2: EmissionScopeConfig,
14192    /// Scope 3 (value chain) emission generation settings.
14193    #[serde(default)]
14194    pub scope3: Scope3Config,
14195    /// Energy consumption tracking settings.
14196    #[serde(default)]
14197    pub energy: EnergySchemaConfig,
14198    /// Water usage tracking settings.
14199    #[serde(default)]
14200    pub water: WaterSchemaConfig,
14201    /// Waste management tracking settings.
14202    #[serde(default)]
14203    pub waste: WasteSchemaConfig,
14204}
14205
14206impl Default for EnvironmentalConfig {
14207    fn default() -> Self {
14208        Self {
14209            enabled: true,
14210            scope1: EmissionScopeConfig::default(),
14211            scope2: EmissionScopeConfig::default(),
14212            scope3: Scope3Config::default(),
14213            energy: EnergySchemaConfig::default(),
14214            water: WaterSchemaConfig::default(),
14215            waste: WasteSchemaConfig::default(),
14216        }
14217    }
14218}
14219
14220/// Configuration for a single emission scope (Scope 1 or 2).
14221#[derive(Debug, Clone, Serialize, Deserialize)]
14222pub struct EmissionScopeConfig {
14223    /// Whether this scope is enabled.
14224    #[serde(default = "default_true")]
14225    pub enabled: bool,
14226    /// Emission factor region (e.g., "US", "EU", "global").
14227    #[serde(default = "default_emission_region")]
14228    pub factor_region: String,
14229}
14230
/// Serde fallback for `EmissionScopeConfig::factor_region`.
fn default_emission_region() -> String {
    String::from("US")
}
14234
14235impl Default for EmissionScopeConfig {
14236    fn default() -> Self {
14237        Self {
14238            enabled: true,
14239            factor_region: default_emission_region(),
14240        }
14241    }
14242}
14243
14244/// Scope 3 (value chain) emission configuration.
14245#[derive(Debug, Clone, Serialize, Deserialize)]
14246pub struct Scope3Config {
14247    /// Whether Scope 3 emissions are generated.
14248    #[serde(default = "default_true")]
14249    pub enabled: bool,
14250    /// Categories to include (e.g., "purchased_goods", "business_travel", "commuting").
14251    #[serde(default = "default_scope3_categories")]
14252    pub categories: Vec<String>,
14253    /// Spend-based emission intensity (kg CO2e per USD).
14254    #[serde(default = "default_spend_intensity")]
14255    pub default_spend_intensity_kg_per_usd: f64,
14256}
14257
/// Serde fallback for `Scope3Config::categories`.
fn default_scope3_categories() -> Vec<String> {
    const CATEGORIES: [&str; 3] = ["purchased_goods", "business_travel", "employee_commuting"];
    CATEGORIES.iter().map(|&category| category.to_owned()).collect()
}
14265
/// Serde fallback for `Scope3Config::default_spend_intensity_kg_per_usd`.
fn default_spend_intensity() -> f64 {
    const SPEND_INTENSITY_KG_PER_USD: f64 = 0.5;
    SPEND_INTENSITY_KG_PER_USD
}
14269
14270impl Default for Scope3Config {
14271    fn default() -> Self {
14272        Self {
14273            enabled: true,
14274            categories: default_scope3_categories(),
14275            default_spend_intensity_kg_per_usd: default_spend_intensity(),
14276        }
14277    }
14278}
14279
14280/// Energy consumption configuration.
14281#[derive(Debug, Clone, Serialize, Deserialize)]
14282pub struct EnergySchemaConfig {
14283    /// Whether energy consumption tracking is enabled.
14284    #[serde(default = "default_true")]
14285    pub enabled: bool,
14286    /// Number of facilities to generate.
14287    #[serde(default = "default_facility_count")]
14288    pub facility_count: u32,
14289    /// Target percentage of energy from renewable sources (0.0 to 1.0).
14290    #[serde(default = "default_renewable_target")]
14291    pub renewable_target: f64,
14292}
14293
/// Serde fallback for `EnergySchemaConfig::facility_count`.
fn default_facility_count() -> u32 {
    const FACILITY_COUNT: u32 = 5;
    FACILITY_COUNT
}
14297
/// Serde fallback for `EnergySchemaConfig::renewable_target` (30%).
fn default_renewable_target() -> f64 {
    const RENEWABLE_TARGET: f64 = 0.30;
    RENEWABLE_TARGET
}
14301
14302impl Default for EnergySchemaConfig {
14303    fn default() -> Self {
14304        Self {
14305            enabled: true,
14306            facility_count: default_facility_count(),
14307            renewable_target: default_renewable_target(),
14308        }
14309    }
14310}
14311
14312/// Water usage configuration.
14313#[derive(Debug, Clone, Serialize, Deserialize)]
14314pub struct WaterSchemaConfig {
14315    /// Whether water usage tracking is enabled.
14316    #[serde(default = "default_true")]
14317    pub enabled: bool,
14318    /// Number of facilities with water tracking.
14319    #[serde(default = "default_water_facility_count")]
14320    pub facility_count: u32,
14321}
14322
/// Serde fallback for `WaterSchemaConfig::facility_count`.
fn default_water_facility_count() -> u32 {
    const WATER_FACILITY_COUNT: u32 = 3;
    WATER_FACILITY_COUNT
}
14326
14327impl Default for WaterSchemaConfig {
14328    fn default() -> Self {
14329        Self {
14330            enabled: true,
14331            facility_count: default_water_facility_count(),
14332        }
14333    }
14334}
14335
14336/// Waste management configuration.
14337#[derive(Debug, Clone, Serialize, Deserialize)]
14338pub struct WasteSchemaConfig {
14339    /// Whether waste tracking is enabled.
14340    #[serde(default = "default_true")]
14341    pub enabled: bool,
14342    /// Target diversion rate (0.0 to 1.0).
14343    #[serde(default = "default_diversion_target")]
14344    pub diversion_target: f64,
14345}
14346
/// Serde fallback for `WasteSchemaConfig::diversion_target` (50%).
fn default_diversion_target() -> f64 {
    const DIVERSION_TARGET: f64 = 0.50;
    DIVERSION_TARGET
}
14350
14351impl Default for WasteSchemaConfig {
14352    fn default() -> Self {
14353        Self {
14354            enabled: true,
14355            diversion_target: default_diversion_target(),
14356        }
14357    }
14358}
14359
14360/// Social metrics configuration.
14361#[derive(Debug, Clone, Serialize, Deserialize)]
14362pub struct SocialConfig {
14363    /// Whether social metrics are generated.
14364    #[serde(default = "default_true")]
14365    pub enabled: bool,
14366    /// Workforce diversity tracking settings.
14367    #[serde(default)]
14368    pub diversity: DiversitySchemaConfig,
14369    /// Pay equity analysis settings.
14370    #[serde(default)]
14371    pub pay_equity: PayEquitySchemaConfig,
14372    /// Safety incident and metrics settings.
14373    #[serde(default)]
14374    pub safety: SafetySchemaConfig,
14375}
14376
14377impl Default for SocialConfig {
14378    fn default() -> Self {
14379        Self {
14380            enabled: true,
14381            diversity: DiversitySchemaConfig::default(),
14382            pay_equity: PayEquitySchemaConfig::default(),
14383            safety: SafetySchemaConfig::default(),
14384        }
14385    }
14386}
14387
14388/// Workforce diversity configuration.
14389#[derive(Debug, Clone, Serialize, Deserialize)]
14390pub struct DiversitySchemaConfig {
14391    /// Whether diversity metrics are generated.
14392    #[serde(default = "default_true")]
14393    pub enabled: bool,
14394    /// Dimensions to track (e.g., "gender", "ethnicity", "age_group").
14395    #[serde(default = "default_diversity_dimensions")]
14396    pub dimensions: Vec<String>,
14397}
14398
/// Serde fallback for `DiversitySchemaConfig::dimensions`.
fn default_diversity_dimensions() -> Vec<String> {
    const DIMENSIONS: [&str; 3] = ["gender", "ethnicity", "age_group"];
    DIMENSIONS.iter().map(|&dimension| dimension.to_owned()).collect()
}
14406
14407impl Default for DiversitySchemaConfig {
14408    fn default() -> Self {
14409        Self {
14410            enabled: true,
14411            dimensions: default_diversity_dimensions(),
14412        }
14413    }
14414}
14415
14416/// Pay equity analysis configuration.
14417#[derive(Debug, Clone, Serialize, Deserialize)]
14418pub struct PayEquitySchemaConfig {
14419    /// Whether pay equity analysis is generated.
14420    #[serde(default = "default_true")]
14421    pub enabled: bool,
14422    /// Target pay gap threshold for flagging (e.g., 0.05 = 5% gap).
14423    #[serde(default = "default_pay_gap_threshold")]
14424    pub gap_threshold: f64,
14425}
14426
/// Serde fallback for `PayEquitySchemaConfig::gap_threshold` (5%).
fn default_pay_gap_threshold() -> f64 {
    const PAY_GAP_THRESHOLD: f64 = 0.05;
    PAY_GAP_THRESHOLD
}
14430
14431impl Default for PayEquitySchemaConfig {
14432    fn default() -> Self {
14433        Self {
14434            enabled: true,
14435            gap_threshold: default_pay_gap_threshold(),
14436        }
14437    }
14438}
14439
14440/// Safety metrics configuration.
14441#[derive(Debug, Clone, Serialize, Deserialize)]
14442pub struct SafetySchemaConfig {
14443    /// Whether safety metrics are generated.
14444    #[serde(default = "default_true")]
14445    pub enabled: bool,
14446    /// Average annual recordable incidents per 200,000 hours.
14447    #[serde(default = "default_trir_target")]
14448    pub target_trir: f64,
14449    /// Number of safety incidents to generate.
14450    #[serde(default = "default_incident_count")]
14451    pub incident_count: u32,
14452}
14453
/// Serde fallback for `SafetySchemaConfig::target_trir`.
fn default_trir_target() -> f64 {
    const TARGET_TRIR: f64 = 2.5;
    TARGET_TRIR
}
14457
/// Serde fallback for `SafetySchemaConfig::incident_count`.
fn default_incident_count() -> u32 {
    const INCIDENT_COUNT: u32 = 20;
    INCIDENT_COUNT
}
14461
14462impl Default for SafetySchemaConfig {
14463    fn default() -> Self {
14464        Self {
14465            enabled: true,
14466            target_trir: default_trir_target(),
14467            incident_count: default_incident_count(),
14468        }
14469    }
14470}
14471
14472/// Governance metrics configuration.
14473#[derive(Debug, Clone, Serialize, Deserialize)]
14474pub struct GovernanceSchemaConfig {
14475    /// Whether governance metrics are generated.
14476    #[serde(default = "default_true")]
14477    pub enabled: bool,
14478    /// Number of board members.
14479    #[serde(default = "default_board_size")]
14480    pub board_size: u32,
14481    /// Target independent director ratio (0.0 to 1.0).
14482    #[serde(default = "default_independence_target")]
14483    pub independence_target: f64,
14484}
14485
/// Serde fallback for `GovernanceSchemaConfig::board_size`.
fn default_board_size() -> u32 {
    const BOARD_SIZE: u32 = 11;
    BOARD_SIZE
}
14489
/// Serde fallback for `GovernanceSchemaConfig::independence_target`.
fn default_independence_target() -> f64 {
    const INDEPENDENCE_TARGET: f64 = 0.67;
    INDEPENDENCE_TARGET
}
14493
14494impl Default for GovernanceSchemaConfig {
14495    fn default() -> Self {
14496        Self {
14497            enabled: true,
14498            board_size: default_board_size(),
14499            independence_target: default_independence_target(),
14500        }
14501    }
14502}
14503
14504/// Supply-chain ESG assessment configuration.
14505#[derive(Debug, Clone, Serialize, Deserialize)]
14506pub struct SupplyChainEsgConfig {
14507    /// Whether supply chain ESG assessments are generated.
14508    #[serde(default = "default_true")]
14509    pub enabled: bool,
14510    /// Proportion of vendors to assess (0.0 to 1.0).
14511    #[serde(default = "default_assessment_coverage")]
14512    pub assessment_coverage: f64,
14513    /// High-risk country codes for automatic flagging.
14514    #[serde(default = "default_high_risk_countries")]
14515    pub high_risk_countries: Vec<String>,
14516}
14517
/// Serde fallback for `SupplyChainEsgConfig::assessment_coverage` (80%).
fn default_assessment_coverage() -> f64 {
    const ASSESSMENT_COVERAGE: f64 = 0.80;
    ASSESSMENT_COVERAGE
}
14521
/// Serde fallback for `SupplyChainEsgConfig::high_risk_countries`.
fn default_high_risk_countries() -> Vec<String> {
    ["CN", "BD", "MM"]
        .iter()
        .map(|&code| String::from(code))
        .collect()
}
14525
14526impl Default for SupplyChainEsgConfig {
14527    fn default() -> Self {
14528        Self {
14529            enabled: true,
14530            assessment_coverage: default_assessment_coverage(),
14531            high_risk_countries: default_high_risk_countries(),
14532        }
14533    }
14534}
14535
14536/// ESG reporting / disclosure framework configuration.
14537#[derive(Debug, Clone, Serialize, Deserialize)]
14538pub struct EsgReportingConfig {
14539    /// Whether ESG disclosures are generated.
14540    #[serde(default = "default_true")]
14541    pub enabled: bool,
14542    /// Frameworks to generate disclosures for.
14543    #[serde(default = "default_esg_frameworks")]
14544    pub frameworks: Vec<String>,
14545    /// Whether materiality assessment is performed.
14546    #[serde(default = "default_true")]
14547    pub materiality_assessment: bool,
14548    /// Materiality threshold for impact dimension (0.0 to 1.0).
14549    #[serde(default = "default_materiality_threshold")]
14550    pub impact_threshold: f64,
14551    /// Materiality threshold for financial dimension (0.0 to 1.0).
14552    #[serde(default = "default_materiality_threshold")]
14553    pub financial_threshold: f64,
14554}
14555
/// Serde fallback for `EsgReportingConfig::frameworks`.
fn default_esg_frameworks() -> Vec<String> {
    ["GRI", "ESRS"]
        .iter()
        .map(|&framework| String::from(framework))
        .collect()
}
14559
/// Serde fallback shared by `EsgReportingConfig::impact_threshold` and
/// `EsgReportingConfig::financial_threshold`.
fn default_materiality_threshold() -> f64 {
    const MATERIALITY_THRESHOLD: f64 = 0.6;
    MATERIALITY_THRESHOLD
}
14563
14564impl Default for EsgReportingConfig {
14565    fn default() -> Self {
14566        Self {
14567            enabled: true,
14568            frameworks: default_esg_frameworks(),
14569            materiality_assessment: true,
14570            impact_threshold: default_materiality_threshold(),
14571            financial_threshold: default_materiality_threshold(),
14572        }
14573    }
14574}
14575
14576/// Climate scenario analysis configuration.
14577#[derive(Debug, Clone, Serialize, Deserialize)]
14578pub struct ClimateScenarioConfig {
14579    /// Whether climate scenario analysis is generated.
14580    #[serde(default)]
14581    pub enabled: bool,
14582    /// Scenarios to model (e.g., "net_zero_2050", "stated_policies", "current_trajectory").
14583    #[serde(default = "default_climate_scenarios")]
14584    pub scenarios: Vec<String>,
14585    /// Time horizons in years to project.
14586    #[serde(default = "default_time_horizons")]
14587    pub time_horizons: Vec<u32>,
14588}
14589
/// Serde default for the climate scenarios to model.
fn default_climate_scenarios() -> Vec<String> {
    ["net_zero_2050", "stated_policies", "current_trajectory"]
        .iter()
        .map(|scenario| scenario.to_string())
        .collect()
}

/// Serde default for the projection horizons, in years.
fn default_time_horizons() -> Vec<u32> {
    Vec::from([5_u32, 10, 30])
}
14601
14602impl Default for ClimateScenarioConfig {
14603    fn default() -> Self {
14604        Self {
14605            enabled: false,
14606            scenarios: default_climate_scenarios(),
14607            time_horizons: default_time_horizons(),
14608        }
14609    }
14610}
14611
14612// ===== Counterfactual Simulation Scenarios =====
14613
14614/// Configuration for counterfactual simulation scenarios.
14615#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14616pub struct ScenariosConfig {
14617    /// Whether scenario generation is enabled.
14618    #[serde(default)]
14619    pub enabled: bool,
14620    /// List of scenario definitions.
14621    #[serde(default)]
14622    pub scenarios: Vec<ScenarioSchemaConfig>,
14623    /// Causal model configuration.
14624    #[serde(default)]
14625    pub causal_model: CausalModelSchemaConfig,
14626    /// Default settings applied to all scenarios.
14627    #[serde(default)]
14628    pub defaults: ScenarioDefaultsConfig,
14629    /// Generate counterfactual (original, mutated) JE pairs for ML training.
14630    /// When true, the orchestrator produces paired clean/anomalous journal entries.
14631    #[serde(default)]
14632    pub generate_counterfactuals: bool,
14633}
14634
14635/// A single scenario definition in the config.
14636#[derive(Debug, Clone, Serialize, Deserialize)]
14637pub struct ScenarioSchemaConfig {
14638    /// Scenario name (must be unique).
14639    pub name: String,
14640    /// Human-readable description.
14641    #[serde(default)]
14642    pub description: String,
14643    /// Tags for categorization.
14644    #[serde(default)]
14645    pub tags: Vec<String>,
14646    /// Base scenario name (None = default config).
14647    pub base: Option<String>,
14648    /// IFRS 9-style probability weight.
14649    pub probability_weight: Option<f64>,
14650    /// List of interventions to apply.
14651    #[serde(default)]
14652    pub interventions: Vec<InterventionSchemaConfig>,
14653    /// Constraint overrides for this scenario.
14654    #[serde(default)]
14655    pub constraints: ScenarioConstraintsSchemaConfig,
14656    /// Output configuration for this scenario.
14657    #[serde(default)]
14658    pub output: ScenarioOutputSchemaConfig,
14659    /// Arbitrary metadata.
14660    #[serde(default)]
14661    pub metadata: std::collections::HashMap<String, String>,
14662}
14663
/// An intervention definition in the config.
///
/// `timing`, `label` and `priority` are read as named keys. Because
/// `intervention_type` is flattened, the remaining keys of the entry are
/// collected into it as an untyped JSON value rather than a typed enum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterventionSchemaConfig {
    /// Intervention type and parameters (flattened tagged enum).
    #[serde(flatten)]
    pub intervention_type: serde_json::Value,
    /// Timing configuration.
    #[serde(default)]
    pub timing: InterventionTimingSchemaConfig,
    /// Human-readable label.
    pub label: Option<String>,
    /// Priority for conflict resolution (higher wins).
    #[serde(default)]
    pub priority: u32,
}
14679
14680/// Timing configuration for an intervention.
14681#[derive(Debug, Clone, Serialize, Deserialize)]
14682pub struct InterventionTimingSchemaConfig {
14683    /// Month offset from start (1-indexed).
14684    #[serde(default = "default_start_month")]
14685    pub start_month: u32,
14686    /// Duration in months.
14687    pub duration_months: Option<u32>,
14688    /// Onset type: "sudden", "gradual", "oscillating", "custom".
14689    #[serde(default = "default_onset")]
14690    pub onset: String,
14691    /// Ramp period in months.
14692    pub ramp_months: Option<u32>,
14693}
14694
/// Serde default for an intervention's start month (1-indexed).
fn default_start_month() -> u32 {
    const FIRST_MONTH: u32 = 1;
    FIRST_MONTH
}

/// Serde default for an intervention's onset type.
fn default_onset() -> String {
    String::from("sudden")
}
14702
14703impl Default for InterventionTimingSchemaConfig {
14704    fn default() -> Self {
14705        Self {
14706            start_month: 1,
14707            duration_months: None,
14708            onset: "sudden".to_string(),
14709            ramp_months: None,
14710        }
14711    }
14712}
14713
14714/// Scenario constraint overrides.
14715#[derive(Debug, Clone, Serialize, Deserialize)]
14716pub struct ScenarioConstraintsSchemaConfig {
14717    #[serde(default = "default_true")]
14718    pub preserve_accounting_identity: bool,
14719    #[serde(default = "default_true")]
14720    pub preserve_document_chains: bool,
14721    #[serde(default = "default_true")]
14722    pub preserve_period_close: bool,
14723    #[serde(default = "default_true")]
14724    pub preserve_balance_coherence: bool,
14725    #[serde(default)]
14726    pub custom: Vec<CustomConstraintSchemaConfig>,
14727}
14728
14729impl Default for ScenarioConstraintsSchemaConfig {
14730    fn default() -> Self {
14731        Self {
14732            preserve_accounting_identity: true,
14733            preserve_document_chains: true,
14734            preserve_period_close: true,
14735            preserve_balance_coherence: true,
14736            custom: Vec::new(),
14737        }
14738    }
14739}
14740
14741/// Custom constraint in config.
14742#[derive(Debug, Clone, Serialize, Deserialize)]
14743pub struct CustomConstraintSchemaConfig {
14744    pub config_path: String,
14745    pub min: Option<f64>,
14746    pub max: Option<f64>,
14747    #[serde(default)]
14748    pub description: String,
14749}
14750
14751/// Output configuration for a scenario.
14752#[derive(Debug, Clone, Serialize, Deserialize)]
14753pub struct ScenarioOutputSchemaConfig {
14754    #[serde(default = "default_true")]
14755    pub paired: bool,
14756    #[serde(default = "default_diff_formats_schema")]
14757    pub diff_formats: Vec<String>,
14758    #[serde(default)]
14759    pub diff_scope: Vec<String>,
14760}
14761
/// Serde default for the scenario diff formats.
fn default_diff_formats_schema() -> Vec<String> {
    ["summary", "aggregate"]
        .iter()
        .map(|format| format.to_string())
        .collect()
}
14765
14766impl Default for ScenarioOutputSchemaConfig {
14767    fn default() -> Self {
14768        Self {
14769            paired: true,
14770            diff_formats: default_diff_formats_schema(),
14771            diff_scope: Vec::new(),
14772        }
14773    }
14774}
14775
/// Causal model configuration.
///
/// Nodes and edges are kept as untyped JSON values in this schema; both lists
/// are empty when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CausalModelSchemaConfig {
    /// Preset name: "default", "minimal", or "custom".
    #[serde(default = "default_causal_preset")]
    pub preset: String,
    /// Custom nodes (merged with preset).
    #[serde(default)]
    pub nodes: Vec<serde_json::Value>,
    /// Custom edges (merged with preset).
    #[serde(default)]
    pub edges: Vec<serde_json::Value>,
}
14789
/// Serde default for the causal model preset name.
fn default_causal_preset() -> String {
    String::from("default")
}
14793
14794impl Default for CausalModelSchemaConfig {
14795    fn default() -> Self {
14796        Self {
14797            preset: "default".to_string(),
14798            nodes: Vec::new(),
14799            edges: Vec::new(),
14800        }
14801    }
14802}
14803
/// Default settings applied to all scenarios.
///
/// Both sections fall back to their own `Default` implementation when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ScenarioDefaultsConfig {
    /// Constraint settings.
    #[serde(default)]
    pub constraints: ScenarioConstraintsSchemaConfig,
    /// Output settings.
    #[serde(default)]
    pub output: ScenarioOutputSchemaConfig,
}
14812
14813// =====================================================================
14814// Compliance Regulations Framework Configuration
14815// =====================================================================
14816
14817/// Top-level configuration for the compliance regulations framework.
14818///
14819/// Controls standards registry, jurisdiction profiles, temporal versioning,
14820/// audit procedure templates, compliance graph integration, and output settings.
14821///
14822/// # Example
14823///
14824/// ```yaml
14825/// compliance_regulations:
14826///   enabled: true
14827///   jurisdictions: [US, DE, GB]
14828///   reference_date: "2025-06-30"
14829///   standards_selection:
14830///     categories: [accounting, auditing, regulatory]
14831///     include: ["IFRS-16", "ASC-606"]
14832///   audit_procedures:
14833///     enabled: true
14834///     procedures_per_standard: 3
14835///   findings:
14836///     enabled: true
14837///     finding_rate: 0.05
14838///   filings:
14839///     enabled: true
14840///   graph:
14841///     enabled: true
14842///     include_compliance_nodes: true
14843///     include_compliance_edges: true
14844/// ```
14845#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14846pub struct ComplianceRegulationsConfig {
14847    /// Master switch for the compliance regulations framework.
14848    #[serde(default)]
14849    pub enabled: bool,
14850    /// Jurisdictions to generate compliance data for (ISO 3166-1 alpha-2 codes).
14851    /// If empty, inferred from company countries in the config.
14852    #[serde(default)]
14853    pub jurisdictions: Vec<String>,
14854    /// Reference date for temporal standard resolution (YYYY-MM-DD).
14855    /// Defaults to the global start_date if not set.
14856    #[serde(default)]
14857    pub reference_date: Option<String>,
14858    /// Standards selection filters.
14859    #[serde(default)]
14860    pub standards_selection: StandardsSelectionConfig,
14861    /// Audit procedure generation settings.
14862    #[serde(default)]
14863    pub audit_procedures: AuditProcedureGenConfig,
14864    /// Compliance finding generation settings.
14865    #[serde(default)]
14866    pub findings: ComplianceFindingGenConfig,
14867    /// Regulatory filing generation settings.
14868    #[serde(default)]
14869    pub filings: ComplianceFilingGenConfig,
14870    /// Compliance graph integration settings.
14871    #[serde(default)]
14872    pub graph: ComplianceGraphConfig,
14873    /// Output settings for compliance-specific files.
14874    #[serde(default)]
14875    pub output: ComplianceOutputConfig,
14876    /// v3.3.0: legal-document generation (engagement letters,
14877    /// management reps, legal opinions, regulatory filings, board
14878    /// resolutions). Requires `compliance_regulations.enabled = true`
14879    /// AND `legal_documents.enabled = true` to take effect.
14880    #[serde(default)]
14881    pub legal_documents: LegalDocumentsConfig,
14882}
14883
14884/// Legal-document generation settings (v3.3.0+).
14885///
14886/// Wires `LegalDocumentGenerator` into the orchestrator. Generates one
14887/// batch per audit engagement when enabled.
14888#[derive(Debug, Clone, Serialize, Deserialize)]
14889pub struct LegalDocumentsConfig {
14890    /// Master switch.
14891    #[serde(default)]
14892    pub enabled: bool,
14893    /// Probability of including a legal-opinion document in an engagement.
14894    #[serde(default = "default_legal_opinion_probability")]
14895    pub legal_opinion_probability: f64,
14896}
14897
/// Serde default for the probability of including a legal-opinion document.
fn default_legal_opinion_probability() -> f64 {
    const LEGAL_OPINION_PROBABILITY: f64 = 0.40;
    LEGAL_OPINION_PROBABILITY
}
14901
14902impl Default for LegalDocumentsConfig {
14903    fn default() -> Self {
14904        Self {
14905            enabled: false,
14906            legal_opinion_probability: default_legal_opinion_probability(),
14907        }
14908    }
14909}
14910
14911/// Filters which standards are included in the generation.
14912#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14913pub struct StandardsSelectionConfig {
14914    /// Standard categories to include (accounting, auditing, regulatory, tax, esg).
14915    /// Empty = all categories.
14916    #[serde(default)]
14917    pub categories: Vec<String>,
14918    /// Explicit standard IDs to include (e.g., ["IFRS-16", "ASC-606"]).
14919    /// When non-empty, only these standards (plus mandatory ones for selected jurisdictions) are used.
14920    #[serde(default)]
14921    pub include: Vec<String>,
14922    /// Standard IDs to exclude.
14923    #[serde(default)]
14924    pub exclude: Vec<String>,
14925    /// Include superseded standards in the output (for historical analysis).
14926    #[serde(default)]
14927    pub include_superseded: bool,
14928}
14929
14930/// Configuration for audit procedure template generation.
14931#[derive(Debug, Clone, Serialize, Deserialize)]
14932pub struct AuditProcedureGenConfig {
14933    /// Whether audit procedure generation is enabled.
14934    #[serde(default)]
14935    pub enabled: bool,
14936    /// Number of procedures to generate per applicable standard.
14937    #[serde(default = "default_procedures_per_standard")]
14938    pub procedures_per_standard: usize,
14939    /// Sampling methodology: "statistical", "non_statistical", "mixed".
14940    #[serde(default = "default_sampling_method")]
14941    pub sampling_method: String,
14942    /// Confidence level for statistical sampling (0.0-1.0).
14943    #[serde(default = "default_confidence_level")]
14944    pub confidence_level: f64,
14945    /// Tolerable misstatement rate for sampling (0.0-1.0).
14946    #[serde(default = "default_tolerable_misstatement")]
14947    pub tolerable_misstatement: f64,
14948}
14949
/// Serde default for the number of procedures generated per standard.
fn default_procedures_per_standard() -> usize {
    const PROCEDURES_PER_STANDARD: usize = 3;
    PROCEDURES_PER_STANDARD
}

/// Serde default for the sampling methodology.
fn default_sampling_method() -> String {
    String::from("statistical")
}

/// Serde default for the statistical sampling confidence level.
fn default_confidence_level() -> f64 {
    const CONFIDENCE_LEVEL: f64 = 0.95;
    CONFIDENCE_LEVEL
}

/// Serde default for the tolerable misstatement rate.
fn default_tolerable_misstatement() -> f64 {
    const TOLERABLE_MISSTATEMENT: f64 = 0.05;
    TOLERABLE_MISSTATEMENT
}
14965
14966impl Default for AuditProcedureGenConfig {
14967    fn default() -> Self {
14968        Self {
14969            enabled: false,
14970            procedures_per_standard: default_procedures_per_standard(),
14971            sampling_method: default_sampling_method(),
14972            confidence_level: default_confidence_level(),
14973            tolerable_misstatement: default_tolerable_misstatement(),
14974        }
14975    }
14976}
14977
14978/// Configuration for compliance finding generation.
14979#[derive(Debug, Clone, Serialize, Deserialize)]
14980pub struct ComplianceFindingGenConfig {
14981    /// Whether finding generation is enabled.
14982    #[serde(default)]
14983    pub enabled: bool,
14984    /// Rate of findings per audit procedure (0.0-1.0).
14985    #[serde(default = "default_finding_rate")]
14986    pub finding_rate: f64,
14987    /// Rate of material weakness findings among all findings (0.0-1.0).
14988    #[serde(default = "default_cr_material_weakness_rate")]
14989    pub material_weakness_rate: f64,
14990    /// Rate of significant deficiency findings among all findings (0.0-1.0).
14991    #[serde(default = "default_cr_significant_deficiency_rate")]
14992    pub significant_deficiency_rate: f64,
14993    /// Whether to generate remediation plans for findings.
14994    #[serde(default = "default_true")]
14995    pub generate_remediation: bool,
14996}
14997
/// Serde default for the rate of findings per audit procedure.
fn default_finding_rate() -> f64 {
    const FINDING_RATE: f64 = 0.05;
    FINDING_RATE
}

/// Serde default for the share of findings that are material weaknesses.
fn default_cr_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}

/// Serde default for the share of findings that are significant deficiencies.
fn default_cr_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
15009
15010impl Default for ComplianceFindingGenConfig {
15011    fn default() -> Self {
15012        Self {
15013            enabled: false,
15014            finding_rate: default_finding_rate(),
15015            material_weakness_rate: default_cr_material_weakness_rate(),
15016            significant_deficiency_rate: default_cr_significant_deficiency_rate(),
15017            generate_remediation: true,
15018        }
15019    }
15020}
15021
15022/// Configuration for regulatory filing generation.
15023#[derive(Debug, Clone, Serialize, Deserialize)]
15024pub struct ComplianceFilingGenConfig {
15025    /// Whether filing generation is enabled.
15026    #[serde(default)]
15027    pub enabled: bool,
15028    /// Filing types to include (e.g., ["10-K", "10-Q", "Jahresabschluss"]).
15029    /// Empty = all applicable filings for the selected jurisdictions.
15030    #[serde(default)]
15031    pub filing_types: Vec<String>,
15032    /// Generate filing status progression (draft → filed → accepted).
15033    #[serde(default = "default_true")]
15034    pub generate_status_progression: bool,
15035}
15036
15037impl Default for ComplianceFilingGenConfig {
15038    fn default() -> Self {
15039        Self {
15040            enabled: false,
15041            filing_types: Vec::new(),
15042            generate_status_progression: true,
15043        }
15044    }
15045}
15046
15047/// Configuration for compliance graph integration.
15048#[derive(Debug, Clone, Serialize, Deserialize)]
15049pub struct ComplianceGraphConfig {
15050    /// Whether compliance graph integration is enabled.
15051    #[serde(default)]
15052    pub enabled: bool,
15053    /// Include compliance nodes (Standard, Regulation, Jurisdiction, etc.).
15054    #[serde(default = "default_true")]
15055    pub include_compliance_nodes: bool,
15056    /// Include compliance edges (MapsToStandard, TestsControl, etc.).
15057    #[serde(default = "default_true")]
15058    pub include_compliance_edges: bool,
15059    /// Include cross-reference edges between standards.
15060    #[serde(default = "default_true")]
15061    pub include_cross_references: bool,
15062    /// Include temporal supersession edges.
15063    #[serde(default)]
15064    pub include_supersession_edges: bool,
15065    /// Include edges linking standards to the GL account types they govern.
15066    #[serde(default = "default_true")]
15067    pub include_account_links: bool,
15068    /// Include edges linking standards to the internal controls that implement them.
15069    #[serde(default = "default_true")]
15070    pub include_control_links: bool,
15071    /// Include edges linking filings and jurisdictions to the originating company.
15072    #[serde(default = "default_true")]
15073    pub include_company_links: bool,
15074}
15075
15076impl Default for ComplianceGraphConfig {
15077    fn default() -> Self {
15078        Self {
15079            enabled: false,
15080            include_compliance_nodes: true,
15081            include_compliance_edges: true,
15082            include_cross_references: true,
15083            include_supersession_edges: false,
15084            include_account_links: true,
15085            include_control_links: true,
15086            include_company_links: true,
15087        }
15088    }
15089}
15090
15091/// Output settings for compliance-specific data files.
15092#[derive(Debug, Clone, Serialize, Deserialize)]
15093pub struct ComplianceOutputConfig {
15094    /// Export the standards registry catalog.
15095    #[serde(default = "default_true")]
15096    pub export_registry: bool,
15097    /// Export jurisdiction profiles.
15098    #[serde(default = "default_true")]
15099    pub export_jurisdictions: bool,
15100    /// Export cross-reference map.
15101    #[serde(default = "default_true")]
15102    pub export_cross_references: bool,
15103    /// Export temporal version history.
15104    #[serde(default)]
15105    pub export_version_history: bool,
15106}
15107
15108impl Default for ComplianceOutputConfig {
15109    fn default() -> Self {
15110        Self {
15111            export_registry: true,
15112            export_jurisdictions: true,
15113            export_cross_references: true,
15114            export_version_history: false,
15115        }
15116    }
15117}
15118
15119#[cfg(test)]
15120mod tests {
15121    use super::*;
15122    use crate::presets::demo_preset;
15123
15124    // ==========================================================================
15125    // External-expectation config (ISA-520 substantive-analytics layer, Phase 2)
15126    // ==========================================================================
15127
15128    #[test]
15129    fn external_expectations_config_yaml_roundtrip() {
15130        // snake_case and camelCase aliases both parse; the driver enum parses from snake_case.
15131        let yaml = "enabled: true\n\
15132                    driver: market_index\n\
15133                    tolerance_pct: 0.08\n\
15134                    forecastNoise: 0.03\n\
15135                    growth_rate: 0.04\n\
15136                    minMaterialityShare: 0.01\n";
15137        let c: ExternalExpectationsConfig = serde_yaml::from_str(yaml).unwrap();
15138        assert!(c.enabled);
15139        assert_eq!(c.driver, ExpectationDriver::MarketIndex);
15140        assert_eq!(c.tolerance_pct, 0.08);
15141        assert_eq!(c.forecast_noise, 0.03);
15142        assert_eq!(c.growth_rate, 0.04);
15143        assert_eq!(c.min_materiality_share, 0.01);
15144    }
15145
15146    #[test]
15147    fn evidence_anchors_config_yaml_roundtrip_and_defaults() {
15148        let yaml = "enabled: true\n\
15149                    minMaterialityShare: 0.01\n\
15150                    corroboration_rate: 0.95\n\
15151                    fabricationEvadeRate: 0.05\n";
15152        let c: EvidenceAnchorsConfig = serde_yaml::from_str(yaml).unwrap();
15153        assert!(c.enabled);
15154        assert_eq!(c.min_materiality_share, 0.01);
15155        assert_eq!(c.corroboration_rate, 0.95);
15156        assert_eq!(c.fabrication_evade_rate, 0.05);
15157
15158        let d = EvidenceAnchorsConfig::default();
15159        assert!(!d.enabled, "default off → byte-identical output");
15160        assert_eq!(d.corroboration_rate, 0.92);
15161        assert_eq!(d.fabrication_evade_rate, 0.10);
15162
15163        let fr: FinancialReportingConfig =
15164            serde_yaml::from_str("enabled: true\nevidence_anchors:\n  enabled: true\n").unwrap();
15165        assert!(fr.evidence_anchors.enabled);
15166        let fr_default: FinancialReportingConfig = serde_yaml::from_str("enabled: true\n").unwrap();
15167        assert!(!fr_default.evidence_anchors.enabled);
15168    }
15169
15170    #[test]
15171    fn external_expectations_defaults_and_nesting() {
15172        let d = ExternalExpectationsConfig::default();
15173        assert!(!d.enabled, "default off → byte-identical output");
15174        assert_eq!(d.driver, ExpectationDriver::PriorYear);
15175        assert_eq!(d.tolerance_pct, 0.10);
15176        assert_eq!(d.min_materiality_share, 0.005);
15177
15178        // nests under financial_reporting and defaults when omitted
15179        let fr: FinancialReportingConfig = serde_yaml::from_str(
15180            "enabled: true\nexternal_expectations:\n  enabled: true\n  driver: budget\n",
15181        )
15182        .unwrap();
15183        assert!(fr.external_expectations.enabled);
15184        assert_eq!(fr.external_expectations.driver, ExpectationDriver::Budget);
15185        let fr_default: FinancialReportingConfig = serde_yaml::from_str("enabled: true\n").unwrap();
15186        assert!(!fr_default.external_expectations.enabled);
15187    }
15188
15189    // ==========================================================================
15190    // Fraud behavioral-bias config (exposed in v5.x — was hardcoded)
15191    // ==========================================================================
15192
15193    #[test]
15194    fn fraud_bias_defaults_match_core_engine() {
15195        // The YAML-facing defaults must equal the engine's historical hardcoded values, so
15196        // configs that don't set `fraud.bias` produce byte-identical output.
15197        let c = FraudBiasConfig::default();
15198        assert!(c.enabled);
15199        assert_eq!(c.weekend_bias, 0.30);
15200        assert_eq!(c.round_dollar_bias, 0.40);
15201        assert_eq!(c.off_hours_bias, 0.35);
15202        assert_eq!(c.post_close_bias, 0.25);
15203        let core = c.to_core();
15204        assert!(core.enabled);
15205        assert_eq!(core.weekend_bias, 0.30);
15206        assert_eq!(core.round_dollar_bias, 0.40);
15207        assert_eq!(core.off_hours_bias, 0.35);
15208        assert_eq!(core.post_close_bias, 0.25);
15209        // A FraudConfig with no bias key carries the defaults.
15210        assert_eq!(FraudConfig::default().bias.weekend_bias, 0.30);
15211    }
15212
15213    #[test]
15214    fn fraud_bias_deserializes_overrides_and_maps_to_core() {
15215        // The adversary's subtlety lever: lower the detectable signatures via YAML.
15216        let yaml = r#"
15217enabled: true
15218fraud_rate: 0.04
15219bias:
15220  weekend_bias: 0.0
15221  round_dollar_bias: 0.05
15222  off_hours_bias: 0.0
15223  post_close_bias: 0.10
15224"#;
15225        let fc: FraudConfig = serde_yaml::from_str(yaml).expect("fraud config with bias overrides");
15226        assert_eq!(fc.bias.weekend_bias, 0.0);
15227        assert_eq!(fc.bias.round_dollar_bias, 0.05);
15228        assert_eq!(fc.bias.off_hours_bias, 0.0);
15229        assert_eq!(fc.bias.post_close_bias, 0.10);
15230        assert!(fc.bias.enabled, "enabled defaults to true when omitted");
15231        // camelCase aliases also parse.
15232        let fc2: FraudConfig =
15233            serde_yaml::from_str("bias:\n  weekendBias: 0.5\n  roundDollarBias: 0.5\n").unwrap();
15234        assert_eq!(fc2.bias.weekend_bias, 0.5);
15235        assert_eq!(fc2.bias.round_dollar_bias, 0.5);
15236        // unset fields fall back to defaults
15237        assert_eq!(fc2.bias.off_hours_bias, 0.35);
15238        // mapping to the core engine config is field-for-field
15239        let core = fc.bias.to_core();
15240        assert_eq!(core.weekend_bias, 0.0);
15241        assert_eq!(core.post_close_bias, 0.10);
15242    }
15243
15244    #[test]
15245    fn fraud_config_without_bias_key_uses_defaults() {
15246        // Back-compat: a pre-existing fraud config (no `bias:` key) deserializes with default bias.
15247        let fc: FraudConfig = serde_yaml::from_str("enabled: true\nfraud_rate: 0.02\n").unwrap();
15248        assert_eq!(fc.bias.weekend_bias, 0.30);
15249        assert!(fc.bias.enabled);
15250    }
15251
15252    #[test]
15253    fn fraud_campaigns_default_off_and_back_compat() {
15254        // Default is disabled / inactive → byte-identical behaviour.
15255        let c = FraudCampaignConfig::default();
15256        assert!(!c.enabled && !c.is_active());
15257        assert_eq!(c.count, 1);
15258        assert_eq!(c.booking_leg_pool, 6);
15259        // A pre-existing fraud config (no `campaigns:` key) deserializes with campaigns off.
15260        let fc: FraudConfig = serde_yaml::from_str("enabled: true\nfraud_rate: 0.02\n").unwrap();
15261        assert!(!fc.campaigns.is_active());
15262    }
15263
15264    #[test]
15265    fn fraud_campaigns_deserialize_camelcase_and_is_active() {
15266        let fc: FraudConfig = serde_yaml::from_str(
15267            "enabled: true\ncampaigns:\n  enabled: true\n  count: 2\n  perPeriodCount: 3\n  bookingLegPool: 8\n  rotateEveryPeriods: 2\n  periodDays: 30\n",
15268        )
15269        .unwrap();
15270        assert!(fc.campaigns.is_active());
15271        assert_eq!(fc.campaigns.count, 2);
15272        assert_eq!(fc.campaigns.per_period_count, 3);
15273        assert_eq!(fc.campaigns.booking_leg_pool, 8);
15274        assert_eq!(fc.campaigns.rotate_every_periods, 2);
15275        // A degenerate spec (period_days 0) is not active.
15276        let degenerate = FraudCampaignConfig {
15277            enabled: true,
15278            period_days: 0,
15279            ..FraudCampaignConfig::default()
15280        };
15281        assert!(!degenerate.is_active());
15282    }
15283
15284    #[test]
15285    fn fraud_difficulty_resolves_to_bias_presets() {
15286        // Standard (default) returns the explicit bias unchanged → byte-identical.
15287        let mut fc = FraudConfig::default();
15288        assert_eq!(fc.difficulty, FraudDifficulty::Standard);
15289        assert_eq!(fc.effective_bias().weekend_bias, fc.bias.weekend_bias);
15290        assert!(fc.effective_bias().enabled);
15291        // Adversarial disables bias entirely (residual-faint, §44).
15292        fc.difficulty = FraudDifficulty::Adversarial;
15293        assert!(!fc.effective_bias().enabled);
15294        // Forensic is louder than Subtle on every signature; both stay enabled.
15295        fc.difficulty = FraudDifficulty::Forensic;
15296        let forensic = fc.effective_bias();
15297        fc.difficulty = FraudDifficulty::Subtle;
15298        let subtle = fc.effective_bias();
15299        assert!(forensic.enabled && subtle.enabled);
15300        assert!(forensic.weekend_bias > subtle.weekend_bias);
15301        assert!(forensic.round_dollar_bias > subtle.round_dollar_bias);
15302        assert!(forensic.off_hours_bias > subtle.off_hours_bias);
15303        assert!(forensic.post_close_bias > subtle.post_close_bias);
15304        // snake_case YAML.
15305        let parsed: FraudConfig =
15306            serde_yaml::from_str("enabled: true\ndifficulty: adversarial\n").unwrap();
15307        assert_eq!(parsed.difficulty, FraudDifficulty::Adversarial);
15308    }
15309
15310    // ==========================================================================
15311    // Serialization/Deserialization Tests
15312    // ==========================================================================
15313
15314    #[test]
15315    fn test_config_yaml_roundtrip() {
15316        let config = demo_preset();
15317        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
15318        let deserialized: GeneratorConfig =
15319            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
15320
15321        assert_eq!(
15322            config.global.period_months,
15323            deserialized.global.period_months
15324        );
15325        assert_eq!(config.global.industry, deserialized.global.industry);
15326        assert_eq!(config.companies.len(), deserialized.companies.len());
15327        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
15328    }
15329
15330    #[test]
15331    fn test_config_json_roundtrip() {
15332        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
15333        let mut config = demo_preset();
15334        // Replace infinity with a large but finite value for JSON compatibility
15335        config.master_data.employees.approval_limits.executive = 1e12;
15336
15337        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
15338        let deserialized: GeneratorConfig =
15339            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
15340
15341        assert_eq!(
15342            config.global.period_months,
15343            deserialized.global.period_months
15344        );
15345        assert_eq!(config.global.industry, deserialized.global.industry);
15346        assert_eq!(config.companies.len(), deserialized.companies.len());
15347    }
15348
15349    #[test]
15350    fn test_transaction_volume_serialization() {
15351        // Test various transaction volumes serialize correctly
15352        let volumes = vec![
15353            (TransactionVolume::TenK, "ten_k"),
15354            (TransactionVolume::HundredK, "hundred_k"),
15355            (TransactionVolume::OneM, "one_m"),
15356            (TransactionVolume::TenM, "ten_m"),
15357            (TransactionVolume::HundredM, "hundred_m"),
15358        ];
15359
15360        for (volume, expected_key) in volumes {
15361            let json = serde_json::to_string(&volume).expect("Failed to serialize");
15362            assert!(
15363                json.contains(expected_key),
15364                "Expected {} in JSON: {}",
15365                expected_key,
15366                json
15367            );
15368        }
15369    }
15370
15371    #[test]
15372    fn test_transaction_volume_custom_serialization() {
15373        let volume = TransactionVolume::Custom(12345);
15374        let json = serde_json::to_string(&volume).expect("Failed to serialize");
15375        let deserialized: TransactionVolume =
15376            serde_json::from_str(&json).expect("Failed to deserialize");
15377        assert_eq!(deserialized.count(), 12345);
15378    }
15379
15380    #[test]
15381    fn test_output_mode_serialization() {
15382        let modes = vec![
15383            OutputMode::Streaming,
15384            OutputMode::FlatFile,
15385            OutputMode::Both,
15386        ];
15387
15388        for mode in modes {
15389            let json = serde_json::to_string(&mode).expect("Failed to serialize");
15390            let deserialized: OutputMode =
15391                serde_json::from_str(&json).expect("Failed to deserialize");
15392            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
15393        }
15394    }
15395
15396    #[test]
15397    fn test_file_format_serialization() {
15398        let formats = vec![
15399            FileFormat::Csv,
15400            FileFormat::Parquet,
15401            FileFormat::Json,
15402            FileFormat::JsonLines,
15403        ];
15404
15405        for format in formats {
15406            let json = serde_json::to_string(&format).expect("Failed to serialize");
15407            let deserialized: FileFormat =
15408                serde_json::from_str(&json).expect("Failed to deserialize");
15409            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
15410        }
15411    }
15412
15413    #[test]
15414    fn test_compression_algorithm_serialization() {
15415        let algos = vec![
15416            CompressionAlgorithm::Gzip,
15417            CompressionAlgorithm::Zstd,
15418            CompressionAlgorithm::Lz4,
15419            CompressionAlgorithm::Snappy,
15420        ];
15421
15422        for algo in algos {
15423            let json = serde_json::to_string(&algo).expect("Failed to serialize");
15424            let deserialized: CompressionAlgorithm =
15425                serde_json::from_str(&json).expect("Failed to deserialize");
15426            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
15427        }
15428    }
15429
15430    #[test]
15431    fn test_transfer_pricing_method_serialization() {
15432        let methods = vec![
15433            TransferPricingMethod::CostPlus,
15434            TransferPricingMethod::ComparableUncontrolled,
15435            TransferPricingMethod::ResalePrice,
15436            TransferPricingMethod::TransactionalNetMargin,
15437            TransferPricingMethod::ProfitSplit,
15438        ];
15439
15440        for method in methods {
15441            let json = serde_json::to_string(&method).expect("Failed to serialize");
15442            let deserialized: TransferPricingMethod =
15443                serde_json::from_str(&json).expect("Failed to deserialize");
15444            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
15445        }
15446    }
15447
15448    #[test]
15449    fn test_benford_exemption_serialization() {
15450        let exemptions = vec![
15451            BenfordExemption::Recurring,
15452            BenfordExemption::Payroll,
15453            BenfordExemption::FixedFees,
15454            BenfordExemption::RoundAmounts,
15455        ];
15456
15457        for exemption in exemptions {
15458            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
15459            let deserialized: BenfordExemption =
15460                serde_json::from_str(&json).expect("Failed to deserialize");
15461            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
15462        }
15463    }
15464
15465    // ==========================================================================
15466    // Default Value Tests
15467    // ==========================================================================
15468
15469    #[test]
15470    fn test_global_config_defaults() {
15471        let yaml = r#"
15472            industry: manufacturing
15473            start_date: "2024-01-01"
15474            period_months: 6
15475        "#;
15476        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15477        assert_eq!(config.group_currency, "USD");
15478        assert!(config.parallel);
15479        assert_eq!(config.worker_threads, 0);
15480        assert_eq!(config.memory_limit_mb, 0);
15481    }
15482
15483    #[test]
15484    fn test_fraud_config_defaults() {
15485        let config = FraudConfig::default();
15486        assert!(!config.enabled);
15487        assert_eq!(config.fraud_rate, 0.005);
15488        assert!(!config.clustering_enabled);
15489    }
15490
15491    #[test]
15492    fn test_internal_controls_config_defaults() {
15493        let config = InternalControlsConfig::default();
15494        assert!(!config.enabled);
15495        assert_eq!(config.exception_rate, 0.02);
15496        assert_eq!(config.sod_violation_rate, 0.01);
15497        assert!(config.export_control_master_data);
15498        assert_eq!(config.sox_materiality_threshold, 10000.0);
15499        // COSO fields
15500        assert!(config.coso_enabled);
15501        assert!(!config.include_entity_level_controls);
15502        assert_eq!(config.target_maturity_level, "mixed");
15503    }
15504
15505    #[test]
15506    fn test_output_config_defaults() {
15507        let config = OutputConfig::default();
15508        assert!(matches!(config.mode, OutputMode::FlatFile));
15509        assert_eq!(config.formats, vec![FileFormat::Parquet]);
15510        assert!(config.compression.enabled);
15511        assert!(matches!(
15512            config.compression.algorithm,
15513            CompressionAlgorithm::Zstd
15514        ));
15515        assert!(config.include_acdoca);
15516        assert!(!config.include_bseg);
15517        assert!(config.partition_by_period);
15518        assert!(!config.partition_by_company);
15519    }
15520
15521    #[test]
15522    fn test_approval_config_defaults() {
15523        let config = ApprovalConfig::default();
15524        assert!(!config.enabled);
15525        assert_eq!(config.auto_approve_threshold, 1000.0);
15526        assert_eq!(config.rejection_rate, 0.02);
15527        assert_eq!(config.revision_rate, 0.05);
15528        assert_eq!(config.average_approval_delay_hours, 4.0);
15529        assert_eq!(config.thresholds.len(), 4);
15530    }
15531
15532    #[test]
15533    fn test_p2p_flow_config_defaults() {
15534        let config = P2PFlowConfig::default();
15535        assert!(config.enabled);
15536        assert_eq!(config.three_way_match_rate, 0.95);
15537        assert_eq!(config.partial_delivery_rate, 0.15);
15538        assert_eq!(config.average_po_to_gr_days, 14);
15539    }
15540
15541    #[test]
15542    fn test_o2c_flow_config_defaults() {
15543        let config = O2CFlowConfig::default();
15544        assert!(config.enabled);
15545        assert_eq!(config.credit_check_failure_rate, 0.02);
15546        assert_eq!(config.return_rate, 0.03);
15547        assert_eq!(config.bad_debt_rate, 0.01);
15548    }
15549
15550    #[test]
15551    fn test_balance_config_defaults() {
15552        let config = BalanceConfig::default();
15553        assert!(!config.generate_opening_balances);
15554        assert!(config.generate_trial_balances);
15555        assert_eq!(config.target_gross_margin, 0.35);
15556        assert!(config.validate_balance_equation);
15557        assert!(config.reconcile_subledgers);
15558    }
15559
15560    // ==========================================================================
15561    // Partial Config Deserialization Tests
15562    // ==========================================================================
15563
15564    #[test]
15565    fn test_partial_config_with_defaults() {
15566        // Minimal config that should use all defaults
15567        let yaml = r#"
15568            global:
15569              industry: manufacturing
15570              start_date: "2024-01-01"
15571              period_months: 3
15572            companies:
15573              - code: "TEST"
15574                name: "Test Company"
15575                currency: "USD"
15576                country: "US"
15577                annual_transaction_volume: ten_k
15578            chart_of_accounts:
15579              complexity: small
15580            output:
15581              output_directory: "./output"
15582        "#;
15583
15584        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15585        assert_eq!(config.global.period_months, 3);
15586        assert_eq!(config.companies.len(), 1);
15587        assert!(!config.fraud.enabled); // Default
15588        assert!(!config.internal_controls.enabled); // Default
15589    }
15590
15591    #[test]
15592    fn test_config_with_fraud_enabled() {
15593        let yaml = r#"
15594            global:
15595              industry: retail
15596              start_date: "2024-01-01"
15597              period_months: 12
15598            companies:
15599              - code: "RETAIL"
15600                name: "Retail Co"
15601                currency: "USD"
15602                country: "US"
15603                annual_transaction_volume: hundred_k
15604            chart_of_accounts:
15605              complexity: medium
15606            output:
15607              output_directory: "./output"
15608            fraud:
15609              enabled: true
15610              fraud_rate: 0.05
15611              clustering_enabled: true
15612        "#;
15613
15614        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15615        assert!(config.fraud.enabled);
15616        assert_eq!(config.fraud.fraud_rate, 0.05);
15617        assert!(config.fraud.clustering_enabled);
15618    }
15619
15620    #[test]
15621    fn test_config_with_multiple_companies() {
15622        let yaml = r#"
15623            global:
15624              industry: manufacturing
15625              start_date: "2024-01-01"
15626              period_months: 6
15627            companies:
15628              - code: "HQ"
15629                name: "Headquarters"
15630                currency: "USD"
15631                country: "US"
15632                annual_transaction_volume: hundred_k
15633                volume_weight: 1.0
15634              - code: "EU"
15635                name: "European Subsidiary"
15636                currency: "EUR"
15637                country: "DE"
15638                annual_transaction_volume: hundred_k
15639                volume_weight: 0.5
15640              - code: "APAC"
15641                name: "Asia Pacific"
15642                currency: "JPY"
15643                country: "JP"
15644                annual_transaction_volume: ten_k
15645                volume_weight: 0.3
15646            chart_of_accounts:
15647              complexity: large
15648            output:
15649              output_directory: "./output"
15650        "#;
15651
15652        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15653        assert_eq!(config.companies.len(), 3);
15654        assert_eq!(config.companies[0].code, "HQ");
15655        assert_eq!(config.companies[1].currency, "EUR");
15656        assert_eq!(config.companies[2].volume_weight, 0.3);
15657    }
15658
15659    #[test]
15660    fn test_intercompany_config() {
15661        let yaml = r#"
15662            enabled: true
15663            ic_transaction_rate: 0.20
15664            transfer_pricing_method: cost_plus
15665            markup_percent: 0.08
15666            generate_matched_pairs: true
15667            generate_eliminations: true
15668        "#;
15669
15670        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15671        assert!(config.enabled);
15672        assert_eq!(config.ic_transaction_rate, 0.20);
15673        assert!(matches!(
15674            config.transfer_pricing_method,
15675            TransferPricingMethod::CostPlus
15676        ));
15677        assert_eq!(config.markup_percent, 0.08);
15678        assert!(config.generate_eliminations);
15679    }
15680
15681    // ==========================================================================
15682    // Company Config Tests
15683    // ==========================================================================
15684
15685    #[test]
15686    fn test_company_config_defaults() {
15687        let yaml = r#"
15688            code: "TEST"
15689            name: "Test Company"
15690            currency: "USD"
15691            country: "US"
15692            annual_transaction_volume: ten_k
15693        "#;
15694
15695        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15696        assert_eq!(config.fiscal_year_variant, "K4"); // Default
15697        assert_eq!(config.volume_weight, 1.0); // Default
15698    }
15699
15700    // ==========================================================================
15701    // Chart of Accounts Config Tests
15702    // ==========================================================================
15703
15704    #[test]
15705    fn test_coa_config_defaults() {
15706        let yaml = r#"
15707            complexity: medium
15708        "#;
15709
15710        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15711        assert!(config.industry_specific); // Default true
15712        assert!(config.custom_accounts.is_none());
15713        assert_eq!(config.min_hierarchy_depth, 2); // Default
15714        assert_eq!(config.max_hierarchy_depth, 5); // Default
15715    }
15716
15717    // ==========================================================================
15718    // Accounting Standards Config Tests
15719    // ==========================================================================
15720
15721    #[test]
15722    fn test_accounting_standards_config_defaults() {
15723        let config = AccountingStandardsConfig::default();
15724        assert!(!config.enabled);
15725        assert!(config.framework.is_none());
15726        assert!(!config.revenue_recognition.enabled);
15727        assert!(!config.leases.enabled);
15728        assert!(!config.fair_value.enabled);
15729        assert!(!config.impairment.enabled);
15730        assert!(!config.generate_differences);
15731    }
15732
15733    #[test]
15734    fn test_accounting_standards_config_yaml() {
15735        let yaml = r#"
15736            enabled: true
15737            framework: ifrs
15738            revenue_recognition:
15739              enabled: true
15740              generate_contracts: true
15741              avg_obligations_per_contract: 2.5
15742              variable_consideration_rate: 0.20
15743              over_time_recognition_rate: 0.35
15744              contract_count: 150
15745            leases:
15746              enabled: true
15747              lease_count: 75
15748              finance_lease_percent: 0.25
15749              avg_lease_term_months: 48
15750            generate_differences: true
15751        "#;
15752
15753        let config: AccountingStandardsConfig =
15754            serde_yaml::from_str(yaml).expect("Failed to parse");
15755        assert!(config.enabled);
15756        assert!(matches!(
15757            config.framework,
15758            Some(AccountingFrameworkConfig::Ifrs)
15759        ));
15760        assert!(config.revenue_recognition.enabled);
15761        assert_eq!(config.revenue_recognition.contract_count, 150);
15762        assert_eq!(config.revenue_recognition.avg_obligations_per_contract, 2.5);
15763        assert!(config.leases.enabled);
15764        assert_eq!(config.leases.lease_count, 75);
15765        assert_eq!(config.leases.finance_lease_percent, 0.25);
15766        assert!(config.generate_differences);
15767    }
15768
15769    #[test]
15770    fn test_accounting_framework_serialization() {
15771        let frameworks = [
15772            AccountingFrameworkConfig::UsGaap,
15773            AccountingFrameworkConfig::Ifrs,
15774            AccountingFrameworkConfig::DualReporting,
15775            AccountingFrameworkConfig::FrenchGaap,
15776            AccountingFrameworkConfig::GermanGaap,
15777        ];
15778
15779        for framework in frameworks {
15780            let json = serde_json::to_string(&framework).expect("Failed to serialize");
15781            let deserialized: AccountingFrameworkConfig =
15782                serde_json::from_str(&json).expect("Failed to deserialize");
15783            assert!(format!("{:?}", framework) == format!("{:?}", deserialized));
15784        }
15785    }
15786
15787    #[test]
15788    fn test_revenue_recognition_config_defaults() {
15789        let config = RevenueRecognitionConfig::default();
15790        assert!(!config.enabled);
15791        assert!(config.generate_contracts);
15792        assert_eq!(config.avg_obligations_per_contract, 2.0);
15793        assert_eq!(config.variable_consideration_rate, 0.15);
15794        assert_eq!(config.over_time_recognition_rate, 0.30);
15795        assert_eq!(config.contract_count, 100);
15796    }
15797
15798    #[test]
15799    fn test_lease_accounting_config_defaults() {
15800        let config = LeaseAccountingConfig::default();
15801        assert!(!config.enabled);
15802        assert_eq!(config.lease_count, 50);
15803        assert_eq!(config.finance_lease_percent, 0.30);
15804        assert_eq!(config.avg_lease_term_months, 60);
15805        assert!(config.generate_amortization);
15806        assert_eq!(config.real_estate_percent, 0.40);
15807    }
15808
15809    #[test]
15810    fn test_fair_value_config_defaults() {
15811        let config = FairValueConfig::default();
15812        assert!(!config.enabled);
15813        assert_eq!(config.measurement_count, 25);
15814        assert_eq!(config.level1_percent, 0.40);
15815        assert_eq!(config.level2_percent, 0.35);
15816        assert_eq!(config.level3_percent, 0.25);
15817        assert!(!config.include_sensitivity_analysis);
15818    }
15819
15820    #[test]
15821    fn test_impairment_config_defaults() {
15822        let config = ImpairmentConfig::default();
15823        assert!(!config.enabled);
15824        assert_eq!(config.test_count, 15);
15825        assert_eq!(config.impairment_rate, 0.10);
15826        assert!(config.generate_projections);
15827        assert!(!config.include_goodwill);
15828    }
15829
15830    // ==========================================================================
15831    // Audit Standards Config Tests
15832    // ==========================================================================
15833
15834    #[test]
15835    fn test_audit_standards_config_defaults() {
15836        let config = AuditStandardsConfig::default();
15837        assert!(!config.enabled);
15838        assert!(!config.isa_compliance.enabled);
15839        assert!(!config.analytical_procedures.enabled);
15840        assert!(!config.confirmations.enabled);
15841        assert!(!config.opinion.enabled);
15842        assert!(!config.generate_audit_trail);
15843        assert!(!config.sox.enabled);
15844        assert!(!config.pcaob.enabled);
15845    }
15846
15847    #[test]
15848    fn test_audit_standards_config_yaml() {
15849        let yaml = r#"
15850            enabled: true
15851            isa_compliance:
15852              enabled: true
15853              compliance_level: comprehensive
15854              generate_isa_mappings: true
15855              include_pcaob: true
15856              framework: dual
15857            analytical_procedures:
15858              enabled: true
15859              procedures_per_account: 5
15860              variance_probability: 0.25
15861            confirmations:
15862              enabled: true
15863              confirmation_count: 75
15864              positive_response_rate: 0.90
15865              exception_rate: 0.08
15866            opinion:
15867              enabled: true
15868              generate_kam: true
15869              average_kam_count: 4
15870            sox:
15871              enabled: true
15872              generate_302_certifications: true
15873              generate_404_assessments: true
15874              material_weakness_rate: 0.03
15875            pcaob:
15876              enabled: true
15877              is_pcaob_audit: true
15878              include_icfr_opinion: true
15879            generate_audit_trail: true
15880        "#;
15881
15882        let config: AuditStandardsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15883        assert!(config.enabled);
15884        assert!(config.isa_compliance.enabled);
15885        assert_eq!(config.isa_compliance.compliance_level, "comprehensive");
15886        assert!(config.isa_compliance.include_pcaob);
15887        assert_eq!(config.isa_compliance.framework, "dual");
15888        assert!(config.analytical_procedures.enabled);
15889        assert_eq!(config.analytical_procedures.procedures_per_account, 5);
15890        assert!(config.confirmations.enabled);
15891        assert_eq!(config.confirmations.confirmation_count, 75);
15892        assert!(config.opinion.enabled);
15893        assert_eq!(config.opinion.average_kam_count, 4);
15894        assert!(config.sox.enabled);
15895        assert!(config.sox.generate_302_certifications);
15896        assert_eq!(config.sox.material_weakness_rate, 0.03);
15897        assert!(config.pcaob.enabled);
15898        assert!(config.pcaob.is_pcaob_audit);
15899        assert!(config.pcaob.include_icfr_opinion);
15900        assert!(config.generate_audit_trail);
15901    }
15902
15903    #[test]
15904    fn test_isa_compliance_config_defaults() {
15905        let config = IsaComplianceConfig::default();
15906        assert!(!config.enabled);
15907        assert_eq!(config.compliance_level, "standard");
15908        assert!(config.generate_isa_mappings);
15909        assert!(config.generate_coverage_summary);
15910        assert!(!config.include_pcaob);
15911        assert_eq!(config.framework, "isa");
15912    }
15913
15914    #[test]
15915    fn test_sox_compliance_config_defaults() {
15916        let config = SoxComplianceConfig::default();
15917        assert!(!config.enabled);
15918        assert!(config.generate_302_certifications);
15919        assert!(config.generate_404_assessments);
15920        assert_eq!(config.materiality_threshold, 10000.0);
15921        assert_eq!(config.material_weakness_rate, 0.02);
15922        assert_eq!(config.significant_deficiency_rate, 0.08);
15923    }
15924
15925    #[test]
15926    fn test_pcaob_config_defaults() {
15927        let config = PcaobConfig::default();
15928        assert!(!config.enabled);
15929        assert!(!config.is_pcaob_audit);
15930        assert!(config.generate_cam);
15931        assert!(!config.include_icfr_opinion);
15932        assert!(!config.generate_standard_mappings);
15933    }
15934
15935    #[test]
15936    fn test_config_with_standards_enabled() {
15937        let yaml = r#"
15938            global:
15939              industry: financial_services
15940              start_date: "2024-01-01"
15941              period_months: 12
15942            companies:
15943              - code: "BANK"
15944                name: "Test Bank"
15945                currency: "USD"
15946                country: "US"
15947                annual_transaction_volume: hundred_k
15948            chart_of_accounts:
15949              complexity: large
15950            output:
15951              output_directory: "./output"
15952            accounting_standards:
15953              enabled: true
15954              framework: us_gaap
15955              revenue_recognition:
15956                enabled: true
15957              leases:
15958                enabled: true
15959            audit_standards:
15960              enabled: true
15961              isa_compliance:
15962                enabled: true
15963              sox:
15964                enabled: true
15965        "#;
15966
15967        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15968        assert!(config.accounting_standards.enabled);
15969        assert!(matches!(
15970            config.accounting_standards.framework,
15971            Some(AccountingFrameworkConfig::UsGaap)
15972        ));
15973        assert!(config.accounting_standards.revenue_recognition.enabled);
15974        assert!(config.accounting_standards.leases.enabled);
15975        assert!(config.audit_standards.enabled);
15976        assert!(config.audit_standards.isa_compliance.enabled);
15977        assert!(config.audit_standards.sox.enabled);
15978    }
15979
15980    // ==========================================================================
15981    // Industry-Specific Config Tests
15982    // ==========================================================================
15983
15984    #[test]
15985    fn test_industry_specific_config_defaults() {
15986        let config = IndustrySpecificConfig::default();
15987        assert!(!config.enabled);
15988        assert!(!config.manufacturing.enabled);
15989        assert!(!config.retail.enabled);
15990        assert!(!config.healthcare.enabled);
15991        assert!(!config.technology.enabled);
15992        assert!(!config.financial_services.enabled);
15993        assert!(!config.professional_services.enabled);
15994    }
15995
15996    #[test]
15997    fn test_manufacturing_config_defaults() {
15998        let config = ManufacturingConfig::default();
15999        assert!(!config.enabled);
16000        assert_eq!(config.bom_depth, 4);
16001        assert!(!config.just_in_time);
16002        assert_eq!(config.supplier_tiers, 2);
16003        assert_eq!(config.target_yield_rate, 0.97);
16004        assert_eq!(config.scrap_alert_threshold, 0.03);
16005    }
16006
16007    #[test]
16008    fn test_retail_config_defaults() {
16009        let config = RetailConfig::default();
16010        assert!(!config.enabled);
16011        assert_eq!(config.avg_daily_transactions, 500);
16012        assert!(config.loss_prevention);
16013        assert_eq!(config.shrinkage_rate, 0.015);
16014    }
16015
16016    #[test]
16017    fn test_healthcare_config_defaults() {
16018        let config = HealthcareConfig::default();
16019        assert!(!config.enabled);
16020        assert_eq!(config.facility_type, "hospital");
16021        assert_eq!(config.avg_daily_encounters, 150);
16022        assert!(config.compliance.hipaa);
16023        assert!(config.compliance.stark_law);
16024        assert!(config.coding_systems.icd10);
16025        assert!(config.coding_systems.cpt);
16026    }
16027
16028    #[test]
16029    fn test_technology_config_defaults() {
16030        let config = TechnologyConfig::default();
16031        assert!(!config.enabled);
16032        assert_eq!(config.revenue_model, "saas");
16033        assert_eq!(config.subscription_revenue_pct, 0.60);
16034        assert!(config.rd_capitalization.enabled);
16035    }
16036
16037    #[test]
16038    fn test_config_with_industry_specific() {
16039        let yaml = r#"
16040            global:
16041              industry: healthcare
16042              start_date: "2024-01-01"
16043              period_months: 12
16044            companies:
16045              - code: "HOSP"
16046                name: "Test Hospital"
16047                currency: "USD"
16048                country: "US"
16049                annual_transaction_volume: hundred_k
16050            chart_of_accounts:
16051              complexity: medium
16052            output:
16053              output_directory: "./output"
16054            industry_specific:
16055              enabled: true
16056              healthcare:
16057                enabled: true
16058                facility_type: hospital
16059                payer_mix:
16060                  medicare: 0.45
16061                  medicaid: 0.15
16062                  commercial: 0.35
16063                  self_pay: 0.05
16064                coding_systems:
16065                  icd10: true
16066                  cpt: true
16067                  drg: true
16068                compliance:
16069                  hipaa: true
16070                  stark_law: true
16071                anomaly_rates:
16072                  upcoding: 0.03
16073                  unbundling: 0.02
16074        "#;
16075
16076        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16077        assert!(config.industry_specific.enabled);
16078        assert!(config.industry_specific.healthcare.enabled);
16079        assert_eq!(
16080            config.industry_specific.healthcare.facility_type,
16081            "hospital"
16082        );
16083        assert_eq!(config.industry_specific.healthcare.payer_mix.medicare, 0.45);
16084        assert_eq!(config.industry_specific.healthcare.payer_mix.self_pay, 0.05);
16085        assert!(config.industry_specific.healthcare.coding_systems.icd10);
16086        assert!(config.industry_specific.healthcare.compliance.hipaa);
16087        assert_eq!(
16088            config.industry_specific.healthcare.anomaly_rates.upcoding,
16089            0.03
16090        );
16091    }
16092
16093    #[test]
16094    fn test_config_with_manufacturing_specific() {
16095        let yaml = r#"
16096            global:
16097              industry: manufacturing
16098              start_date: "2024-01-01"
16099              period_months: 12
16100            companies:
16101              - code: "MFG"
16102                name: "Test Manufacturing"
16103                currency: "USD"
16104                country: "US"
16105                annual_transaction_volume: hundred_k
16106            chart_of_accounts:
16107              complexity: medium
16108            output:
16109              output_directory: "./output"
16110            industry_specific:
16111              enabled: true
16112              manufacturing:
16113                enabled: true
16114                bom_depth: 5
16115                just_in_time: true
16116                supplier_tiers: 3
16117                target_yield_rate: 0.98
16118                anomaly_rates:
16119                  yield_manipulation: 0.02
16120                  phantom_production: 0.01
16121        "#;
16122
16123        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16124        assert!(config.industry_specific.enabled);
16125        assert!(config.industry_specific.manufacturing.enabled);
16126        assert_eq!(config.industry_specific.manufacturing.bom_depth, 5);
16127        assert!(config.industry_specific.manufacturing.just_in_time);
16128        assert_eq!(config.industry_specific.manufacturing.supplier_tiers, 3);
16129        assert_eq!(
16130            config.industry_specific.manufacturing.target_yield_rate,
16131            0.98
16132        );
16133        assert_eq!(
16134            config
16135                .industry_specific
16136                .manufacturing
16137                .anomaly_rates
16138                .yield_manipulation,
16139            0.02
16140        );
16141    }
16142
16143    // ==========================================================================
16144    // Tax Configuration Tests
16145    // ==========================================================================
16146
16147    #[test]
16148    fn test_tax_config_defaults() {
16149        let tax = TaxConfig::default();
16150        assert!(!tax.enabled);
16151        assert!(tax.jurisdictions.countries.is_empty());
16152        assert!(!tax.jurisdictions.include_subnational);
16153        assert!(!tax.vat_gst.enabled);
16154        assert!(tax.vat_gst.standard_rates.is_empty());
16155        assert!(tax.vat_gst.reduced_rates.is_empty());
16156        assert!(tax.vat_gst.exempt_categories.is_empty());
16157        assert!(tax.vat_gst.reverse_charge);
16158        assert!(!tax.sales_tax.enabled);
16159        assert!(tax.sales_tax.nexus_states.is_empty());
16160        assert!(!tax.withholding.enabled);
16161        assert!(tax.withholding.treaty_network);
16162        assert_eq!(tax.withholding.default_rate, 0.30);
16163        assert_eq!(tax.withholding.treaty_reduced_rate, 0.15);
16164        assert!(tax.provisions.enabled);
16165        assert_eq!(tax.provisions.statutory_rate, 0.21);
16166        assert!(tax.provisions.uncertain_positions);
16167        assert!(!tax.payroll_tax.enabled);
16168        assert_eq!(tax.anomaly_rate, 0.03);
16169    }
16170
16171    #[test]
16172    fn test_tax_config_from_yaml() {
16173        let yaml = r#"
16174            global:
16175              seed: 42
16176              start_date: "2024-01-01"
16177              period_months: 12
16178              industry: retail
16179            companies:
16180              - code: C001
16181                name: Test Corp
16182                currency: USD
16183                country: US
16184                annual_transaction_volume: ten_k
16185            chart_of_accounts:
16186              complexity: small
16187            output:
16188              output_directory: ./output
16189            tax:
16190              enabled: true
16191              anomaly_rate: 0.05
16192              jurisdictions:
16193                countries: ["US", "DE", "GB"]
16194                include_subnational: true
16195              vat_gst:
16196                enabled: true
16197                standard_rates:
16198                  DE: 0.19
16199                  GB: 0.20
16200                reduced_rates:
16201                  DE: 0.07
16202                  GB: 0.05
16203                exempt_categories:
16204                  - financial_services
16205                  - healthcare
16206                reverse_charge: false
16207              sales_tax:
16208                enabled: true
16209                nexus_states: ["CA", "NY", "TX"]
16210              withholding:
16211                enabled: true
16212                treaty_network: false
16213                default_rate: 0.25
16214                treaty_reduced_rate: 0.10
16215              provisions:
16216                enabled: false
16217                statutory_rate: 0.28
16218                uncertain_positions: false
16219              payroll_tax:
16220                enabled: true
16221        "#;
16222
16223        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16224        assert!(config.tax.enabled);
16225        assert_eq!(config.tax.anomaly_rate, 0.05);
16226
16227        // Jurisdictions
16228        assert_eq!(config.tax.jurisdictions.countries.len(), 3);
16229        assert!(config
16230            .tax
16231            .jurisdictions
16232            .countries
16233            .contains(&"DE".to_string()));
16234        assert!(config.tax.jurisdictions.include_subnational);
16235
16236        // VAT/GST
16237        assert!(config.tax.vat_gst.enabled);
16238        assert_eq!(config.tax.vat_gst.standard_rates.get("DE"), Some(&0.19));
16239        assert_eq!(config.tax.vat_gst.standard_rates.get("GB"), Some(&0.20));
16240        assert_eq!(config.tax.vat_gst.reduced_rates.get("DE"), Some(&0.07));
16241        assert_eq!(config.tax.vat_gst.exempt_categories.len(), 2);
16242        assert!(!config.tax.vat_gst.reverse_charge);
16243
16244        // Sales tax
16245        assert!(config.tax.sales_tax.enabled);
16246        assert_eq!(config.tax.sales_tax.nexus_states.len(), 3);
16247        assert!(config
16248            .tax
16249            .sales_tax
16250            .nexus_states
16251            .contains(&"CA".to_string()));
16252
16253        // Withholding
16254        assert!(config.tax.withholding.enabled);
16255        assert!(!config.tax.withholding.treaty_network);
16256        assert_eq!(config.tax.withholding.default_rate, 0.25);
16257        assert_eq!(config.tax.withholding.treaty_reduced_rate, 0.10);
16258
16259        // Provisions
16260        assert!(!config.tax.provisions.enabled);
16261        assert_eq!(config.tax.provisions.statutory_rate, 0.28);
16262        assert!(!config.tax.provisions.uncertain_positions);
16263
16264        // Payroll tax
16265        assert!(config.tax.payroll_tax.enabled);
16266    }
16267
16268    #[test]
16269    fn test_generator_config_with_tax_default() {
16270        let yaml = r#"
16271            global:
16272              seed: 42
16273              start_date: "2024-01-01"
16274              period_months: 12
16275              industry: retail
16276            companies:
16277              - code: C001
16278                name: Test Corp
16279                currency: USD
16280                country: US
16281                annual_transaction_volume: ten_k
16282            chart_of_accounts:
16283              complexity: small
16284            output:
16285              output_directory: ./output
16286        "#;
16287
16288        let config: GeneratorConfig =
16289            serde_yaml::from_str(yaml).expect("Failed to parse config without tax section");
16290        // Tax should be present with defaults when not specified in YAML
16291        assert!(!config.tax.enabled);
16292        assert!(config.tax.jurisdictions.countries.is_empty());
16293        assert_eq!(config.tax.anomaly_rate, 0.03);
16294        assert!(config.tax.provisions.enabled); // provisions default to enabled=true
16295        assert_eq!(config.tax.provisions.statutory_rate, 0.21);
16296    }
16297
16298    // ==========================================================================
16299    // SessionSchemaConfig Tests
16300    // ==========================================================================
16301
16302    #[test]
16303    fn test_session_config_default_disabled() {
16304        let yaml = "{}";
16305        let config: SessionSchemaConfig =
16306            serde_yaml::from_str(yaml).expect("Failed to parse empty session config");
16307        assert!(!config.enabled);
16308        assert!(config.checkpoint_path.is_none());
16309        assert!(config.per_period_output);
16310        assert!(config.consolidated_output);
16311    }
16312
16313    #[test]
16314    fn test_config_backward_compatible_without_session() {
16315        let yaml = r#"
16316            global:
16317              seed: 42
16318              start_date: "2024-01-01"
16319              period_months: 12
16320              industry: retail
16321            companies:
16322              - code: C001
16323                name: Test Corp
16324                currency: USD
16325                country: US
16326                annual_transaction_volume: ten_k
16327            chart_of_accounts:
16328              complexity: small
16329            output:
16330              output_directory: ./output
16331        "#;
16332
16333        let config: GeneratorConfig =
16334            serde_yaml::from_str(yaml).expect("Failed to parse config without session");
16335        // Session should default to disabled
16336        assert!(!config.session.enabled);
16337        assert!(config.session.per_period_output);
16338        assert!(config.session.consolidated_output);
16339        // fiscal_year_months should be None
16340        assert!(config.global.fiscal_year_months.is_none());
16341    }
16342
16343    #[test]
16344    fn test_fiscal_year_months_parsed() {
16345        let yaml = r#"
16346            global:
16347              seed: 42
16348              start_date: "2024-01-01"
16349              period_months: 24
16350              industry: retail
16351              fiscal_year_months: 12
16352            companies:
16353              - code: C001
16354                name: Test Corp
16355                currency: USD
16356                country: US
16357                annual_transaction_volume: ten_k
16358            chart_of_accounts:
16359              complexity: small
16360            output:
16361              output_directory: ./output
16362            session:
16363              enabled: true
16364              checkpoint_path: /tmp/checkpoints
16365              per_period_output: true
16366              consolidated_output: false
16367        "#;
16368
16369        let config: GeneratorConfig =
16370            serde_yaml::from_str(yaml).expect("Failed to parse config with fiscal_year_months");
16371        assert_eq!(config.global.fiscal_year_months, Some(12));
16372        assert!(config.session.enabled);
16373        assert_eq!(
16374            config.session.checkpoint_path,
16375            Some("/tmp/checkpoints".to_string())
16376        );
16377        assert!(config.session.per_period_output);
16378        assert!(!config.session.consolidated_output);
16379    }
16380
16381    // -----------------------------------------------------------------------
16382    // SP3 — IndustryProfileField / IndustryPriorsConfig tests
16383    // -----------------------------------------------------------------------
16384
16385    #[test]
16386    fn industry_profile_legacy_string_form_parses() {
16387        // Legacy YAML: bare enum variant name.  Must round-trip without changes
16388        // to existing config files.
16389        let yaml = r#"
16390enabled: true
16391industry_profile: retail
16392"#;
16393        let cfg: AdvancedDistributionConfig =
16394            serde_yaml::from_str(yaml).expect("parse legacy industry_profile string");
16395        let profile = cfg.industry_profile.expect("Some");
16396        assert_eq!(profile.profile_type(), IndustryProfileType::Retail);
16397        assert!(profile.priors().is_none());
16398    }
16399
16400    #[test]
16401    fn industry_profile_full_form_with_priors_parses() {
16402        let yaml = r#"
16403enabled: true
16404industry_profile:
16405  name: healthcare
16406  priors:
16407    enabled: true
16408    source: bundled
16409"#;
16410        let cfg: AdvancedDistributionConfig =
16411            serde_yaml::from_str(yaml).expect("parse full industry_profile struct");
16412        let profile = cfg.industry_profile.expect("Some");
16413        assert_eq!(profile.profile_type(), IndustryProfileType::Healthcare);
16414        let priors = profile.priors().expect("priors present");
16415        assert!(priors.enabled);
16416        assert_eq!(priors.source, PriorsSource::Bundled);
16417        assert!(priors.path.is_none());
16418    }
16419
16420    #[test]
16421    fn industry_profile_full_form_without_priors_parses() {
16422        // Struct form with only `name` and no priors block.
16423        let yaml = r#"
16424enabled: true
16425industry_profile:
16426  name: manufacturing
16427"#;
16428        let cfg: AdvancedDistributionConfig =
16429            serde_yaml::from_str(yaml).expect("parse struct without priors");
16430        let profile = cfg.industry_profile.expect("Some");
16431        assert_eq!(profile.profile_type(), IndustryProfileType::Manufacturing);
16432        assert!(profile.priors().is_none());
16433    }
16434
16435    #[test]
16436    fn industry_profile_priors_file_without_path_fails_validation() {
16437        use crate::validation::validate_config;
16438
16439        // Minimal valid config plumbing.
16440        let yaml = r#"
16441global:
16442  seed: 42
16443  start_date: "2024-01-01"
16444  period_months: 1
16445  industry: retail
16446companies:
16447  - code: C001
16448    name: Test Corp
16449    currency: USD
16450    country: US
16451    annual_transaction_volume: ten_k
16452chart_of_accounts:
16453  complexity: small
16454output:
16455  output_directory: ./output
16456distributions:
16457  enabled: true
16458  industry_profile:
16459    name: retail
16460    priors:
16461      enabled: true
16462      source: file
16463"#;
16464        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
16465        let err = validate_config(&cfg).expect_err("path required when source=file");
16466        let msg = err.to_string();
16467        assert!(
16468            msg.contains("path") || msg.contains("required"),
16469            "unexpected error message: {msg}"
16470        );
16471    }
16472
16473    #[test]
16474    fn industry_profile_priors_file_with_path_passes_validation() {
16475        use crate::validation::validate_config;
16476
16477        let yaml = r#"
16478global:
16479  seed: 42
16480  start_date: "2024-01-01"
16481  period_months: 1
16482  industry: retail
16483companies:
16484  - code: C001
16485    name: Test Corp
16486    currency: USD
16487    country: US
16488    annual_transaction_volume: ten_k
16489chart_of_accounts:
16490  complexity: small
16491output:
16492  output_directory: ./output
16493distributions:
16494  enabled: true
16495  industry_profile:
16496    name: retail
16497    priors:
16498      enabled: true
16499      source: file
16500      path: /tmp/priors.json
16501"#;
16502        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
16503        validate_config(&cfg).expect("validation should pass with path supplied");
16504    }
16505}