Skip to main content

datasynth_config/
schema.rs

1//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, ExpectationDriver, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
11/// Root configuration for the synthetic data generator.
12///
13/// # camelCase alias policy
14///
15/// Every multi-word field carries `#[serde(alias = "camelCaseName")]`
16/// so SDK clients that follow JSON conventions can submit configs
17/// without round-tripping through a snake_case transformer.
18///
19/// Before v4.4.1 several fields — `documentFlows`, `accountingStandards`,
20/// `complianceRegulations`, `analyticsMetadata` — had no alias, so SDK
21/// submissions silently fell through to defaults. The symptom was
22/// "enabling the 6 feature subsections together collapses the archive
23/// from 99 files to 19". Root cause: those four fields never parsed;
24/// the orchestrator produced far less data than requested, and
25/// `output.exportFormat` similarly fell through so journal_entries
26/// landed as the default Parquet/CSV rather than JSON.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GeneratorConfig {
29    /// Global settings
30    pub global: GlobalConfig,
31    /// Company configuration
32    pub companies: Vec<CompanyConfig>,
33    /// Chart of Accounts configuration
34    #[serde(alias = "chartOfAccounts")]
35    pub chart_of_accounts: ChartOfAccountsConfig,
36    /// Transaction generation settings
37    #[serde(default)]
38    pub transactions: TransactionConfig,
39    /// Output configuration
40    pub output: OutputConfig,
41    /// Fraud simulation settings
42    #[serde(default)]
43    pub fraud: FraudConfig,
44    /// Data quality variation settings
45    #[serde(default, alias = "dataQuality")]
46    pub data_quality: DataQualitySchemaConfig,
47    /// Internal Controls System settings
48    #[serde(default, alias = "internalControls")]
49    pub internal_controls: InternalControlsConfig,
50    /// Business process mix
51    #[serde(default, alias = "businessProcesses")]
52    pub business_processes: BusinessProcessConfig,
53    /// User persona distribution
54    #[serde(default, alias = "userPersonas")]
55    pub user_personas: UserPersonaConfig,
56    /// Template configuration for realistic data
57    #[serde(default)]
58    pub templates: TemplateConfig,
59    /// Approval workflow configuration
60    #[serde(default)]
61    pub approval: ApprovalConfig,
62    /// Department structure configuration
63    #[serde(default)]
64    pub departments: DepartmentConfig,
65    /// Master data generation settings
66    #[serde(default, alias = "masterData")]
67    pub master_data: MasterDataConfig,
68    /// Document flow generation settings
69    #[serde(default, alias = "documentFlows")]
70    pub document_flows: DocumentFlowConfig,
71    /// Intercompany transaction settings
72    #[serde(default)]
73    pub intercompany: IntercompanyConfig,
74    /// Balance and trial balance settings
75    #[serde(default)]
76    pub balance: BalanceConfig,
77    /// OCPM (Object-Centric Process Mining) settings
78    #[serde(default)]
79    pub ocpm: OcpmConfig,
80    /// Audit engagement and workpaper generation settings
81    #[serde(default)]
82    pub audit: AuditGenerationConfig,
83    /// Banking KYC/AML transaction generation settings
84    #[serde(default)]
85    pub banking: datasynth_banking::BankingConfig,
86    /// Scenario configuration for metadata and tagging (Phase 1.3)
87    #[serde(default)]
88    pub scenario: ScenarioConfig,
89    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
90    #[serde(default)]
91    pub temporal: TemporalDriftConfig,
92    /// Graph export configuration for accounting network export
93    #[serde(default, alias = "graphExport")]
94    pub graph_export: GraphExportConfig,
95    /// Streaming output API configuration
96    #[serde(default)]
97    pub streaming: StreamingSchemaConfig,
98    /// Rate limiting configuration
99    #[serde(default, alias = "rateLimit")]
100    pub rate_limit: RateLimitSchemaConfig,
101    /// Temporal attribute generation configuration
102    #[serde(default, alias = "temporalAttributes")]
103    pub temporal_attributes: TemporalAttributeSchemaConfig,
104    /// Relationship generation configuration
105    #[serde(default)]
106    pub relationships: RelationshipSchemaConfig,
107    /// Accounting standards framework configuration (IFRS, US GAAP)
108    #[serde(default, alias = "accountingStandards")]
109    pub accounting_standards: AccountingStandardsConfig,
110    /// Audit standards framework configuration (ISA, PCAOB)
111    #[serde(default, alias = "auditStandards")]
112    pub audit_standards: AuditStandardsConfig,
113    /// Advanced distribution configuration (mixture models, correlations, regime changes)
114    #[serde(default)]
115    pub distributions: AdvancedDistributionConfig,
116    /// Temporal patterns configuration (business days, period-end dynamics, processing lags)
117    #[serde(default, alias = "temporalPatterns")]
118    pub temporal_patterns: TemporalPatternsConfig,
119    /// Vendor network configuration (multi-tier supply chain modeling)
120    #[serde(default, alias = "vendorNetwork")]
121    pub vendor_network: VendorNetworkSchemaConfig,
122    /// Customer segmentation configuration (value segments, lifecycle stages)
123    #[serde(default, alias = "customerSegmentation")]
124    pub customer_segmentation: CustomerSegmentationSchemaConfig,
125    /// Relationship strength calculation configuration
126    #[serde(default, alias = "relationshipStrength")]
127    pub relationship_strength: RelationshipStrengthSchemaConfig,
128    /// Cross-process link configuration (P2P ↔ O2C via inventory)
129    #[serde(default, alias = "crossProcessLinks")]
130    pub cross_process_links: CrossProcessLinksSchemaConfig,
131    /// Organizational events configuration (acquisitions, divestitures, etc.)
132    #[serde(default, alias = "organizationalEvents")]
133    pub organizational_events: OrganizationalEventsSchemaConfig,
134    /// Behavioral drift configuration (vendor, customer, employee behavior)
135    #[serde(default, alias = "behavioralDrift")]
136    pub behavioral_drift: BehavioralDriftSchemaConfig,
137    /// Market drift configuration (economic cycles, commodities, price shocks)
138    #[serde(default, alias = "marketDrift")]
139    pub market_drift: MarketDriftSchemaConfig,
140    /// Drift labeling configuration for ground truth generation
141    #[serde(default, alias = "driftLabeling")]
142    pub drift_labeling: DriftLabelingSchemaConfig,
143    /// Enhanced anomaly injection configuration (multi-stage schemes, correlated injection, near-miss)
144    #[serde(default, alias = "anomalyInjection")]
145    pub anomaly_injection: EnhancedAnomalyConfig,
146    /// Industry-specific transaction and anomaly generation configuration
147    #[serde(default, alias = "industrySpecific")]
148    pub industry_specific: IndustrySpecificConfig,
149    /// Fingerprint privacy configuration for extraction/synthesis
150    #[serde(default, alias = "fingerprintPrivacy")]
151    pub fingerprint_privacy: FingerprintPrivacyConfig,
152    /// Quality gate configuration for pass/fail thresholds
153    #[serde(default, alias = "qualityGates")]
154    pub quality_gates: QualityGatesSchemaConfig,
155    /// Compliance configuration (EU AI Act, content marking)
156    #[serde(default)]
157    pub compliance: ComplianceSchemaConfig,
158    /// Webhook notification configuration
159    #[serde(default)]
160    pub webhooks: WebhookSchemaConfig,
161    /// LLM enrichment configuration (AI-augmented vendor names, descriptions, explanations)
162    #[serde(default)]
163    pub llm: LlmSchemaConfig,
164    /// Diffusion model configuration (statistical diffusion-based data enhancement)
165    #[serde(default)]
166    pub diffusion: DiffusionSchemaConfig,
167    /// Causal generation configuration (structural causal models, interventions)
168    #[serde(default)]
169    pub causal: CausalSchemaConfig,
170
171    // ===== Enterprise Process Chain Extensions =====
172    /// Source-to-Pay (S2C/S2P) configuration (sourcing, contracts, catalogs, scorecards)
173    #[serde(default, alias = "sourceToPay")]
174    pub source_to_pay: SourceToPayConfig,
175    /// Financial reporting configuration (financial statements, KPIs, budgets)
176    #[serde(default, alias = "financialReporting")]
177    pub financial_reporting: FinancialReportingConfig,
178    /// HR process configuration (payroll, time & attendance, expenses)
179    #[serde(default)]
180    pub hr: HrConfig,
181    /// Manufacturing configuration (production orders, WIP, routing)
182    #[serde(default)]
183    pub manufacturing: ManufacturingProcessConfig,
184    /// Sales quote configuration (quote-to-order pipeline)
185    #[serde(default, alias = "salesQuotes")]
186    pub sales_quotes: SalesQuoteConfig,
187    /// Tax accounting configuration (VAT/GST, sales tax, withholding, provisions, payroll tax)
188    #[serde(default)]
189    pub tax: TaxConfig,
190    /// Treasury and cash management configuration
191    #[serde(default)]
192    pub treasury: TreasuryConfig,
193    /// Project accounting configuration
194    #[serde(default, alias = "projectAccounting")]
195    pub project_accounting: ProjectAccountingConfig,
196    /// ESG / Sustainability reporting configuration
197    #[serde(default)]
198    pub esg: EsgConfig,
199    /// Country pack configuration (external packs directory, per-country overrides)
200    #[serde(default, alias = "countryPacks")]
201    pub country_packs: Option<CountryPacksSchemaConfig>,
202    /// Counterfactual simulation scenario configuration
203    #[serde(default)]
204    pub scenarios: ScenariosConfig,
205    /// Generation session configuration (period-by-period generation with balance carry-forward)
206    #[serde(default)]
207    pub session: SessionSchemaConfig,
208    /// Compliance regulations framework configuration (standards registry, jurisdictions, temporal versioning, audit templates, graph integration)
209    #[serde(default, alias = "complianceRegulations")]
210    pub compliance_regulations: ComplianceRegulationsConfig,
211    /// v3.3.0: analytics metadata phase — prior-year comparatives,
212    /// industry benchmarks, management reports, drift events. Off by
213    /// default so v3.2.1 archives are byte-identical.
214    #[serde(default, alias = "analyticsMetadata")]
215    pub analytics_metadata: AnalyticsMetadataConfig,
216    /// Phase 1 of the central concentration abstraction (#143). Post-generation
217    /// passes over the JE batch that reshape distributional structure toward a
218    /// corpus-derived target. Off by default — see
219    /// `docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md`.
220    #[serde(default)]
221    pub concentration: ConcentrationConfig,
222}
223
224/// v3.3.0: analytics-metadata phase configuration.
225///
226/// Gates the `phase_analytics_metadata` pass that runs AFTER all
227/// JE-adding phases (including the fraud-bias sweep at Phase 20b).
228/// When enabled, the orchestrator calls `PriorYearGenerator`,
229/// `IndustryBenchmarkGenerator`, `ManagementReportGenerator`, and
230/// `DriftEventGenerator` in sequence; each sub-flag below controls
231/// whether that specific generator fires.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub struct AnalyticsMetadataConfig {
234    /// Master switch for the whole analytics phase.
235    #[serde(default)]
236    pub enabled: bool,
237    /// Emit `PriorYearComparative` records derived from current
238    /// period's account balances.
239    #[serde(default = "default_true")]
240    pub prior_year: bool,
241    /// Emit `IndustryBenchmark` records for the configured industry.
242    #[serde(default = "default_true")]
243    pub industry_benchmark: bool,
244    /// Emit management-report artefacts.
245    #[serde(default = "default_true")]
246    pub management_reports: bool,
247    /// Emit `LabeledDriftEvent` records — post-generation sweep over
248    /// journal entries to label detected drift patterns.
249    #[serde(default = "default_true")]
250    pub drift_events: bool,
251}
252
253impl Default for AnalyticsMetadataConfig {
254    fn default() -> Self {
255        Self {
256            enabled: false,
257            prior_year: true,
258            industry_benchmark: true,
259            management_reports: true,
260            drift_events: true,
261        }
262    }
263}
264
265/// LLM enrichment configuration.
266///
267/// Controls AI-augmented metadata enrichment using LLM providers.
268/// When enabled, vendor names, transaction descriptions, and anomaly explanations
269/// are enriched using the configured provider (mock by default).
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct LlmSchemaConfig {
272    /// Whether LLM enrichment is enabled.
273    #[serde(default)]
274    pub enabled: bool,
275    /// Provider type: "mock", "openai", "anthropic", "custom".
276    #[serde(default = "default_llm_provider")]
277    pub provider: String,
278    /// Model name/ID for the provider.
279    #[serde(default = "default_llm_model_name")]
280    pub model: String,
281    /// Maximum number of vendor names to enrich per run.
282    #[serde(default = "default_llm_batch_size")]
283    pub max_vendor_enrichments: usize,
284
285    /// v4.1.1+: also enrich customer names at generate time.
286    /// Default `false` preserves v4.1.0 behaviour.
287    #[serde(default)]
288    pub enrich_customers: bool,
289
290    /// v4.1.1+: also enrich material descriptions at generate time.
291    /// Default `false`.
292    #[serde(default)]
293    pub enrich_materials: bool,
294
295    /// v4.1.1+: also enrich audit finding titles at generate time
296    /// (the finding narratives remain on their existing template path
297    /// because they're richer and locale-specific). Default `false`.
298    #[serde(default)]
299    pub enrich_findings: bool,
300
301    /// v4.1.1+: upper bound on customer enrichments per run. Matches
302    /// `max_vendor_enrichments` semantics.
303    #[serde(default = "default_llm_batch_size")]
304    pub max_customer_enrichments: usize,
305
306    /// v4.1.1+: upper bound on material enrichments per run.
307    #[serde(default = "default_llm_batch_size")]
308    pub max_material_enrichments: usize,
309
310    /// v4.1.1+: upper bound on finding enrichments per run.
311    #[serde(default = "default_llm_batch_size")]
312    pub max_finding_enrichments: usize,
313}
314
/// Serde fallback for `LlmSchemaConfig::provider`: the `"mock"` provider.
fn default_llm_provider() -> String {
    String::from("mock")
}
318
/// Serde fallback for `LlmSchemaConfig::model`.
fn default_llm_model_name() -> String {
    String::from("gpt-4o-mini")
}
322
/// Serde fallback shared by the four `max_*_enrichments` caps of
/// `LlmSchemaConfig`.
fn default_llm_batch_size() -> usize {
    const PER_RUN_CAP: usize = 50;
    PER_RUN_CAP
}
326
327impl Default for LlmSchemaConfig {
328    fn default() -> Self {
329        Self {
330            enabled: false,
331            provider: default_llm_provider(),
332            model: default_llm_model_name(),
333            max_vendor_enrichments: default_llm_batch_size(),
334            enrich_customers: false,
335            enrich_materials: false,
336            enrich_findings: false,
337            max_customer_enrichments: default_llm_batch_size(),
338            max_material_enrichments: default_llm_batch_size(),
339            max_finding_enrichments: default_llm_batch_size(),
340        }
341    }
342}
343
344/// Diffusion model configuration.
345///
346/// Controls statistical diffusion-based data enhancement that generates samples
347/// matching target distribution properties (means, standard deviations, correlations).
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct DiffusionSchemaConfig {
350    /// Whether diffusion enhancement is enabled.
351    #[serde(default)]
352    pub enabled: bool,
353    /// Number of diffusion steps (higher = better quality, slower).
354    #[serde(default = "default_diffusion_steps")]
355    pub n_steps: usize,
356    /// Noise schedule type: "linear", "cosine", "sigmoid".
357    #[serde(default = "default_diffusion_schedule")]
358    pub schedule: String,
359    /// Number of sample rows to generate for demonstration.
360    #[serde(default = "default_diffusion_sample_size")]
361    pub sample_size: usize,
362    /// Backend type: "statistical" (default), "neural", "hybrid".
363    #[serde(default = "default_diffusion_backend")]
364    pub backend: String,
365    /// Neural diffusion backend configuration (used when backend is "neural" or "hybrid").
366    #[serde(default)]
367    pub neural: NeuralDiffusionSchemaConfig,
368}
369
/// Serde fallback for `DiffusionSchemaConfig::n_steps`.
fn default_diffusion_steps() -> usize {
    const STEPS: usize = 100;
    STEPS
}
373
/// Serde fallback for `DiffusionSchemaConfig::schedule`: the `"linear"`
/// noise schedule.
fn default_diffusion_schedule() -> String {
    String::from("linear")
}
377
/// Serde fallback for `DiffusionSchemaConfig::sample_size`.
fn default_diffusion_sample_size() -> usize {
    const SAMPLE_ROWS: usize = 100;
    SAMPLE_ROWS
}
381
/// Serde fallback for `DiffusionSchemaConfig::backend`: the `"statistical"`
/// backend.
fn default_diffusion_backend() -> String {
    String::from("statistical")
}
385
386impl Default for DiffusionSchemaConfig {
387    fn default() -> Self {
388        Self {
389            enabled: false,
390            n_steps: default_diffusion_steps(),
391            schedule: default_diffusion_schedule(),
392            sample_size: default_diffusion_sample_size(),
393            backend: default_diffusion_backend(),
394            neural: NeuralDiffusionSchemaConfig::default(),
395        }
396    }
397}
398
399/// Neural diffusion backend configuration.
400///
401/// Controls the `candle`-based neural score network that learns joint distributions
402/// from training data for the neural and hybrid diffusion backends.
403#[derive(Debug, Clone, Serialize, Deserialize)]
404pub struct NeuralDiffusionSchemaConfig {
405    /// Hidden layer dimensions for the score network MLP.
406    #[serde(default = "default_neural_hidden_dims")]
407    pub hidden_dims: Vec<usize>,
408    /// Dimensionality of the timestep embedding.
409    #[serde(default = "default_neural_timestep_embed_dim")]
410    pub timestep_embed_dim: usize,
411    /// Learning rate for training.
412    #[serde(default = "default_neural_learning_rate")]
413    pub learning_rate: f64,
414    /// Number of training epochs.
415    #[serde(default = "default_neural_training_epochs")]
416    pub training_epochs: usize,
417    /// Training batch size.
418    #[serde(default = "default_neural_batch_size")]
419    pub batch_size: usize,
420    /// Blend weight for hybrid mode (0.0 = all statistical, 1.0 = all neural).
421    #[serde(default = "default_neural_hybrid_weight")]
422    pub hybrid_weight: f64,
423    /// Hybrid blending strategy: "weighted_average", "column_select", "threshold".
424    #[serde(default = "default_neural_hybrid_strategy")]
425    pub hybrid_strategy: String,
426    /// Columns to apply neural generation to (empty = all numeric columns).
427    #[serde(default)]
428    pub neural_columns: Vec<String>,
429    /// v4.4.0+ Optional path to a pre-trained score-network checkpoint
430    /// (`.safetensors`). When set, the orchestrator loads the
431    /// checkpoint instead of training from the first batch — useful
432    /// for long-running production deployments where training cost
433    /// dominates per-run cost. When empty, the orchestrator trains
434    /// on the first generated JE amounts.
435    #[serde(default, skip_serializing_if = "Option::is_none")]
436    pub checkpoint_path: Option<String>,
437}
438
/// Serde fallback for `NeuralDiffusionSchemaConfig::hidden_dims`: three
/// hidden layers of 256, 256 and 128 units.
fn default_neural_hidden_dims() -> Vec<usize> {
    [256, 256, 128].to_vec()
}
442
/// Serde fallback for `NeuralDiffusionSchemaConfig::timestep_embed_dim`.
fn default_neural_timestep_embed_dim() -> usize {
    const EMBED_DIM: usize = 64;
    EMBED_DIM
}
446
/// Serde fallback for `NeuralDiffusionSchemaConfig::learning_rate`.
fn default_neural_learning_rate() -> f64 {
    const LEARNING_RATE: f64 = 0.001;
    LEARNING_RATE
}
450
/// Serde fallback for `NeuralDiffusionSchemaConfig::training_epochs`.
fn default_neural_training_epochs() -> usize {
    const EPOCHS: usize = 100;
    EPOCHS
}
454
/// Serde fallback for `NeuralDiffusionSchemaConfig::batch_size`.
fn default_neural_batch_size() -> usize {
    const BATCH: usize = 64;
    BATCH
}
458
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_weight`: an even
/// blend between the statistical (0.0) and neural (1.0) ends of the scale.
fn default_neural_hybrid_weight() -> f64 {
    const BLEND: f64 = 0.5;
    BLEND
}
462
/// Serde fallback for `NeuralDiffusionSchemaConfig::hybrid_strategy`.
fn default_neural_hybrid_strategy() -> String {
    String::from("weighted_average")
}
466
467impl Default for NeuralDiffusionSchemaConfig {
468    fn default() -> Self {
469        Self {
470            hidden_dims: default_neural_hidden_dims(),
471            timestep_embed_dim: default_neural_timestep_embed_dim(),
472            learning_rate: default_neural_learning_rate(),
473            training_epochs: default_neural_training_epochs(),
474            batch_size: default_neural_batch_size(),
475            hybrid_weight: default_neural_hybrid_weight(),
476            hybrid_strategy: default_neural_hybrid_strategy(),
477            neural_columns: Vec::new(),
478            checkpoint_path: None,
479        }
480    }
481}
482
483/// Causal generation configuration.
484///
485/// Controls structural causal model (SCM) based data generation that respects
486/// causal relationships between variables, supports do-calculus interventions,
487/// and enables counterfactual scenarios.
488#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct CausalSchemaConfig {
490    /// Whether causal generation is enabled.
491    #[serde(default)]
492    pub enabled: bool,
493    /// Built-in template to use: "fraud_detection", "revenue_cycle", or "custom".
494    #[serde(default = "default_causal_template")]
495    pub template: String,
496    /// Number of causal samples to generate.
497    #[serde(default = "default_causal_sample_size")]
498    pub sample_size: usize,
499    /// Whether to run causal validation on the output.
500    #[serde(default = "default_true")]
501    pub validate: bool,
502}
503
/// Serde fallback for `CausalSchemaConfig::template`: the built-in
/// `"fraud_detection"` template.
fn default_causal_template() -> String {
    String::from("fraud_detection")
}
507
/// Serde fallback for `CausalSchemaConfig::sample_size`.
fn default_causal_sample_size() -> usize {
    const SAMPLES: usize = 500;
    SAMPLES
}
511
512impl Default for CausalSchemaConfig {
513    fn default() -> Self {
514        Self {
515            enabled: false,
516            template: default_causal_template(),
517            sample_size: default_causal_sample_size(),
518            validate: true,
519        }
520    }
521}
522
523/// Graph export configuration for accounting network and ML training exports.
524///
525/// This section enables exporting generated data as graphs for:
526/// - Network reconstruction algorithms
527/// - Graph neural network training
528/// - Neo4j graph database import
529#[derive(Debug, Clone, Serialize, Deserialize)]
530pub struct GraphExportConfig {
531    /// Enable graph export.
532    #[serde(default)]
533    pub enabled: bool,
534
535    /// Graph types to generate.
536    #[serde(default = "default_graph_types")]
537    pub graph_types: Vec<GraphTypeConfig>,
538
539    /// Export formats to generate.
540    #[serde(default = "default_graph_formats")]
541    pub formats: Vec<GraphExportFormat>,
542
543    /// Train split ratio for ML datasets.
544    #[serde(default = "default_train_ratio")]
545    pub train_ratio: f64,
546
547    /// Validation split ratio for ML datasets.
548    #[serde(default = "default_val_ratio")]
549    pub validation_ratio: f64,
550
551    /// Random seed for train/val/test splits.
552    #[serde(default)]
553    pub split_seed: Option<u64>,
554
555    /// Output subdirectory for graph exports (relative to output directory).
556    #[serde(default = "default_graph_subdir")]
557    pub output_subdirectory: String,
558
559    /// Multi-layer hypergraph export settings for RustGraph integration.
560    #[serde(default)]
561    pub hypergraph: HypergraphExportSettings,
562
563    /// DGL-specific export settings.
564    #[serde(default)]
565    pub dgl: DglExportConfig,
566
567    /// `graphs/je_network.csv` flat edge-list export settings (v5.8.0+).
568    #[serde(default)]
569    pub je_network: JeNetworkConfig,
570}
571
572/// Method used to construct edges from journal entries when writing
573/// `graphs/je_network.csv` (v5.8.0+).
574///
575/// Reference: Ivertowski (2024), *Hardware Accelerated Method for
576/// Accounting Network Generation*, Methods A through E.
577#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
578#[serde(rename_all = "snake_case")]
579pub enum JeNetworkMethod {
580    /// Method B (full Cartesian product) for every JE — bijective on
581    /// 2-line entries (Method A) and `n × m` Cartesian for multi-line
582    /// entries with proportional amount allocation.  Produces
583    /// O(n × m) edges per JE — a 50-debit / 50-credit period-close
584    /// consolidation alone yields 2 500 edges, and a typical
585    /// HF-scale 1 M-line config can blow up to 200 M+ edges (and tens
586    /// of GB of memory). Use explicitly when downstream consumers
587    /// already depend on the Cartesian shape.
588    Cartesian,
589    /// Method A only — emit a single edge per 2-line journal entry
590    /// (1 debit + 1 credit) and skip multi-line entries entirely.
591    /// Edge count = number of 2-line JEs (≈ 60 % of entries per the
592    /// 2024 paper); per-edge confidence is exactly `1.0`.
593    ///
594    /// **Default since v5.27** (previously `Cartesian`). The Cartesian
595    /// default OOM'd small-complexity CLI smoke tests on 14-16 GB CI
596    /// runners — a 50 × 50 period-close JE alone wanted 20 GB of edge
597    /// memory. Method A is the bounded, exactness-preserving fallback
598    /// recommended for published reference datasets where size and
599    /// exactness matter more than recall on multi-line consolidations.
600    /// Set `je_network.method: cartesian` explicitly to restore the
601    /// pre-v5.27 behaviour.
602    #[default]
603    A,
604}
605
606/// Configuration for the `graphs/je_network.csv` flat edge-list
607/// export (v5.8.0+).
608#[derive(Debug, Clone, Default, Serialize, Deserialize)]
609#[serde(deny_unknown_fields)]
610pub struct JeNetworkConfig {
611    /// Edge-construction method (see [`JeNetworkMethod`]).
612    #[serde(default)]
613    pub method: JeNetworkMethod,
614}
615
616fn default_graph_types() -> Vec<GraphTypeConfig> {
617    vec![GraphTypeConfig::default()]
618}
619
620fn default_graph_formats() -> Vec<GraphExportFormat> {
621    vec![GraphExportFormat::PytorchGeometric]
622}
623
624fn default_train_ratio() -> f64 {
625    0.7
626}
627
628fn default_val_ratio() -> f64 {
629    0.15
630}
631
632fn default_graph_subdir() -> String {
633    "graphs".to_string()
634}
635
636impl Default for GraphExportConfig {
637    fn default() -> Self {
638        Self {
639            enabled: false,
640            graph_types: default_graph_types(),
641            formats: default_graph_formats(),
642            train_ratio: 0.7,
643            validation_ratio: 0.15,
644            split_seed: None,
645            output_subdirectory: "graphs".to_string(),
646            hypergraph: HypergraphExportSettings::default(),
647            dgl: DglExportConfig::default(),
648            je_network: JeNetworkConfig::default(),
649        }
650    }
651}
652
653/// DGL-specific export settings.
654#[derive(Debug, Clone, Default, Serialize, Deserialize)]
655pub struct DglExportConfig {
656    /// Export as a heterogeneous graph (distinct node/edge types).
657    ///
658    /// When `true` the DGL exporter produces a `HeteroData` object with typed
659    /// node and edge stores rather than a single homogeneous graph.
660    /// Set to `true` in `graph_export.dgl.heterogeneous: true` in YAML.
661    #[serde(default)]
662    pub heterogeneous: bool,
663}
664
665// Default derived: heterogeneous = false (bool default)
666
667/// Settings for the multi-layer hypergraph export (RustGraph integration).
668///
669/// Produces a 3-layer hypergraph:
670/// - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
671/// - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
672/// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
673#[derive(Debug, Clone, Serialize, Deserialize)]
674pub struct HypergraphExportSettings {
675    /// Enable hypergraph export.
676    #[serde(default)]
677    pub enabled: bool,
678
679    /// Maximum total nodes across all layers (default 50000).
680    #[serde(default = "default_hypergraph_max_nodes")]
681    pub max_nodes: usize,
682
683    /// Aggregation strategy when node budget is exceeded.
684    #[serde(default = "default_aggregation_strategy")]
685    pub aggregation_strategy: String,
686
687    /// Layer 1 (Governance & Controls) settings.
688    #[serde(default)]
689    pub governance_layer: GovernanceLayerSettings,
690
691    /// Layer 2 (Process Events) settings.
692    #[serde(default)]
693    pub process_layer: ProcessLayerSettings,
694
695    /// Layer 3 (Accounting Network) settings.
696    #[serde(default)]
697    pub accounting_layer: AccountingLayerSettings,
698
699    /// Cross-layer edge generation settings.
700    #[serde(default)]
701    pub cross_layer: CrossLayerSettings,
702
703    /// Output subdirectory for hypergraph files (relative to graph output directory).
704    #[serde(default = "default_hypergraph_subdir")]
705    pub output_subdirectory: String,
706
707    /// Output format: "native" (default) for internal field names, "unified" for RustGraph format.
708    #[serde(default = "default_hypergraph_format")]
709    pub output_format: String,
710
711    /// Optional URL for streaming unified JSONL to a RustGraph ingest endpoint.
712    #[serde(default)]
713    pub stream_target: Option<String>,
714
715    /// Batch size for streaming (number of JSONL lines per HTTP POST). Default: 1000.
716    #[serde(default = "default_stream_batch_size")]
717    pub stream_batch_size: usize,
718}
719
720fn default_hypergraph_max_nodes() -> usize {
721    50_000
722}
723
724fn default_aggregation_strategy() -> String {
725    "pool_by_counterparty".to_string()
726}
727
728fn default_hypergraph_subdir() -> String {
729    "hypergraph".to_string()
730}
731
732fn default_hypergraph_format() -> String {
733    "native".to_string()
734}
735
736fn default_stream_batch_size() -> usize {
737    1000
738}
739
740impl Default for HypergraphExportSettings {
741    fn default() -> Self {
742        Self {
743            enabled: false,
744            max_nodes: 50_000,
745            aggregation_strategy: "pool_by_counterparty".to_string(),
746            governance_layer: GovernanceLayerSettings::default(),
747            process_layer: ProcessLayerSettings::default(),
748            accounting_layer: AccountingLayerSettings::default(),
749            cross_layer: CrossLayerSettings::default(),
750            output_subdirectory: "hypergraph".to_string(),
751            output_format: "native".to_string(),
752            stream_target: None,
753            stream_batch_size: 1000,
754        }
755    }
756}
757
758/// Layer 1: Governance & Controls layer settings.
759#[derive(Debug, Clone, Serialize, Deserialize)]
760pub struct GovernanceLayerSettings {
761    /// Include COSO framework nodes (5 components + 17 principles).
762    #[serde(default = "default_true")]
763    pub include_coso: bool,
764    /// Include internal control nodes.
765    #[serde(default = "default_true")]
766    pub include_controls: bool,
767    /// Include SOX assertion nodes.
768    #[serde(default = "default_true")]
769    pub include_sox: bool,
770    /// Include vendor master data nodes.
771    #[serde(default = "default_true")]
772    pub include_vendors: bool,
773    /// Include customer master data nodes.
774    #[serde(default = "default_true")]
775    pub include_customers: bool,
776    /// Include employee/organizational nodes.
777    #[serde(default = "default_true")]
778    pub include_employees: bool,
779}
780
781impl Default for GovernanceLayerSettings {
782    fn default() -> Self {
783        Self {
784            include_coso: true,
785            include_controls: true,
786            include_sox: true,
787            include_vendors: true,
788            include_customers: true,
789            include_employees: true,
790        }
791    }
792}
793
794/// Layer 2: Process Events layer settings.
795#[derive(Debug, Clone, Serialize, Deserialize)]
796pub struct ProcessLayerSettings {
797    /// Include P2P (Procure-to-Pay) document flow nodes.
798    #[serde(default = "default_true")]
799    pub include_p2p: bool,
800    /// Include O2C (Order-to-Cash) document flow nodes.
801    #[serde(default = "default_true")]
802    pub include_o2c: bool,
803    /// Include S2C (Source-to-Contract) document flow nodes.
804    #[serde(default = "default_true")]
805    pub include_s2c: bool,
806    /// Include H2R (Hire-to-Retire) document flow nodes.
807    #[serde(default = "default_true")]
808    pub include_h2r: bool,
809    /// Include MFG (Manufacturing) document flow nodes.
810    #[serde(default = "default_true")]
811    pub include_mfg: bool,
812    /// Include BANK (Banking) document flow nodes.
813    #[serde(default = "default_true")]
814    pub include_bank: bool,
815    /// Include AUDIT document flow nodes.
816    #[serde(default = "default_true")]
817    pub include_audit: bool,
818    /// Include R2R (Record-to-Report) document flow nodes (bank recon + period close).
819    #[serde(default = "default_true")]
820    pub include_r2r: bool,
821    /// Export OCPM events as hyperedges.
822    #[serde(default = "default_true")]
823    pub events_as_hyperedges: bool,
824    /// Threshold: if a counterparty has more documents than this, aggregate into pool nodes.
825    #[serde(default = "default_docs_per_counterparty_threshold")]
826    pub docs_per_counterparty_threshold: usize,
827}
828
/// Serde fallback for `ProcessLayerSettings::docs_per_counterparty_threshold`: 20.
fn default_docs_per_counterparty_threshold() -> usize {
    const THRESHOLD: usize = 20;
    THRESHOLD
}
832
833impl Default for ProcessLayerSettings {
834    fn default() -> Self {
835        Self {
836            include_p2p: true,
837            include_o2c: true,
838            include_s2c: true,
839            include_h2r: true,
840            include_mfg: true,
841            include_bank: true,
842            include_audit: true,
843            include_r2r: true,
844            events_as_hyperedges: true,
845            docs_per_counterparty_threshold: 20,
846        }
847    }
848}
849
850/// Layer 3: Accounting Network layer settings.
851#[derive(Debug, Clone, Serialize, Deserialize)]
852pub struct AccountingLayerSettings {
853    /// Include GL account nodes.
854    #[serde(default = "default_true")]
855    pub include_accounts: bool,
856    /// Export journal entries as hyperedges (debit+credit accounts as participants).
857    #[serde(default = "default_true")]
858    pub je_as_hyperedges: bool,
859}
860
861impl Default for AccountingLayerSettings {
862    fn default() -> Self {
863        Self {
864            include_accounts: true,
865            je_as_hyperedges: true,
866        }
867    }
868}
869
870/// Cross-layer edge generation settings.
871#[derive(Debug, Clone, Serialize, Deserialize)]
872pub struct CrossLayerSettings {
873    /// Generate cross-layer edges (Control→Account, Vendor→PO, etc.).
874    #[serde(default = "default_true")]
875    pub enabled: bool,
876}
877
878impl Default for CrossLayerSettings {
879    fn default() -> Self {
880        Self { enabled: true }
881    }
882}
883
884/// Configuration for a specific graph type to export.
885#[derive(Debug, Clone, Serialize, Deserialize)]
886pub struct GraphTypeConfig {
887    /// Name identifier for this graph configuration.
888    #[serde(default = "default_graph_name")]
889    pub name: String,
890
891    /// Whether to aggregate parallel edges between the same nodes.
892    #[serde(default)]
893    pub aggregate_edges: bool,
894
895    /// Minimum edge weight to include (filters out small transactions).
896    #[serde(default)]
897    pub min_edge_weight: f64,
898
899    /// Whether to include document nodes (creates hub-and-spoke structure).
900    #[serde(default)]
901    pub include_document_nodes: bool,
902}
903
/// Serde fallback for `GraphTypeConfig::name`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
907
908impl Default for GraphTypeConfig {
909    fn default() -> Self {
910        Self {
911            name: "accounting_network".to_string(),
912            aggregate_edges: false,
913            min_edge_weight: 0.0,
914            include_document_nodes: false,
915        }
916    }
917}
918
919/// Export format for graph data.
920#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
921#[serde(rename_all = "snake_case")]
922pub enum GraphExportFormat {
923    /// PyTorch Geometric format (.npy files + metadata.json).
924    PytorchGeometric,
925    /// Neo4j format (CSV files + Cypher import scripts).
926    Neo4j,
927    /// Deep Graph Library format.
928    Dgl,
929    /// RustGraph/RustAssureTwin JSON format.
930    RustGraph,
931    /// RustGraph multi-layer hypergraph format (nodes.jsonl + edges.jsonl + hyperedges.jsonl).
932    RustGraphHypergraph,
933}
934
935/// Scenario configuration for metadata, tagging, and ML training setup.
936///
937/// This section enables tracking the purpose and characteristics of a generation run.
938#[derive(Debug, Clone, Default, Serialize, Deserialize)]
939pub struct ScenarioConfig {
940    /// Tags for categorizing and filtering datasets.
941    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
942    #[serde(default)]
943    pub tags: Vec<String>,
944
945    /// Data quality profile preset.
946    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
947    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
948    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
949    #[serde(default)]
950    pub profile: Option<String>,
951
952    /// Human-readable description of the scenario purpose.
953    #[serde(default)]
954    pub description: Option<String>,
955
956    /// Whether this run is for ML training (enables balanced labeling).
957    #[serde(default)]
958    pub ml_training: bool,
959
960    /// Target anomaly class balance for ML training.
961    /// If set, anomalies will be injected to achieve this ratio.
962    #[serde(default)]
963    pub target_anomaly_ratio: Option<f64>,
964
965    /// Custom metadata key-value pairs.
966    #[serde(default)]
967    pub metadata: std::collections::HashMap<String, String>,
968}
969
970/// Temporal drift configuration for simulating distribution changes over time.
971///
972/// This enables generation of data that shows realistic temporal evolution,
973/// useful for training drift detection models and testing temporal robustness.
974#[derive(Debug, Clone, Serialize, Deserialize)]
975pub struct TemporalDriftConfig {
976    /// Enable temporal drift simulation.
977    #[serde(default)]
978    pub enabled: bool,
979
980    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
981    /// Simulates gradual inflation or business growth.
982    #[serde(default = "default_amount_drift")]
983    pub amount_mean_drift: f64,
984
985    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
986    /// Simulates increasing volatility over time.
987    #[serde(default)]
988    pub amount_variance_drift: f64,
989
990    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
991    /// Simulates increasing fraud attempts or degrading controls.
992    #[serde(default)]
993    pub anomaly_rate_drift: f64,
994
995    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
996    /// Higher values cause more rapid distribution shifts.
997    #[serde(default = "default_concept_drift")]
998    pub concept_drift_rate: f64,
999
1000    /// Sudden drift events - probability of a sudden distribution shift in any period.
1001    #[serde(default)]
1002    pub sudden_drift_probability: f64,
1003
1004    /// Magnitude of sudden drift events when they occur (multiplier).
1005    #[serde(default = "default_sudden_drift_magnitude")]
1006    pub sudden_drift_magnitude: f64,
1007
1008    /// Seasonal drift - enable cyclic patterns that repeat annually.
1009    #[serde(default)]
1010    pub seasonal_drift: bool,
1011
1012    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
1013    #[serde(default)]
1014    pub drift_start_period: u32,
1015
1016    /// Drift type: "gradual", "sudden", "recurring", "mixed"
1017    #[serde(default = "default_drift_type")]
1018    pub drift_type: DriftType,
1019}
1020
/// Serde fallback for `TemporalDriftConfig::amount_mean_drift`: 0.02.
fn default_amount_drift() -> f64 {
    const MEAN_DRIFT: f64 = 0.02;
    MEAN_DRIFT
}
1024
/// Serde fallback for `TemporalDriftConfig::concept_drift_rate`: 0.01.
fn default_concept_drift() -> f64 {
    const CONCEPT_DRIFT: f64 = 0.01;
    CONCEPT_DRIFT
}
1028
/// Serde fallback for `TemporalDriftConfig::sudden_drift_magnitude`: 2.0.
fn default_sudden_drift_magnitude() -> f64 {
    const MAGNITUDE: f64 = 2.0;
    MAGNITUDE
}
1032
1033fn default_drift_type() -> DriftType {
1034    DriftType::Gradual
1035}
1036
1037impl Default for TemporalDriftConfig {
1038    fn default() -> Self {
1039        Self {
1040            enabled: false,
1041            amount_mean_drift: 0.02,
1042            amount_variance_drift: 0.0,
1043            anomaly_rate_drift: 0.0,
1044            concept_drift_rate: 0.01,
1045            sudden_drift_probability: 0.0,
1046            sudden_drift_magnitude: 2.0,
1047            seasonal_drift: false,
1048            drift_start_period: 0,
1049            drift_type: DriftType::Gradual,
1050        }
1051    }
1052}
1053
1054impl TemporalDriftConfig {
1055    /// Convert to core DriftConfig for use in generators.
1056    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
1057        datasynth_core::distributions::DriftConfig {
1058            enabled: self.enabled,
1059            amount_mean_drift: self.amount_mean_drift,
1060            amount_variance_drift: self.amount_variance_drift,
1061            anomaly_rate_drift: self.anomaly_rate_drift,
1062            concept_drift_rate: self.concept_drift_rate,
1063            sudden_drift_probability: self.sudden_drift_probability,
1064            sudden_drift_magnitude: self.sudden_drift_magnitude,
1065            seasonal_drift: self.seasonal_drift,
1066            drift_start_period: self.drift_start_period,
1067            drift_type: match self.drift_type {
1068                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
1069                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
1070                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
1071                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
1072            },
1073            regime_changes: Vec::new(),
1074            economic_cycle: Default::default(),
1075            parameter_drifts: Vec::new(),
1076        }
1077    }
1078}
1079
1080/// Types of temporal drift patterns.
1081#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1082#[serde(rename_all = "snake_case")]
1083pub enum DriftType {
1084    /// Gradual, continuous drift over time (like inflation).
1085    #[default]
1086    Gradual,
1087    /// Sudden, point-in-time shifts (like policy changes).
1088    Sudden,
1089    /// Recurring patterns that cycle (like seasonal variations).
1090    Recurring,
1091    /// Combination of gradual background drift with occasional sudden shifts.
1092    Mixed,
1093}
1094
1095// ============================================================================
1096// Streaming Output API Configuration (Phase 2)
1097// ============================================================================
1098
1099/// Configuration for streaming output API.
1100#[derive(Debug, Clone, Serialize, Deserialize)]
1101pub struct StreamingSchemaConfig {
1102    /// Enable streaming output.
1103    #[serde(default)]
1104    pub enabled: bool,
1105    /// Target events per second (0 = unlimited, default 0).
1106    #[serde(default)]
1107    pub events_per_second: f64,
1108    /// Token bucket burst size (default 100).
1109    #[serde(default = "default_burst_size")]
1110    pub burst_size: u32,
1111    /// Buffer size for streaming (number of items).
1112    #[serde(default = "default_buffer_size")]
1113    pub buffer_size: usize,
1114    /// Enable progress reporting.
1115    #[serde(default = "default_true")]
1116    pub enable_progress: bool,
1117    /// Progress reporting interval (number of items).
1118    #[serde(default = "default_progress_interval")]
1119    pub progress_interval: u64,
1120    /// Backpressure strategy.
1121    #[serde(default)]
1122    pub backpressure: BackpressureSchemaStrategy,
1123}
1124
/// Serde fallback for `StreamingSchemaConfig::buffer_size`: 1000 items.
fn default_buffer_size() -> usize {
    const BUFFER_ITEMS: usize = 1000;
    BUFFER_ITEMS
}
1128
/// Serde fallback for `StreamingSchemaConfig::progress_interval`: 100 items.
fn default_progress_interval() -> u64 {
    const INTERVAL_ITEMS: u64 = 100;
    INTERVAL_ITEMS
}
1132
1133impl Default for StreamingSchemaConfig {
1134    fn default() -> Self {
1135        Self {
1136            enabled: false,
1137            events_per_second: 0.0,
1138            burst_size: 100,
1139            buffer_size: 1000,
1140            enable_progress: true,
1141            progress_interval: 100,
1142            backpressure: BackpressureSchemaStrategy::Block,
1143        }
1144    }
1145}
1146
1147/// Backpressure strategy for streaming output.
1148#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1149#[serde(rename_all = "snake_case")]
1150pub enum BackpressureSchemaStrategy {
1151    /// Block until space is available in the buffer.
1152    #[default]
1153    Block,
1154    /// Drop oldest items when buffer is full.
1155    DropOldest,
1156    /// Drop newest items when buffer is full.
1157    DropNewest,
1158    /// Buffer overflow items up to a limit, then block.
1159    Buffer,
1160}
1161
1162// ============================================================================
1163// Rate Limiting Configuration (Phase 5)
1164// ============================================================================
1165
1166/// Configuration for rate limiting.
1167#[derive(Debug, Clone, Serialize, Deserialize)]
1168pub struct RateLimitSchemaConfig {
1169    /// Enable rate limiting.
1170    #[serde(default)]
1171    pub enabled: bool,
1172    /// Entities per second limit.
1173    #[serde(default = "default_entities_per_second")]
1174    pub entities_per_second: f64,
1175    /// Burst size (number of tokens in bucket).
1176    #[serde(default = "default_burst_size")]
1177    pub burst_size: u32,
1178    /// Backpressure strategy for rate limiting.
1179    #[serde(default)]
1180    pub backpressure: RateLimitBackpressureSchema,
1181}
1182
/// Serde fallback for `RateLimitSchemaConfig::entities_per_second`: 1000.0.
fn default_entities_per_second() -> f64 {
    const ENTITIES_PER_SECOND: f64 = 1000.0;
    ENTITIES_PER_SECOND
}
1186
/// Serde fallback for the `burst_size` field of both
/// `StreamingSchemaConfig` and `RateLimitSchemaConfig`: 100.
fn default_burst_size() -> u32 {
    const BURST: u32 = 100;
    BURST
}
1190
1191impl Default for RateLimitSchemaConfig {
1192    fn default() -> Self {
1193        Self {
1194            enabled: false,
1195            entities_per_second: 1000.0,
1196            burst_size: 100,
1197            backpressure: RateLimitBackpressureSchema::Block,
1198        }
1199    }
1200}
1201
1202/// Backpressure strategy for rate limiting.
1203#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1204#[serde(rename_all = "snake_case")]
1205pub enum RateLimitBackpressureSchema {
1206    /// Block until rate allows.
1207    #[default]
1208    Block,
1209    /// Drop items that exceed rate.
1210    Drop,
1211    /// Buffer items and process when rate allows.
1212    Buffer,
1213}
1214
1215// ============================================================================
1216// Temporal Attribute Generation Configuration (Phase 3)
1217// ============================================================================
1218
1219/// Configuration for temporal attribute generation.
1220#[derive(Debug, Clone, Serialize, Deserialize)]
1221pub struct TemporalAttributeSchemaConfig {
1222    /// Enable temporal attribute generation.
1223    #[serde(default)]
1224    pub enabled: bool,
1225    /// Valid time configuration.
1226    #[serde(default)]
1227    pub valid_time: ValidTimeSchemaConfig,
1228    /// Transaction time configuration.
1229    #[serde(default)]
1230    pub transaction_time: TransactionTimeSchemaConfig,
1231    /// Generate version chains for entities.
1232    #[serde(default)]
1233    pub generate_version_chains: bool,
1234    /// Average number of versions per entity.
1235    #[serde(default = "default_avg_versions")]
1236    pub avg_versions_per_entity: f64,
1237}
1238
/// Serde fallback for `TemporalAttributeSchemaConfig::avg_versions_per_entity`: 1.5.
fn default_avg_versions() -> f64 {
    const AVG_VERSIONS: f64 = 1.5;
    AVG_VERSIONS
}
1242
1243impl Default for TemporalAttributeSchemaConfig {
1244    fn default() -> Self {
1245        Self {
1246            enabled: false,
1247            valid_time: ValidTimeSchemaConfig::default(),
1248            transaction_time: TransactionTimeSchemaConfig::default(),
1249            generate_version_chains: false,
1250            avg_versions_per_entity: 1.5,
1251        }
1252    }
1253}
1254
1255/// Configuration for valid time (business time) generation.
1256#[derive(Debug, Clone, Serialize, Deserialize)]
1257pub struct ValidTimeSchemaConfig {
1258    /// Probability that valid_to is set (entity has ended validity).
1259    #[serde(default = "default_closed_probability")]
1260    pub closed_probability: f64,
1261    /// Average validity duration in days.
1262    #[serde(default = "default_avg_validity_days")]
1263    pub avg_validity_days: u32,
1264    /// Standard deviation of validity duration in days.
1265    #[serde(default = "default_validity_stddev")]
1266    pub validity_stddev_days: u32,
1267}
1268
/// Serde fallback for `ValidTimeSchemaConfig::closed_probability`: 0.1.
fn default_closed_probability() -> f64 {
    const CLOSED_PROBABILITY: f64 = 0.1;
    CLOSED_PROBABILITY
}
1272
/// Serde fallback for `ValidTimeSchemaConfig::avg_validity_days`: 365.
fn default_avg_validity_days() -> u32 {
    const AVG_DAYS: u32 = 365;
    AVG_DAYS
}
1276
/// Serde fallback for `ValidTimeSchemaConfig::validity_stddev_days`: 90.
fn default_validity_stddev() -> u32 {
    const STDDEV_DAYS: u32 = 90;
    STDDEV_DAYS
}
1280
1281impl Default for ValidTimeSchemaConfig {
1282    fn default() -> Self {
1283        Self {
1284            closed_probability: 0.1,
1285            avg_validity_days: 365,
1286            validity_stddev_days: 90,
1287        }
1288    }
1289}
1290
1291/// Configuration for transaction time (system time) generation.
1292#[derive(Debug, Clone, Serialize, Deserialize)]
1293pub struct TransactionTimeSchemaConfig {
1294    /// Average recording delay in seconds (0 = immediate).
1295    #[serde(default)]
1296    pub avg_recording_delay_seconds: u32,
1297    /// Allow backdating (recording time before valid time).
1298    #[serde(default)]
1299    pub allow_backdating: bool,
1300    /// Probability of backdating if allowed.
1301    #[serde(default = "default_backdating_probability")]
1302    pub backdating_probability: f64,
1303    /// Maximum backdate days.
1304    #[serde(default = "default_max_backdate_days")]
1305    pub max_backdate_days: u32,
1306}
1307
/// Serde fallback for `TransactionTimeSchemaConfig::backdating_probability`: 0.01.
fn default_backdating_probability() -> f64 {
    const BACKDATING_PROBABILITY: f64 = 0.01;
    BACKDATING_PROBABILITY
}
1311
/// Serde fallback for `TransactionTimeSchemaConfig::max_backdate_days`: 30.
fn default_max_backdate_days() -> u32 {
    const MAX_DAYS: u32 = 30;
    MAX_DAYS
}
1315
1316impl Default for TransactionTimeSchemaConfig {
1317    fn default() -> Self {
1318        Self {
1319            avg_recording_delay_seconds: 0,
1320            allow_backdating: false,
1321            backdating_probability: 0.01,
1322            max_backdate_days: 30,
1323        }
1324    }
1325}
1326
1327// ============================================================================
1328// Relationship Generation Configuration (Phase 4)
1329// ============================================================================
1330
1331/// Configuration for relationship generation.
1332#[derive(Debug, Clone, Serialize, Deserialize)]
1333pub struct RelationshipSchemaConfig {
1334    /// Relationship type definitions.
1335    #[serde(default)]
1336    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
1337    /// Allow orphan entities (entities with no relationships).
1338    #[serde(default = "default_true")]
1339    pub allow_orphans: bool,
1340    /// Probability of creating an orphan entity.
1341    #[serde(default = "default_orphan_probability")]
1342    pub orphan_probability: f64,
1343    /// Allow circular relationships.
1344    #[serde(default)]
1345    pub allow_circular: bool,
1346    /// Maximum depth for circular relationship detection.
1347    #[serde(default = "default_max_circular_depth")]
1348    pub max_circular_depth: u32,
1349}
1350
/// Serde fallback for `RelationshipSchemaConfig::orphan_probability`: 0.01.
fn default_orphan_probability() -> f64 {
    const ORPHAN_PROBABILITY: f64 = 0.01;
    ORPHAN_PROBABILITY
}
1354
/// Serde fallback for `RelationshipSchemaConfig::max_circular_depth`: 3.
fn default_max_circular_depth() -> u32 {
    const MAX_DEPTH: u32 = 3;
    MAX_DEPTH
}
1358
1359impl Default for RelationshipSchemaConfig {
1360    fn default() -> Self {
1361        Self {
1362            relationship_types: Vec::new(),
1363            allow_orphans: true,
1364            orphan_probability: 0.01,
1365            allow_circular: false,
1366            max_circular_depth: 3,
1367        }
1368    }
1369}
1370
1371/// Configuration for a specific relationship type.
1372#[derive(Debug, Clone, Serialize, Deserialize)]
1373pub struct RelationshipTypeSchemaConfig {
1374    /// Name of the relationship type (e.g., "debits", "credits", "created").
1375    pub name: String,
1376    /// Source entity type (e.g., "journal_entry").
1377    pub source_type: String,
1378    /// Target entity type (e.g., "account").
1379    pub target_type: String,
1380    /// Cardinality rule for this relationship.
1381    #[serde(default)]
1382    pub cardinality: CardinalitySchemaRule,
1383    /// Weight for this relationship in random selection.
1384    #[serde(default = "default_relationship_weight")]
1385    pub weight: f64,
1386    /// Whether this relationship is required.
1387    #[serde(default)]
1388    pub required: bool,
1389    /// Whether this relationship is directed.
1390    #[serde(default = "default_true")]
1391    pub directed: bool,
1392}
1393
/// Serde fallback for `RelationshipTypeSchemaConfig::weight`: 1.0.
fn default_relationship_weight() -> f64 {
    const WEIGHT: f64 = 1.0;
    WEIGHT
}
1397
1398impl Default for RelationshipTypeSchemaConfig {
1399    fn default() -> Self {
1400        Self {
1401            name: String::new(),
1402            source_type: String::new(),
1403            target_type: String::new(),
1404            cardinality: CardinalitySchemaRule::default(),
1405            weight: 1.0,
1406            required: false,
1407            directed: true,
1408        }
1409    }
1410}
1411
1412/// Cardinality rule for relationships in schema config.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414#[serde(rename_all = "snake_case")]
1415pub enum CardinalitySchemaRule {
1416    /// One source to one target.
1417    OneToOne,
1418    /// One source to many targets.
1419    OneToMany {
1420        /// Minimum number of targets.
1421        min: u32,
1422        /// Maximum number of targets.
1423        max: u32,
1424    },
1425    /// Many sources to one target.
1426    ManyToOne {
1427        /// Minimum number of sources.
1428        min: u32,
1429        /// Maximum number of sources.
1430        max: u32,
1431    },
1432    /// Many sources to many targets.
1433    ManyToMany {
1434        /// Minimum targets per source.
1435        min_per_source: u32,
1436        /// Maximum targets per source.
1437        max_per_source: u32,
1438    },
1439}
1440
1441impl Default for CardinalitySchemaRule {
1442    fn default() -> Self {
1443        Self::OneToMany { min: 1, max: 5 }
1444    }
1445}
1446
1447/// Global configuration settings.
1448#[derive(Debug, Clone, Serialize, Deserialize)]
1449pub struct GlobalConfig {
1450    /// Random seed for reproducibility
1451    pub seed: Option<u64>,
1452    /// Industry sector
1453    pub industry: IndustrySector,
1454    /// Simulation start date (YYYY-MM-DD)
1455    #[serde(alias = "startDate")]
1456    pub start_date: String,
1457    /// Simulation period in months
1458    #[serde(alias = "periodMonths")]
1459    pub period_months: u32,
1460    /// Base currency for group reporting
1461    #[serde(default = "default_currency", alias = "groupCurrency")]
1462    pub group_currency: String,
1463    /// Presentation currency for consolidated financial statements (ISO 4217).
1464    /// If not set, defaults to `group_currency`.
1465    #[serde(default, alias = "presentationCurrency")]
1466    pub presentation_currency: Option<String>,
1467    /// Enable parallel generation
1468    #[serde(default = "default_true")]
1469    pub parallel: bool,
1470    /// Number of worker threads (0 = auto-detect)
1471    #[serde(default, alias = "workerThreads")]
1472    pub worker_threads: usize,
1473    /// Memory limit in MB (0 = unlimited)
1474    #[serde(default, alias = "memoryLimitMb")]
1475    pub memory_limit_mb: usize,
1476    /// Fiscal year length in months (defaults to 12 if not set).
1477    /// Used by session-based generation to split the total period into fiscal years.
1478    #[serde(default, alias = "fiscalYearMonths")]
1479    pub fiscal_year_months: Option<u32>,
1480}
1481
/// Serde fallback for `GlobalConfig::group_currency`.
fn default_currency() -> String {
    String::from("USD")
}
/// Shared serde fallback for boolean fields that default to `true`.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1488
1489/// Configuration for generation session behavior.
1490///
1491/// When enabled, the generation pipeline splits the total period into fiscal years
1492/// and generates data period-by-period, carrying forward balance state.
1493#[derive(Debug, Clone, Serialize, Deserialize)]
1494pub struct SessionSchemaConfig {
1495    /// Whether session-based (period-by-period) generation is enabled.
1496    #[serde(default)]
1497    pub enabled: bool,
1498    /// Optional path for saving/loading session checkpoint files.
1499    #[serde(default)]
1500    pub checkpoint_path: Option<String>,
1501    /// Whether to write output files per fiscal period (e.g., `period_01/`).
1502    #[serde(default = "default_true")]
1503    pub per_period_output: bool,
1504    /// Whether to also produce a single consolidated output across all periods.
1505    #[serde(default = "default_true")]
1506    pub consolidated_output: bool,
1507}
1508
1509impl Default for SessionSchemaConfig {
1510    fn default() -> Self {
1511        Self {
1512            enabled: false,
1513            checkpoint_path: None,
1514            per_period_output: true,
1515            consolidated_output: true,
1516        }
1517    }
1518}
1519
1520/// Company code configuration.
1521#[derive(Debug, Clone, Serialize, Deserialize)]
1522pub struct CompanyConfig {
1523    /// Company code identifier
1524    pub code: String,
1525    /// Company name
1526    pub name: String,
1527    /// Local currency (ISO 4217)
1528    pub currency: String,
1529    /// Functional currency for IAS 21 translation (ISO 4217).
1530    /// If not set, defaults to the `currency` field (i.e. local == functional).
1531    #[serde(default, alias = "functionalCurrency")]
1532    pub functional_currency: Option<String>,
1533    /// Country code (ISO 3166-1 alpha-2)
1534    pub country: String,
1535    /// Fiscal year variant
1536    #[serde(default = "default_fiscal_variant", alias = "fiscalYearVariant")]
1537    pub fiscal_year_variant: String,
1538    /// Transaction volume per year
1539    #[serde(alias = "annualTransactionVolume")]
1540    pub annual_transaction_volume: TransactionVolume,
1541    /// Company-specific transaction weight
1542    #[serde(default = "default_weight", alias = "volumeWeight")]
1543    pub volume_weight: f64,
1544}
1545
/// Serde fallback for `CompanyConfig::fiscal_year_variant`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}
/// Serde fallback for `CompanyConfig::volume_weight`: 1.0.
fn default_weight() -> f64 {
    const WEIGHT: f64 = 1.0;
    WEIGHT
}
1552
1553/// Transaction volume presets.
1554#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1555#[serde(rename_all = "snake_case")]
1556pub enum TransactionVolume {
1557    /// 10,000 transactions per year
1558    TenK,
1559    /// 50,000 transactions per year
1560    FiftyK,
1561    /// 100,000 transactions per year
1562    HundredK,
1563    /// 1,000,000 transactions per year
1564    OneM,
1565    /// 10,000,000 transactions per year
1566    TenM,
1567    /// 100,000,000 transactions per year
1568    HundredM,
1569    /// Custom count
1570    Custom(u64),
1571}
1572
1573impl TransactionVolume {
1574    /// Get the transaction count.
1575    pub fn count(&self) -> u64 {
1576        match self {
1577            Self::TenK => 10_000,
1578            Self::FiftyK => 50_000,
1579            Self::HundredK => 100_000,
1580            Self::OneM => 1_000_000,
1581            Self::TenM => 10_000_000,
1582            Self::HundredM => 100_000_000,
1583            Self::Custom(n) => *n,
1584        }
1585    }
1586}
1587
1588/// Chart of Accounts configuration.
1589#[derive(Debug, Clone, Serialize, Deserialize)]
1590pub struct ChartOfAccountsConfig {
1591    /// CoA complexity level
1592    pub complexity: CoAComplexity,
1593    /// Use industry-specific accounts
1594    #[serde(default = "default_true")]
1595    pub industry_specific: bool,
1596    /// Custom account definitions file
1597    pub custom_accounts: Option<PathBuf>,
1598    /// Minimum hierarchy depth
1599    #[serde(default = "default_min_depth")]
1600    pub min_hierarchy_depth: u8,
1601    /// Maximum hierarchy depth
1602    #[serde(default = "default_max_depth")]
1603    pub max_hierarchy_depth: u8,
1604    /// **v5.7.0** — expand canonical accounts into industry-specific
1605    /// 6-digit sub-accounts using the embedded
1606    /// [`datasynth_core::industry_packs`] (manufacturing, retail,
1607    /// financial_services, healthcare, technology). When `true`:
1608    ///
1609    /// - Each canonical 4-digit account that has an expansion in the
1610    ///   pack becomes a non-postable control account (`is_postable =
1611    ///   false`).
1612    /// - 2–6 6-digit sub-accounts are added per parent, with
1613    ///   suffix-driven names (`"Product Revenue — Steel Products"`),
1614    ///   industry-realistic gaps, and inherited ISO 21378 codes.
1615    /// - Generators that currently target canonical accounts via
1616    ///   constants will pick a sub-account deterministically per
1617    ///   `document_id` (preserving seed-based reproducibility).
1618    ///
1619    /// Default: `false` (preserves v5.6.0 behaviour exactly — same
1620    /// account count, same numbering, same goldens).
1621    #[serde(default, alias = "expandIndustrySubaccounts")]
1622    pub expand_industry_subaccounts: bool,
1623}
1624
/// Default for `ChartOfAccountsConfig::min_hierarchy_depth`: 2.
fn default_min_depth() -> u8 {
    const MIN_DEPTH: u8 = 2;
    MIN_DEPTH
}
/// Default for `ChartOfAccountsConfig::max_hierarchy_depth`: 5.
fn default_max_depth() -> u8 {
    const MAX_DEPTH: u8 = 5;
    MAX_DEPTH
}
1631
1632impl Default for ChartOfAccountsConfig {
1633    fn default() -> Self {
1634        Self {
1635            complexity: CoAComplexity::Small,
1636            industry_specific: true,
1637            custom_accounts: None,
1638            min_hierarchy_depth: default_min_depth(),
1639            max_hierarchy_depth: default_max_depth(),
1640            expand_industry_subaccounts: false,
1641        }
1642    }
1643}
1644
1645/// Transaction generation configuration.
1646#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1647pub struct TransactionConfig {
1648    /// Line item distribution
1649    #[serde(default)]
1650    pub line_item_distribution: LineItemDistributionConfig,
1651    /// Debit/credit balance distribution
1652    #[serde(default)]
1653    pub debit_credit_distribution: DebitCreditDistributionConfig,
1654    /// Even/odd line count distribution
1655    #[serde(default)]
1656    pub even_odd_distribution: EvenOddDistributionConfig,
1657    /// Transaction source distribution
1658    #[serde(default)]
1659    pub source_distribution: SourceDistribution,
1660    /// **T2-D** Source-mix breadth. When unset or `true` (the default), the
1661    /// emitted `source` column is drawn from a generic SAP document-type mix
1662    /// (~25 codes, entropy ~2.7) instead of the coarse `TransactionSource`
1663    /// enum (~4 values, entropy ~0.75), closing the source-mix gap measured
1664    /// in experiments/ml/FINDINGS.md §6. Industry priors, when loaded, take
1665    /// precedence. Set `false` to restore the legacy enum labels. `Option`
1666    /// (not bare `bool`) so the default is genuinely on under both serde and
1667    /// `Default::default()`.
1668    #[serde(default)]
1669    pub synthetic_source_codes: Option<bool>,
1670    /// **SOTA-1** Recurring / standard-journal templating. When unset or `true`
1671    /// (the default), the no-priors generation path reuses a small per-(company,
1672    /// process) library of standard JE account-archetypes with high probability,
1673    /// so standard postings recur (and a hot subset of accounts dominates)
1674    /// instead of every JE drawing fresh uniform accounts. Matches the corpus's
1675    /// heavy templating (FINDINGS.md sec.8: 97% recurring, top-50 cover 65%; vs
1676    /// the engine's 758/1k unique). Reuse overrides only account *choice* (the
1677    /// main RNG + amounts/dates/counts are unchanged). Set `false` for the
1678    /// legacy uniform-per-line account selection.
1679    #[serde(default)]
1680    pub recurring_templates: Option<bool>,
1681    /// **SOTA-5** Fraction of journal entries that are reversals/corrections of
1682    /// a recent JE (swap dr/cr, reference the original) — a process auditors
1683    /// specifically look for, and largely absent from the engine (FINDINGS.md
1684    /// sec.8: corpus reversal-proxy ~10% vs synthetic ~0.2%). Unset → a default
1685    /// of ~0.10 (matching the corpus proxy); `0.0` disables it. Reversals are
1686    /// interspersed without perturbing the normal JEs (separate RNG + derived id).
1687    #[serde(default)]
1688    pub reversal_rate: Option<f64>,
1689    /// **SOTA-2** Concentrate posting activity onto a hot subset of accounts via
1690    /// a Zipf (power-law) override of the per-line account pick, so a few
1691    /// accounts carry most lines like a real GL (FINDINGS.md sec.8: corpus
1692    /// top-10% of accounts ≈ 95% of lines vs the engine's near-uniform ~0.21).
1693    /// The uniform draw is still consumed (amounts/dates/counts unchanged) — only
1694    /// the chosen account moves toward the hot set. Set `false` for the legacy
1695    /// uniform-over-pool selection. Default-on when unset.
1696    #[serde(default)]
1697    pub account_concentration: Option<bool>,
1698    /// **SOTA-6** Fraction of journal entries that are allocation/assessment
1699    /// batches — large 1-to-many postings (one cost pool spread across many
1700    /// cost centers) that drive the corpus lines-per-JE tail (FINDINGS.md
1701    /// sec.8: AB docs ~52 lines vs the engine's ~4.6 mean with no large-batch
1702    /// process). Each batch carries ~30-80 cost-center-spread sub-lines and
1703    /// stays balanced. Unset → a small default (~0.008, ≈8% of lines); `0.0`
1704    /// disables. Interspersed without perturbing the normal JEs (separate RNG +
1705    /// derived id, reusing a recent JE's header).
1706    #[serde(default)]
1707    pub allocation_batch_rate: Option<f64>,
1708    /// **SOTA-3** Populate a line-level `business_unit` dimension — an
1709    /// organisational segment that rolls up the cost center, or the profit
1710    /// center as fallback (the same dimension value always maps to the same BU).
1711    /// The corpus carries a BU dimension (~11 codes) the engine lacked entirely;
1712    /// this fills it wherever a cost or profit center is present (~corpus fill),
1713    /// so BU-level analytics are coherent. Default-on when unset; `false`
1714    /// leaves `business_unit` empty (legacy).
1715    #[serde(default)]
1716    pub business_unit_dimension: Option<bool>,
1717    /// **SOTA-4** Fraction of journal entries that post in a foreign
1718    /// (document) currency — SAP-style: `debit_amount`/`credit_amount`/
1719    /// `local_amount` stay the company-ledger amount (DMBTR; the trial balance
1720    /// is unaffected), and the line's `transaction_amount` (WRBTR) plus
1721    /// `header.currency` (WAERS) / `header.exchange_rate` carry the foreign
1722    /// value. The corpus shows ~3.5% functional≠reporting (FINDINGS §8).
1723    /// Unset/`0.0` → all company-currency (default). Additive — ledger
1724    /// coherence is preserved; enable for corpus-matching / FX realism.
1725    #[serde(default)]
1726    pub foreign_currency_rate: Option<f64>,
1727    /// Seasonality configuration
1728    #[serde(default)]
1729    pub seasonality: SeasonalityConfig,
1730    /// Amount distribution
1731    #[serde(default)]
1732    pub amounts: AmountDistributionConfig,
1733    /// Benford's Law compliance configuration
1734    #[serde(default)]
1735    pub benford: BenfordConfig,
1736    /// SOTA-10 (FINDINGS §14): optional hard cap on total lines per JE. Corpus has
1737    /// p99.9 ~99 lines / max ~924; the synthetic engine occasionally produces
1738    /// 2000+-line monster JEs that degrade the audit packet's signal-to-noise.
1739    /// `None` = no cap (legacy); ~100 is a realism-matching default. Applies after
1740    /// copula adjustment; preserves balance by scaling debit/credit proportionally.
1741    #[serde(default)]
1742    pub lines_per_je_cap: Option<usize>,
1743    /// SOTA-9 (FINDINGS §14): archetype reuse probability for the recurring-templates
1744    /// process (overrides the historical 0.90 default). Corpus recurring share ~0.97;
1745    /// raising this concentrates `edges/je` toward the corpus value (currently 8.75×
1746    /// too diffuse). Range [0.0, 1.0]. None = use legacy 0.90.
1747    #[serde(default)]
1748    pub archetype_reuse_probability: Option<f64>,
1749    /// SOTA-8 (FINDINGS §14): source-conditional Dirichlet account-pair sampler.
1750    /// Models the corpus finding that per-source account usage is *concentrated*
1751    /// (entropy ~0.68 vs synth 0.97) over a *larger* pool (~23 vs 5 accts/source).
1752    /// Default off — opt-in so existing synthetic streams stay byte-identical;
1753    /// enable for audit-realism + tighter inverse-audit normal manifold.
1754    #[serde(default)]
1755    pub source_conditional_account_pair: SourceConditionalAccountPairConfig,
1756}
1757
1758/// SOTA-8 — per-source Dirichlet over account pairs. Concentration α controls
1759/// per-source structure tightness (low α = razor-tight prior, high α = diffuse);
1760/// `accts_per_source_target` controls the per-source account-pool size.
1761#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1762pub struct SourceConditionalAccountPairConfig {
1763    /// Enable the source-conditional account-pair sampler (default off).
1764    #[serde(default)]
1765    pub enabled: bool,
1766    /// Symmetric Dirichlet α — lower = more concentrated PMF per source.
1767    /// α=0.5 + N_s=25 ⇒ expected normalised entropy ≈ 0.65 (corpus median 0.68).
1768    #[serde(default = "default_source_cond_concentration")]
1769    pub concentration: f64,
1770    /// Expected distinct accounts per source (jittered by LogNormal(0, 0.3)).
1771    /// Corpus median 23.5; synth pre-SOTA-8 is ~5.
1772    #[serde(default = "default_accts_per_source_target")]
1773    pub accts_per_source_target: usize,
1774}
1775
/// Serde default for the symmetric Dirichlet concentration α of the
/// source-conditional account-pair sampler.
fn default_source_cond_concentration() -> f64 {
    const ALPHA: f64 = 0.5;
    ALPHA
}
1779
/// Serde default for the expected number of distinct accounts per source.
fn default_accts_per_source_target() -> usize {
    const ACCOUNTS_PER_SOURCE: usize = 25;
    ACCOUNTS_PER_SOURCE
}
1783
1784impl Default for SourceConditionalAccountPairConfig {
1785    fn default() -> Self {
1786        Self {
1787            enabled: false,
1788            concentration: default_source_cond_concentration(),
1789            accts_per_source_target: default_accts_per_source_target(),
1790        }
1791    }
1792}
1793
1794/// Benford's Law compliance configuration.
1795#[derive(Debug, Clone, Serialize, Deserialize)]
1796pub struct BenfordConfig {
1797    /// Enable Benford's Law compliance for amount generation
1798    #[serde(default = "default_true")]
1799    pub enabled: bool,
1800    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
1801    #[serde(default = "default_benford_tolerance")]
1802    pub tolerance: f64,
1803    /// Transaction sources exempt from Benford's Law (fixed amounts)
1804    #[serde(default)]
1805    pub exempt_sources: Vec<BenfordExemption>,
1806}
1807
/// Serde default for the allowed deviation from the ideal Benford distribution.
fn default_benford_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.05;
    TOLERANCE
}
1811
1812impl Default for BenfordConfig {
1813    fn default() -> Self {
1814        Self {
1815            enabled: true,
1816            tolerance: default_benford_tolerance(),
1817            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
1818        }
1819    }
1820}
1821
1822/// Types of transactions exempt from Benford's Law.
1823#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1824#[serde(rename_all = "snake_case")]
1825pub enum BenfordExemption {
1826    /// Recurring fixed amounts (rent, subscriptions)
1827    Recurring,
1828    /// Payroll (standardized salaries)
1829    Payroll,
1830    /// Fixed fees and charges
1831    FixedFees,
1832    /// Round number purchases (often legitimate)
1833    RoundAmounts,
1834}
1835
1836/// Distribution of transaction sources.
1837#[derive(Debug, Clone, Serialize, Deserialize)]
1838pub struct SourceDistribution {
1839    /// Manual entries percentage
1840    pub manual: f64,
1841    /// Automated system entries
1842    pub automated: f64,
1843    /// Recurring entries
1844    pub recurring: f64,
1845    /// Adjustment entries
1846    pub adjustment: f64,
1847}
1848
1849impl Default for SourceDistribution {
1850    fn default() -> Self {
1851        Self {
1852            manual: 0.20,
1853            automated: 0.70,
1854            recurring: 0.07,
1855            adjustment: 0.03,
1856        }
1857    }
1858}
1859
1860/// Output configuration.
1861#[derive(Debug, Clone, Serialize, Deserialize)]
1862pub struct OutputConfig {
1863    /// Output mode
1864    #[serde(default)]
1865    pub mode: OutputMode,
1866    /// Output directory
1867    #[serde(alias = "outputDirectory")]
1868    pub output_directory: PathBuf,
1869    /// File formats to generate. Accepts both `formats: [json, csv]`
1870    /// (canonical YAML) and `exportFormat: "json"` / `exportFormats:
1871    /// ["json", "csv"]` (SDK-style camelCase). The single-string
1872    /// `exportFormat` form is deserialised via `one_or_many_formats`
1873    /// so SDK clients submitting `exportFormat: "json"` hit the right
1874    /// code path instead of silently falling through to the Parquet
1875    /// default — the bug the SDK team flagged in v4.4.0.
1876    #[serde(
1877        default = "default_formats",
1878        alias = "exportFormats",
1879        alias = "exportFormat",
1880        deserialize_with = "one_or_many_formats"
1881    )]
1882    pub formats: Vec<FileFormat>,
1883    /// Compression settings
1884    #[serde(default)]
1885    pub compression: CompressionConfig,
1886    /// Batch size for writes
1887    #[serde(default = "default_batch_size", alias = "batchSize")]
1888    pub batch_size: usize,
1889    /// Include ACDOCA format
1890    #[serde(default = "default_true", alias = "includeAcdoca")]
1891    pub include_acdoca: bool,
1892    /// Include BSEG format
1893    #[serde(default, alias = "includeBseg")]
1894    pub include_bseg: bool,
1895    /// Partition by fiscal period
1896    #[serde(default = "default_true", alias = "partitionByPeriod")]
1897    pub partition_by_period: bool,
1898    /// Partition by company code
1899    #[serde(default, alias = "partitionByCompany")]
1900    pub partition_by_company: bool,
1901    /// Numeric serialization mode for JSON output.
1902    /// "string" (default): decimals as `"1729237.30"` — lossless precision.
1903    /// "native": decimals as `1729237.30` — friendlier for pandas/analytics.
1904    #[serde(default, alias = "numericMode")]
1905    pub numeric_mode: NumericMode,
1906    /// JSON export layout for journal entries and document flows.
1907    /// "nested" (default): `{"header": {...}, "lines": [...]}` — natural ERP structure.
1908    /// "flat": header fields repeated on every line — friendlier for analytics/ML.
1909    ///
1910    /// Accepts both `export_layout` (canonical / YAML) and `exportLayout`
1911    /// (camelCase / SDK JSON) so SDKs that follow camelCase conventions
1912    /// hit the flat path rather than silently getting the Nested default.
1913    /// Before v3.1.1 the missing camelCase alias meant SDK requests with
1914    /// `exportLayout: "flat"` were silently ignored, which SDK operators
1915    /// reported as "flat hangs generation" (the job completed with Nested
1916    /// layout, but manifests didn't match the expected flat shape).
1917    #[serde(default, alias = "exportLayout")]
1918    pub export_layout: ExportLayout,
1919    /// SAP / HANA export settings (only read when the CLI
1920    /// `--export-format sap` flag is passed). Empty by default so
1921    /// existing configs don't change behaviour; dialect defaults to
1922    /// `classic` for backward compatibility.
1923    #[serde(default, alias = "sapExport")]
1924    pub sap: SapExportSettings,
1925    /// SAF-T (Standard Audit File for Tax) export settings. Read when
1926    /// the CLI `--export-format saft` flag is passed. Defaults to
1927    /// Portugal (`pt`) because the PT variant is the most mature and
1928    /// cross-jurisdiction compatible. Override with
1929    /// `jurisdiction: pl|ro|no|lu` for the other supported countries.
1930    #[serde(default, alias = "saftExport")]
1931    pub saft: SaftExportSettings,
1932}
1933
1934/// Configuration for the SAP export writers (BKPF / BSEG / ACDOCA and
1935/// master-data tables).
1936///
1937/// Mirror of `datasynth_output::SapExportConfig` in YAML form — the CLI
1938/// translates this into the runtime struct before invoking the exporter,
1939/// replacing the v3.x hardcoded `SapExportConfig::default()`.
1940#[derive(Debug, Clone, Serialize, Deserialize)]
1941pub struct SapExportSettings {
1942    /// SAP client / MANDT column value on every table.
1943    #[serde(default = "default_sap_client")]
1944    pub client: String,
1945    /// Leading ledger for ACDOCA rows (0L for S/4HANA default).
1946    #[serde(default = "default_sap_ledger")]
1947    pub ledger: String,
1948    /// Source system identifier — written to ACDOCA.AWSYS so downstream
1949    /// consumers can distinguish synthetic rows from production ones.
1950    #[serde(default = "default_sap_source_system")]
1951    pub source_system: String,
1952    /// Local currency (WAERS / RWCUR).
1953    #[serde(default = "default_sap_currency")]
1954    pub local_currency: String,
1955    /// Optional group / consolidation currency (triggers the HSL / RHCUR columns).
1956    #[serde(default, skip_serializing_if = "Option::is_none")]
1957    pub group_currency: Option<String>,
1958    /// Which SAP tables to export. Empty = default set (bkpf, bseg, acdoca).
1959    #[serde(default)]
1960    pub tables: Vec<String>,
1961    /// Include ZSIM_* extension columns on ACDOCA rows.
1962    #[serde(default = "default_true")]
1963    pub include_extension_fields: bool,
1964    /// Export dialect — `classic` (R/3 / BODS) or `hana` (S/4HANA CDS).
1965    #[serde(default)]
1966    pub dialect: SapDialectSetting,
1967    /// Legacy flag, retained for backward compatibility. Has no effect
1968    /// when `dialect = hana`.
1969    #[serde(default = "default_true")]
1970    pub use_sap_date_format: bool,
1971}
1972
1973impl Default for SapExportSettings {
1974    fn default() -> Self {
1975        Self {
1976            client: default_sap_client(),
1977            ledger: default_sap_ledger(),
1978            source_system: default_sap_source_system(),
1979            local_currency: default_sap_currency(),
1980            group_currency: None,
1981            tables: Vec::new(),
1982            include_extension_fields: true,
1983            dialect: SapDialectSetting::default(),
1984            use_sap_date_format: true,
1985        }
1986    }
1987}
1988
/// Serde default for the SAP client (MANDT) value.
fn default_sap_client() -> String {
    String::from("100")
}
/// Serde default for the leading ledger code.
fn default_sap_ledger() -> String {
    String::from("0L")
}
/// Serde default for the source-system identifier.
fn default_sap_source_system() -> String {
    String::from("SYNTH")
}
/// Serde default for the SAP local currency code.
fn default_sap_currency() -> String {
    String::from("USD")
}
2001
2002/// SAP export dialect (wire form — `datasynth_output::SapDialect` is the
2003/// runtime form).
2004#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
2005#[serde(rename_all = "snake_case")]
2006pub enum SapDialectSetting {
2007    /// Legacy R/3 / BODS-compatible CSV (default).
2008    #[default]
2009    Classic,
2010    /// S/4HANA CDS dialect (semicolon + UTF-8 BOM + decimal comma + ISO dates).
2011    Hana,
2012}
2013
2014/// SAF-T export settings (v4.3.1).
2015#[derive(Debug, Clone, Serialize, Deserialize)]
2016pub struct SaftExportSettings {
2017    /// ISO-ish two-letter code: `pt` / `pl` / `ro` / `no` / `lu`.
2018    /// Defaults to `pt` (Portugal, most mature variant).
2019    #[serde(default = "default_saft_jurisdiction")]
2020    pub jurisdiction: String,
2021    /// Company tax registration number / VAT ID / TIN used in the
2022    /// `Header.TaxRegistrationNumber` element. Falls back to
2023    /// `"Desconhecido"` (Portuguese for "unknown") when empty.
2024    #[serde(default)]
2025    pub company_tax_id: String,
2026    /// Optional override for the company name used in the Header.
2027    /// When empty, the first configured company's `name` is used.
2028    #[serde(default)]
2029    pub company_name: String,
2030}
2031
2032impl Default for SaftExportSettings {
2033    fn default() -> Self {
2034        Self {
2035            jurisdiction: default_saft_jurisdiction(),
2036            company_tax_id: String::new(),
2037            company_name: String::new(),
2038        }
2039    }
2040}
2041
/// Serde default for the SAF-T jurisdiction code.
fn default_saft_jurisdiction() -> String {
    String::from("pt")
}
2045
2046fn default_formats() -> Vec<FileFormat> {
2047    vec![FileFormat::Parquet]
2048}
/// Serde default for the write batch size.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
2052
2053/// Custom deserializer for `formats` that accepts either a single
2054/// `FileFormat` (e.g. `"json"` for SDK `exportFormat: "json"`) or a
2055/// vector (e.g. `["json", "csv"]`). Without this shim an SDK config
2056/// with `exportFormat: "json"` would fail to parse (serde expects a
2057/// sequence for a `Vec` field) and silently fall through to defaults.
2058fn one_or_many_formats<'de, D>(deserializer: D) -> Result<Vec<FileFormat>, D::Error>
2059where
2060    D: serde::Deserializer<'de>,
2061{
2062    #[derive(Deserialize)]
2063    #[serde(untagged)]
2064    enum OneOrMany {
2065        One(FileFormat),
2066        Many(Vec<FileFormat>),
2067    }
2068    match OneOrMany::deserialize(deserializer)? {
2069        OneOrMany::One(f) => Ok(vec![f]),
2070        OneOrMany::Many(v) => Ok(v),
2071    }
2072}
2073
2074impl Default for OutputConfig {
2075    fn default() -> Self {
2076        Self {
2077            mode: OutputMode::FlatFile,
2078            output_directory: PathBuf::from("./output"),
2079            formats: default_formats(),
2080            compression: CompressionConfig::default(),
2081            batch_size: default_batch_size(),
2082            include_acdoca: true,
2083            include_bseg: false,
2084            partition_by_period: true,
2085            partition_by_company: false,
2086            numeric_mode: NumericMode::default(),
2087            export_layout: ExportLayout::default(),
2088            sap: SapExportSettings::default(),
2089            saft: SaftExportSettings::default(),
2090        }
2091    }
2092}
2093
2094/// Numeric serialization mode for JSON decimal fields.
2095#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2096#[serde(rename_all = "snake_case")]
2097pub enum NumericMode {
2098    /// Decimals as JSON strings (e.g. `"1729237.30"`). Preserves full precision.
2099    #[default]
2100    String,
2101    /// Decimals as JSON numbers (e.g. `1729237.30`). Friendlier for pandas/analytics.
2102    Native,
2103}
2104
2105/// JSON export layout for nested structures (journal entries, document flows).
2106#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2107#[serde(rename_all = "snake_case")]
2108pub enum ExportLayout {
2109    /// Nested structure: `{"header": {...}, "lines": [...]}`. Natural ERP format.
2110    #[default]
2111    Nested,
2112    /// Flat structure: header fields repeated on every line. Analytics-friendly.
2113    Flat,
2114}
2115
2116/// Output mode.
2117#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2118#[serde(rename_all = "snake_case")]
2119pub enum OutputMode {
2120    /// Stream records as generated
2121    Streaming,
2122    /// Write to flat files
2123    #[default]
2124    FlatFile,
2125    /// Both streaming and flat file
2126    Both,
2127}
2128
2129/// Supported file formats.
2130#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
2131#[serde(rename_all = "snake_case")]
2132pub enum FileFormat {
2133    Csv,
2134    Parquet,
2135    Json,
2136    JsonLines,
2137}
2138
2139/// Compression configuration.
2140#[derive(Debug, Clone, Serialize, Deserialize)]
2141pub struct CompressionConfig {
2142    /// Enable compression
2143    #[serde(default = "default_true")]
2144    pub enabled: bool,
2145    /// Compression algorithm
2146    #[serde(default)]
2147    pub algorithm: CompressionAlgorithm,
2148    /// Compression level (1-9)
2149    #[serde(default = "default_compression_level")]
2150    pub level: u8,
2151}
2152
/// Serde default for the compression level.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
2156
2157impl Default for CompressionConfig {
2158    fn default() -> Self {
2159        Self {
2160            enabled: true,
2161            algorithm: CompressionAlgorithm::default(),
2162            level: default_compression_level(),
2163        }
2164    }
2165}
2166
2167/// Compression algorithms.
2168#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2169#[serde(rename_all = "snake_case")]
2170pub enum CompressionAlgorithm {
2171    Gzip,
2172    #[default]
2173    Zstd,
2174    Lz4,
2175    Snappy,
2176}
2177
2178/// Fraud simulation configuration.
2179///
2180/// ## Document-level vs. line-level fraud
2181///
2182/// `fraud_rate` applies to individual journal-entry lines (line-level).
2183/// `document_fraud_rate` (optional) applies to source documents
2184/// (purchase orders, vendor invoices, customer invoices, payments), and when
2185/// `propagate_to_lines` is true, every JE derived from a fraudulent document
2186/// also gets `is_fraud = true`. This lets users express either:
2187///
2188///  * pure line-level fraud (`document_fraud_rate = None`): legacy behaviour;
2189///  * pure document-level fraud (`fraud_rate ≈ 0` and `document_fraud_rate` set):
2190///    fraud rings expressed at document granularity — realistic for PO/invoice
2191///    fraud schemes where one fraudulent document spawns multiple derived JEs;
2192///  * hybrid (both set): document-level scheme fraud plus unrelated line-level
2193///    slip-ups.
2194///
2195/// `propagate_to_document` does the inverse: when a JE is tagged as fraud by
2196/// the anomaly injector, its source document is also marked fraudulent.
2197#[derive(Debug, Clone, Serialize, Deserialize)]
2198pub struct FraudConfig {
2199    /// Enable fraud scenario generation
2200    #[serde(default)]
2201    pub enabled: bool,
2202    /// Line-level fraud rate: fraction of individual JE lines flagged as fraud (0.0 to 1.0).
2203    ///
2204    /// # Effective line-level prevalence
2205    ///
2206    /// If `document_fraud_rate = Some(d)` and `propagate_to_lines = true`,
2207    /// the observed line-level fraud prevalence is roughly:
2208    ///
2209    /// > `P(line is_fraud) ≈ fraud_rate + d × avg_lines_per_fraud_doc / total_lines`
2210    ///
2211    /// For a typical retail job (avg 3 lines per document, ~30 % of lines
2212    /// come from doc-flow-derived JEs) the combined rate lands near:
2213    ///
2214    /// > `fraud_rate + 0.3 × d`
2215    ///
2216    /// so setting `fraud_rate=0.02, document_fraud_rate=0.05, propagate_to_lines=true`
2217    /// produces ~3.5 % line-level fraud, not 2 %. To target a specific
2218    /// line-level prevalence X, choose `fraud_rate = X - 0.3 × d`.
2219    #[serde(default = "default_fraud_rate", alias = "fraudRate")]
2220    pub fraud_rate: f64,
2221    /// Document-level fraud rate: fraction of source documents (PO, vendor
2222    /// invoice, customer invoice, payment) flagged as fraud. `None` disables
2223    /// document-level injection; `Some(r)` marks ~r × document-count as fraud
2224    /// independently of the line-level rate.
2225    ///
2226    /// v4.4.2+ default: `Some(0.01)` — the SDK team reported
2227    /// `is_fraud_propagated: 0/72` regressed from `12/33` in 3.1.1 because
2228    /// the default had silently become None. A 1% document-fraud default
2229    /// restores the propagation signal (~0.3% of JE headers carry
2230    /// `is_fraud_propagated = true`) without meaningfully changing the
2231    /// line-level fraud prevalence. Set to `Some(0.0)` or `null` in your
2232    /// YAML to explicitly disable document-level injection.
2233    #[serde(default = "default_document_fraud_rate", alias = "documentFraudRate")]
2234    pub document_fraud_rate: Option<f64>,
2235    /// When true, flagging a document as fraudulent cascades `is_fraud = true`
2236    /// and `fraud_type` to every journal entry derived from that document,
2237    /// and records `fraud_source_document_id` on the JE header.
2238    /// Default: `true`.
2239    #[serde(default = "default_true", alias = "propagateToLines")]
2240    pub propagate_to_lines: bool,
2241    /// When true, tagging a JE as fraud via line-level anomaly injection also
2242    /// marks the JE's source document as fraudulent (if it can be resolved).
2243    /// Default: `true`.
2244    #[serde(default = "default_true", alias = "propagateToDocument")]
2245    pub propagate_to_document: bool,
2246    /// Fraud type distribution
2247    #[serde(default)]
2248    pub fraud_type_distribution: FraudTypeDistribution,
2249    /// Enable fraud clustering
2250    #[serde(default)]
2251    pub clustering_enabled: bool,
2252    /// Clustering factor
2253    #[serde(default = "default_clustering_factor")]
2254    pub clustering_factor: f64,
2255    /// Approval thresholds for threshold-adjacent fraud pattern
2256    #[serde(default = "default_approval_thresholds")]
2257    pub approval_thresholds: Vec<f64>,
2258    /// v5.30 B3 (#153) — per-business-process fraud rate overrides.
2259    ///
2260    /// Keys are business-process slugs (`"P2P"`, `"O2C"`, `"R2R"`, `"H2R"`,
2261    /// `"A2R"`); values are line-level fraud rates that **override** the
2262    /// global `fraud_rate` when a JE's selected business process matches a
2263    /// key. Unmatched processes fall back to `fraud_rate`.
2264    ///
2265    /// When empty (the default), per-process rates are disabled and every
2266    /// JE uses the global `fraud_rate` — preserving v5.29 byte-identical
2267    /// output for configs that don't opt in.
2268    ///
2269    /// # Why
2270    ///
2271    /// Real audit data shows process-specific fraud signatures (R2R
2272    /// manual-close and period-end accruals carry higher fraud
2273    /// concentration than P2P invoice-processing). The v5.29 global
2274    /// `fraud_rate` flattens this signal, leaving the GNN fraud detector
2275    /// at a uniform per-process AUC band (0.914-0.925 in the v5.29 retrain).
2276    ///
2277    /// # Example
2278    ///
2279    /// ```yaml
2280    /// fraud:
2281    ///   fraud_rate: 0.02         # baseline for unmapped processes
2282    ///   per_process_rates:
2283    ///     R2R: 0.06              # 3× baseline — period-close hot spot
2284    ///     P2P: 0.04              # 2× baseline — invoice fraud
2285    ///     O2C: 0.025             # 1.25× baseline — revenue manipulation
2286    ///     H2R: 0.015             # below baseline — payroll
2287    ///     A2R: 0.020             # baseline — asset accounting
2288    /// ```
2289    ///
2290    /// Aggregate effective line-level prevalence depends on the
2291    /// `business_processes` weights mix; calibrate to a target X by
2292    /// solving for the weighted average. For default v5.29 weights
2293    /// (P2P 0.35, O2C 0.35, R2R 0.20, H2R 0.05, A2R 0.05) the
2294    /// example above yields ~0.0335 line-level fraud.
2295    #[serde(default, alias = "perProcessRates")]
2296    pub per_process_rates: std::collections::HashMap<String, f64>,
2297    /// Behavioral-bias signatures stamped on fraud-labelled entries (weekend / round-dollar /
2298    /// off-hours / post-close). These are the canonical forensic signals a per-JE detector keys
2299    /// on; lowering them yields *subtler* fraud, raising them yields more obviously-fraudulent
2300    /// entries. Previously hardcoded — exposing them lets generators/experiments tune fraud
2301    /// detectability (the adversary's lever in co-training). See [`FraudBiasConfig`].
2302    #[serde(default, alias = "behavioralBias")]
2303    pub bias: FraudBiasConfig,
2304    /// Persistent fraud *campaigns* — counterparty-pinned, relocation-structured fraud that recurs
2305    /// across periods: a beneficiary (counterparty) account stays fixed while the booking leg
2306    /// rotates period-to-period. Turns the default i.i.d.-in-time fraud DGP into a campaign
2307    /// simulator so cross-period / relational / memory detectors can be benchmarked (FINDINGS
2308    /// §33/§36/§40). Off by default → byte-identical output. See [`FraudCampaignConfig`].
2309    #[serde(default, alias = "fraudCampaigns")]
2310    pub campaigns: FraudCampaignConfig,
2311    /// Fraud *difficulty* preset — a single knob spanning loud-forensic → residual-faint that
2312    /// co-sets the behavioral-bias signatures (the validated subtlety lever, FINDINGS §43/§44).
2313    /// `Standard` (default) uses the explicit `bias` field, preserving byte-identical output; the
2314    /// other levels override it. Gives benchmark builders a controllable hardness axis. See
2315    /// [`FraudDifficulty`] and [`FraudConfig::effective_bias`].
2316    #[serde(default)]
2317    pub difficulty: FraudDifficulty,
2318}
2319
2320/// Fraud difficulty preset — a single knob over fraud detectability, resolved to a
2321/// [`FraudBiasConfig`] by [`FraudConfig::effective_bias`]. Spans the co-training subtlety axis
2322/// (FINDINGS §43/§44): loud forensic signatures at one end, residual-faint (bias-off) at the other.
2323#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2324#[serde(rename_all = "snake_case")]
2325pub enum FraudDifficulty {
2326    /// Use the explicit `bias` config as-is (back-compat; byte-identical default).
2327    #[default]
2328    Standard,
2329    /// Loud forensic signatures — the easiest fraud to detect.
2330    Forensic,
2331    /// Faint signatures — harder, fewer forensic tells.
2332    Subtle,
2333    /// Behavioral bias off entirely — residual-faint fraud; the hardest, label-free-defeating case
2334    /// that motivates the supervised / co-training arm (FINDINGS §44).
2335    Adversarial,
2336}
2337
2338impl FraudConfig {
2339    /// The behavioral-bias config actually applied, after resolving [`FraudConfig::difficulty`].
2340    /// `Standard` returns the explicit `bias` field unchanged (byte-identical); the other presets
2341    /// override it, spanning loud-forensic → residual-faint.
2342    pub fn effective_bias(&self) -> FraudBiasConfig {
2343        match self.difficulty {
2344            FraudDifficulty::Standard => self.bias,
2345            FraudDifficulty::Forensic => FraudBiasConfig {
2346                enabled: true,
2347                weekend_bias: 0.55,
2348                round_dollar_bias: 0.65,
2349                off_hours_bias: 0.55,
2350                post_close_bias: 0.45,
2351            },
2352            FraudDifficulty::Subtle => FraudBiasConfig {
2353                enabled: true,
2354                weekend_bias: 0.10,
2355                round_dollar_bias: 0.10,
2356                off_hours_bias: 0.10,
2357                post_close_bias: 0.05,
2358            },
2359            FraudDifficulty::Adversarial => FraudBiasConfig {
2360                enabled: false,
2361                weekend_bias: 0.0,
2362                round_dollar_bias: 0.0,
2363                off_hours_bias: 0.0,
2364                post_close_bias: 0.0,
2365            },
2366        }
2367    }
2368}
2369
/// Persistent fraud-campaign configuration. A campaign restructures a handful of journal entries
/// per period into a counterparty-pinned, relocation-structured scheme: the beneficiary account
/// stays fixed across the campaign (the relocation-invariant handle, FINDINGS §36/§40) while the
/// booking leg rotates from a pool every `rotate_every_periods`. Off by default.
///
/// Every field is optional on input; omitted keys fall back to the per-field defaults noted
/// below. Multi-word fields also accept their camelCase alias.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudCampaignConfig {
    /// Master switch — when `false` (default), no campaigns are planned and output is byte-identical.
    #[serde(default)]
    pub enabled: bool,
    /// Number of distinct persistent campaigns to plant. Default 1.
    #[serde(default = "default_campaign_count")]
    pub count: u32,
    /// Fraud journal entries restructured into each campaign per period. Default 2.
    #[serde(default = "default_campaign_per_period", alias = "perPeriodCount")]
    pub per_period_count: u32,
    /// Size of the rotating booking-leg account pool (the relocating leg). Default 6.
    #[serde(default = "default_campaign_booking_pool", alias = "bookingLegPool")]
    pub booking_leg_pool: u32,
    /// Relocate the booking leg every N periods (1 = relocate every period). Default 1.
    #[serde(
        default = "default_campaign_rotate_every",
        alias = "rotateEveryPeriods"
    )]
    pub rotate_every_periods: u32,
    /// Length of a campaign period in days (the JE timeline is bucketed by this). Default 30.
    #[serde(default = "default_campaign_period_days", alias = "periodDays")]
    pub period_days: u32,
    /// Synthetic prior-year carry-forward register (the confirmation channel, FINDINGS §40/§59).
    /// When enabled, the engine emits a partial/noisy register of confirmed prior-period campaign
    /// findings — DataSynth knows the planted truth, so it can produce the `PRIOR_YEAR` confirmed
    /// findings an audit team would carry forward. Off by default. See [`CarryForwardConfig`].
    #[serde(default, alias = "carryForward")]
    pub carry_forward: CarryForwardConfig,
}
2404
/// Synthetic carry-forward (prior-year confirmed-findings) register config. The register confirms a
/// `confirmation_rate` fraction of the true campaign counterparties (as a real audit catches only
/// some prior fraud) and adds a `false_positive_rate` of legitimate counterparties (auditor errors).
/// The memory arm consumes the register; recall scales ~linearly with the confirmation rate (§59).
///
/// All fields are optional on input; the two rates also accept camelCase aliases.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct CarryForwardConfig {
    /// Master switch — off by default (no register emitted; byte-identical).
    #[serde(default)]
    pub enabled: bool,
    /// Fraction of true campaign counterparties confirmed in the prior period (0..1). Default 0.6.
    #[serde(default = "default_confirmation_rate", alias = "confirmationRate")]
    pub confirmation_rate: f64,
    /// Fraction (of the true-finding count) of legitimate counterparties wrongly confirmed (0..1).
    /// Default 0.05.
    #[serde(default = "default_false_positive_rate", alias = "falsePositiveRate")]
    pub false_positive_rate: f64,
}
2421
/// Serde fallback for [`CarryForwardConfig::confirmation_rate`]: 60 % of true counterparties.
fn default_confirmation_rate() -> f64 {
    0.6
}
/// Serde fallback for [`CarryForwardConfig::false_positive_rate`]: 5 %.
fn default_false_positive_rate() -> f64 {
    0.05
}
2428
2429impl Default for CarryForwardConfig {
2430    fn default() -> Self {
2431        Self {
2432            enabled: false,
2433            confirmation_rate: default_confirmation_rate(),
2434            false_positive_rate: default_false_positive_rate(),
2435        }
2436    }
2437}
2438
/// Serde fallback for [`FraudCampaignConfig::count`]: a single campaign.
fn default_campaign_count() -> u32 {
    1
}
/// Serde fallback for [`FraudCampaignConfig::per_period_count`]: two entries per period.
fn default_campaign_per_period() -> u32 {
    2
}
/// Serde fallback for [`FraudCampaignConfig::booking_leg_pool`]: a pool of six accounts.
fn default_campaign_booking_pool() -> u32 {
    6
}
/// Serde fallback for [`FraudCampaignConfig::rotate_every_periods`]: relocate every period.
fn default_campaign_rotate_every() -> u32 {
    1
}
/// Serde fallback for [`FraudCampaignConfig::period_days`]: 30-day periods.
fn default_campaign_period_days() -> u32 {
    30
}
2454
2455impl Default for FraudCampaignConfig {
2456    fn default() -> Self {
2457        Self {
2458            enabled: false,
2459            count: default_campaign_count(),
2460            per_period_count: default_campaign_per_period(),
2461            booking_leg_pool: default_campaign_booking_pool(),
2462            rotate_every_periods: default_campaign_rotate_every(),
2463            period_days: default_campaign_period_days(),
2464            carry_forward: CarryForwardConfig::default(),
2465        }
2466    }
2467}
2468
2469impl FraudCampaignConfig {
2470    /// Whether campaigns should actually be planned (enabled with a sane, non-degenerate spec).
2471    pub fn is_active(&self) -> bool {
2472        self.enabled
2473            && self.count >= 1
2474            && self.per_period_count >= 1
2475            && self.booking_leg_pool >= 1
2476            && self.rotate_every_periods >= 1
2477            && self.period_days >= 1
2478    }
2479}
2480
/// Probabilities for the four canonical fraud behavioral-bias signatures. Defaults match the
/// engine's historical hardcoded values (`datasynth_core::fraud_bias`), so output is byte-identical
/// unless a config overrides them.
///
/// Every field is optional on input; the four probabilities also accept camelCase aliases.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct FraudBiasConfig {
    /// Master switch — when `false`, no behavioral bias is applied to fraud entries.
    /// Falls back to `default_true` (defined elsewhere in this file) when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// P(fraud entry's posting_date shifted to a weekend). Default 0.30.
    #[serde(default = "default_weekend_bias", alias = "weekendBias")]
    pub weekend_bias: f64,
    /// P(fraud entry's amount snapped to a round target $1K/$5K/…/$100K, balance preserved). Default 0.40.
    #[serde(default = "default_round_dollar_bias", alias = "roundDollarBias")]
    pub round_dollar_bias: f64,
    /// P(fraud entry's created_at shifted to off-hours 22:00–05:59 UTC). Default 0.35.
    #[serde(default = "default_off_hours_bias", alias = "offHoursBias")]
    pub off_hours_bias: f64,
    /// P(fraud entry marked is_post_close). Default 0.25.
    #[serde(default = "default_post_close_bias", alias = "postCloseBias")]
    pub post_close_bias: f64,
}
2502
/// Serde fallback for [`FraudBiasConfig::weekend_bias`].
fn default_weekend_bias() -> f64 {
    0.30
}
/// Serde fallback for [`FraudBiasConfig::round_dollar_bias`].
fn default_round_dollar_bias() -> f64 {
    0.40
}
/// Serde fallback for [`FraudBiasConfig::off_hours_bias`].
fn default_off_hours_bias() -> f64 {
    0.35
}
/// Serde fallback for [`FraudBiasConfig::post_close_bias`].
fn default_post_close_bias() -> f64 {
    0.25
}
2515
2516impl Default for FraudBiasConfig {
2517    fn default() -> Self {
2518        Self {
2519            enabled: true,
2520            weekend_bias: default_weekend_bias(),
2521            round_dollar_bias: default_round_dollar_bias(),
2522            off_hours_bias: default_off_hours_bias(),
2523            post_close_bias: default_post_close_bias(),
2524        }
2525    }
2526}
2527
2528impl FraudBiasConfig {
2529    /// Map the YAML-facing config to the core engine's bias config.
2530    pub fn to_core(&self) -> datasynth_core::fraud_bias::FraudBehavioralBiasConfig {
2531        datasynth_core::fraud_bias::FraudBehavioralBiasConfig {
2532            enabled: self.enabled,
2533            weekend_bias: self.weekend_bias,
2534            round_dollar_bias: self.round_dollar_bias,
2535            off_hours_bias: self.off_hours_bias,
2536            post_close_bias: self.post_close_bias,
2537        }
2538    }
2539}
2540
/// Default approval-threshold ladder used by `FraudConfig::default()`:
/// six amounts from 1 000 up to 100 000.
fn default_approval_thresholds() -> Vec<f64> {
    vec![1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0]
}

/// Default line-level fraud rate (0.5 %).
fn default_fraud_rate() -> f64 {
    0.005
}
/// Default document-level fraud rate (`Some(0.05)`, i.e. 5 %).
fn default_document_fraud_rate() -> Option<f64> {
    // v5.0.1: bumped 0.01 → 0.05 to deliver meaningful scheme-level
    // fraud propagation at typical line-level rates. The 1 % default
    // (set in v4.4.2 to restore `is_fraud_propagated > 0`) was too
    // conservative — at `fraud_rate = 0.08` it produced ~3.6 % observed
    // propagation against a 26.7 % target. The new 5 % default + the
    // additive formula `P(line is_fraud) ≈ fraud_rate + 0.3 × d` yields
    // ~9.5 % combined at fraud_rate=0.08 (closer to the spec target).
    // To disable, set the key explicitly in YAML to `0.0` (parsed as
    // `Some(0.0)`) or `null` (parsed as `None`); use a higher value
    // (e.g. 0.20) for scheme-heavy fraud workloads.
    Some(0.05)
}
/// Default fraud clustering factor (3.0).
fn default_clustering_factor() -> f64 {
    3.0
}
2563
2564impl Default for FraudConfig {
2565    fn default() -> Self {
2566        Self {
2567            enabled: false,
2568            fraud_rate: default_fraud_rate(),
2569            document_fraud_rate: default_document_fraud_rate(),
2570            propagate_to_lines: true,
2571            propagate_to_document: true,
2572            fraud_type_distribution: FraudTypeDistribution::default(),
2573            clustering_enabled: false,
2574            clustering_factor: default_clustering_factor(),
2575            approval_thresholds: default_approval_thresholds(),
2576            per_process_rates: std::collections::HashMap::new(),
2577            bias: FraudBiasConfig::default(),
2578            campaigns: FraudCampaignConfig::default(),
2579            difficulty: FraudDifficulty::default(),
2580        }
2581    }
2582}
2583
/// Distribution of fraud types.
///
/// All fields default to `0.0` if absent from the YAML, so partial
/// distributions are accepted; the validator (`validate_sum_to_one`)
/// then enforces that the populated weights sum to `1.0 ± 0.01`.
///
/// `deny_unknown_fields` is set, so a key that is not one of the fields
/// below is a deserialization error rather than being silently ignored.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FraudTypeDistribution {
    /// Weight of the suspense-account-abuse fraud type.
    #[serde(default)]
    pub suspense_account_abuse: f64,
    /// Weight of the fictitious-transaction fraud type.
    #[serde(default)]
    pub fictitious_transaction: f64,
    /// Weight of the revenue-manipulation fraud type.
    #[serde(default)]
    pub revenue_manipulation: f64,
    /// Weight of the expense-capitalization fraud type.
    #[serde(default)]
    pub expense_capitalization: f64,
    /// Weight of the split-transaction fraud type.
    #[serde(default)]
    pub split_transaction: f64,
    /// Weight of the timing-anomaly fraud type.
    #[serde(default)]
    pub timing_anomaly: f64,
    /// Weight of the unauthorized-access fraud type.
    #[serde(default)]
    pub unauthorized_access: f64,
    /// Weight of the duplicate-payment fraud type.
    #[serde(default)]
    pub duplicate_payment: f64,
    /// Vendor kickback scheme.
    #[serde(default)]
    pub kickback_scheme: f64,
    /// Round-tripping funds through multiple entities or accounts.
    #[serde(default)]
    pub round_tripping: f64,
    /// Unauthorized customer/vendor discounts (sweethearting, side deals).
    #[serde(default)]
    pub unauthorized_discount: f64,
}
2618
2619impl Default for FraudTypeDistribution {
2620    fn default() -> Self {
2621        // Preserves the pre-extension default sum=1.0 over the original
2622        // eight fields.  The three additional fields (kickback_scheme,
2623        // round_tripping, unauthorized_discount) default to 0.0 so that
2624        // existing fraud packs / templates that explicitly enumerate the
2625        // original eight fields continue to merge to a 1.0 sum without
2626        // modification.  Users who want those fraud types must set them
2627        // explicitly (and rebalance the others).
2628        Self {
2629            suspense_account_abuse: 0.25,
2630            fictitious_transaction: 0.15,
2631            revenue_manipulation: 0.10,
2632            expense_capitalization: 0.10,
2633            split_transaction: 0.15,
2634            timing_anomaly: 0.10,
2635            unauthorized_access: 0.10,
2636            duplicate_payment: 0.05,
2637            kickback_scheme: 0.0,
2638            round_tripping: 0.0,
2639            unauthorized_discount: 0.0,
2640        }
2641    }
2642}
2643
2644/// Internal Controls System (ICS) configuration.
2645#[derive(Debug, Clone, Serialize, Deserialize)]
2646pub struct InternalControlsConfig {
2647    /// Enable internal controls system
2648    #[serde(default)]
2649    pub enabled: bool,
2650    /// Rate at which controls result in exceptions (0.0 - 1.0)
2651    #[serde(default = "default_exception_rate")]
2652    pub exception_rate: f64,
2653    /// Rate at which SoD violations occur (0.0 - 1.0)
2654    #[serde(default = "default_sod_violation_rate")]
2655    pub sod_violation_rate: f64,
2656    /// Export control master data to separate files
2657    #[serde(default = "default_true")]
2658    pub export_control_master_data: bool,
2659    /// SOX materiality threshold for marking transactions as SOX-relevant
2660    #[serde(default = "default_sox_materiality_threshold")]
2661    pub sox_materiality_threshold: f64,
2662    /// Enable COSO 2013 framework integration
2663    #[serde(default = "default_true")]
2664    pub coso_enabled: bool,
2665    /// Include entity-level controls in generation
2666    #[serde(default)]
2667    pub include_entity_level_controls: bool,
2668    /// Target maturity level for controls
2669    /// Valid values: "ad_hoc", "repeatable", "defined", "managed", "optimized", "mixed"
2670    #[serde(default = "default_target_maturity_level")]
2671    pub target_maturity_level: String,
2672}
2673
/// Serde fallback for [`InternalControlsConfig::exception_rate`] (2 %).
fn default_exception_rate() -> f64 {
    0.02
}

/// Serde fallback for [`InternalControlsConfig::sod_violation_rate`] (1 %).
fn default_sod_violation_rate() -> f64 {
    0.01
}

/// Serde fallback for [`InternalControlsConfig::sox_materiality_threshold`] (10 000).
fn default_sox_materiality_threshold() -> f64 {
    10000.0
}

/// Serde fallback for [`InternalControlsConfig::target_maturity_level`] (`"mixed"`).
fn default_target_maturity_level() -> String {
    "mixed".to_string()
}
2689
2690impl Default for InternalControlsConfig {
2691    fn default() -> Self {
2692        Self {
2693            enabled: false,
2694            exception_rate: default_exception_rate(),
2695            sod_violation_rate: default_sod_violation_rate(),
2696            export_control_master_data: true,
2697            sox_materiality_threshold: default_sox_materiality_threshold(),
2698            coso_enabled: true,
2699            include_entity_level_controls: false,
2700            target_maturity_level: default_target_maturity_level(),
2701        }
2702    }
2703}
2704
2705/// Business process configuration.
2706#[derive(Debug, Clone, Serialize, Deserialize)]
2707pub struct BusinessProcessConfig {
2708    /// Order-to-Cash weight
2709    #[serde(default = "default_o2c")]
2710    pub o2c_weight: f64,
2711    /// Procure-to-Pay weight
2712    #[serde(default = "default_p2p")]
2713    pub p2p_weight: f64,
2714    /// Record-to-Report weight
2715    #[serde(default = "default_r2r")]
2716    pub r2r_weight: f64,
2717    /// Hire-to-Retire weight
2718    #[serde(default = "default_h2r")]
2719    pub h2r_weight: f64,
2720    /// Acquire-to-Retire weight
2721    #[serde(default = "default_a2r")]
2722    pub a2r_weight: f64,
2723}
2724
/// Serde fallback for [`BusinessProcessConfig::o2c_weight`].
fn default_o2c() -> f64 {
    0.35
}
/// Serde fallback for [`BusinessProcessConfig::p2p_weight`].
fn default_p2p() -> f64 {
    0.30
}
/// Serde fallback for [`BusinessProcessConfig::r2r_weight`].
fn default_r2r() -> f64 {
    0.20
}
/// Serde fallback for [`BusinessProcessConfig::h2r_weight`].
fn default_h2r() -> f64 {
    0.10
}
/// Serde fallback for [`BusinessProcessConfig::a2r_weight`].
fn default_a2r() -> f64 {
    0.05
}
2740
2741impl Default for BusinessProcessConfig {
2742    fn default() -> Self {
2743        Self {
2744            o2c_weight: default_o2c(),
2745            p2p_weight: default_p2p(),
2746            r2r_weight: default_r2r(),
2747            h2r_weight: default_h2r(),
2748            a2r_weight: default_a2r(),
2749        }
2750    }
2751}
2752
2753/// User persona configuration.
2754#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2755pub struct UserPersonaConfig {
2756    /// Distribution of user personas
2757    #[serde(default)]
2758    pub persona_distribution: PersonaDistribution,
2759    /// Users per persona type
2760    #[serde(default)]
2761    pub users_per_persona: UsersPerPersona,
2762}
2763
/// Distribution of user personas for transaction generation.
///
/// None of the fields carries a serde default, so when this section is
/// present in a config all five weights must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonaDistribution {
    /// Weight of the junior-accountant persona.
    pub junior_accountant: f64,
    /// Weight of the senior-accountant persona.
    pub senior_accountant: f64,
    /// Weight of the controller persona.
    pub controller: f64,
    /// Weight of the manager persona.
    pub manager: f64,
    /// Weight of the automated-system persona.
    pub automated_system: f64,
}
2773
2774impl Default for PersonaDistribution {
2775    fn default() -> Self {
2776        Self {
2777            junior_accountant: 0.15,
2778            senior_accountant: 0.15,
2779            controller: 0.05,
2780            manager: 0.05,
2781            automated_system: 0.60,
2782        }
2783    }
2784}
2785
/// Number of users per persona type.
///
/// None of the fields carries a serde default, so when this section is
/// present in a config all five counts must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsersPerPersona {
    /// Number of junior-accountant users.
    pub junior_accountant: usize,
    /// Number of senior-accountant users.
    pub senior_accountant: usize,
    /// Number of controller users.
    pub controller: usize,
    /// Number of manager users.
    pub manager: usize,
    /// Number of automated-system users.
    pub automated_system: usize,
}
2795
2796impl Default for UsersPerPersona {
2797    fn default() -> Self {
2798        Self {
2799            junior_accountant: 10,
2800            senior_accountant: 5,
2801            controller: 2,
2802            manager: 3,
2803            automated_system: 20,
2804        }
2805    }
2806}
2807
/// Template configuration for realistic data generation.
///
/// # User-supplied template packs (v3.2.0+)
///
/// Set `path` to a directory (or single YAML/JSON file) to override or
/// extend the embedded default pools for vendor names, customer names,
/// material/asset descriptions, audit findings, bank names, and
/// department names. When `path` is `None` (the default), generators
/// use the compiled-in pools and output is byte-identical to v3.1.2.
///
/// See `crates/datasynth-core/src/templates/loader.rs::TemplateData`
/// for the full YAML schema. Use `datasynth-data templates export` to
/// dump the defaults as a starter pack.
///
/// All fields are optional on input. Note that `path` is aliased as
/// `templatesPath` (not `path` in camelCase) and `merge_strategy` as
/// `mergeStrategy`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TemplateConfig {
    /// Name generation settings
    #[serde(default)]
    pub names: NameTemplateConfig,
    /// Description generation settings
    #[serde(default)]
    pub descriptions: DescriptionTemplateConfig,
    /// Reference number settings
    #[serde(default)]
    pub references: ReferenceTemplateConfig,
    /// Optional path to a user-supplied template file or directory.
    /// When set, entries from the file(s) augment or replace the
    /// embedded defaults according to `merge_strategy`.
    ///
    /// `None` (default) = use embedded pools only (byte-identical to v3.1.2).
    #[serde(default, alias = "templatesPath")]
    pub path: Option<std::path::PathBuf>,
    /// How file-based entries combine with embedded defaults.
    ///
    /// - `extend` (default): append file entries to embedded pools,
    ///   de-duplicating. Safe for incremental overlays.
    /// - `replace`: discard embedded pools entirely and use only file
    ///   entries. Requires a fully-populated template file.
    /// - `merge_prefer_file`: replace individual categories when present
    ///   in the file; keep embedded for absent categories.
    #[serde(default, alias = "mergeStrategy")]
    pub merge_strategy: TemplateMergeStrategy,
}
2850
/// Strategy for combining user-supplied template files with embedded defaults.
///
/// Variants are (de)serialized in `snake_case`: `extend`, `replace`,
/// `merge_prefer_file`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TemplateMergeStrategy {
    /// Append file entries to embedded pools (default).
    #[default]
    Extend,
    /// Replace embedded pools entirely with file entries.
    Replace,
    /// Replace individual categories when present in file; keep embedded for absent ones.
    MergePreferFile,
}
2863
2864/// Name template configuration.
2865#[derive(Debug, Clone, Serialize, Deserialize)]
2866pub struct NameTemplateConfig {
2867    /// Distribution of name cultures
2868    #[serde(default)]
2869    pub culture_distribution: CultureDistribution,
2870    /// Email domain for generated users
2871    #[serde(default = "default_email_domain")]
2872    pub email_domain: String,
2873    /// Generate realistic display names
2874    #[serde(default = "default_true")]
2875    pub generate_realistic_names: bool,
2876}
2877
/// Serde fallback for [`NameTemplateConfig::email_domain`].
fn default_email_domain() -> String {
    String::from("company.com")
}
2881
2882impl Default for NameTemplateConfig {
2883    fn default() -> Self {
2884        Self {
2885            culture_distribution: CultureDistribution::default(),
2886            email_domain: default_email_domain(),
2887            generate_realistic_names: true,
2888        }
2889    }
2890}
2891
/// Distribution of name cultures for generation.
///
/// None of the fields carries a serde default, so when this section is
/// present in a config all seven weights must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CultureDistribution {
    /// Weight of the `western_us` name culture.
    pub western_us: f64,
    /// Weight of the `hispanic` name culture.
    pub hispanic: f64,
    /// Weight of the `german` name culture.
    pub german: f64,
    /// Weight of the `french` name culture.
    pub french: f64,
    /// Weight of the `chinese` name culture.
    pub chinese: f64,
    /// Weight of the `japanese` name culture.
    pub japanese: f64,
    /// Weight of the `indian` name culture.
    pub indian: f64,
}
2903
2904impl Default for CultureDistribution {
2905    fn default() -> Self {
2906        Self {
2907            western_us: 0.40,
2908            hispanic: 0.20,
2909            german: 0.10,
2910            french: 0.05,
2911            chinese: 0.10,
2912            japanese: 0.05,
2913            indian: 0.10,
2914        }
2915    }
2916}
2917
2918/// Description template configuration.
2919#[derive(Debug, Clone, Serialize, Deserialize)]
2920pub struct DescriptionTemplateConfig {
2921    /// Generate header text for journal entries
2922    #[serde(default = "default_true")]
2923    pub generate_header_text: bool,
2924    /// Generate line text for journal entry lines
2925    #[serde(default = "default_true")]
2926    pub generate_line_text: bool,
2927}
2928
2929impl Default for DescriptionTemplateConfig {
2930    fn default() -> Self {
2931        Self {
2932            generate_header_text: true,
2933            generate_line_text: true,
2934        }
2935    }
2936}
2937
2938/// Reference number template configuration.
2939#[derive(Debug, Clone, Serialize, Deserialize)]
2940pub struct ReferenceTemplateConfig {
2941    /// Generate reference numbers
2942    #[serde(default = "default_true")]
2943    pub generate_references: bool,
2944    /// Invoice prefix
2945    #[serde(default = "default_invoice_prefix")]
2946    pub invoice_prefix: String,
2947    /// Purchase order prefix
2948    #[serde(default = "default_po_prefix")]
2949    pub po_prefix: String,
2950    /// Sales order prefix
2951    #[serde(default = "default_so_prefix")]
2952    pub so_prefix: String,
2953}
2954
/// Serde fallback for [`ReferenceTemplateConfig::invoice_prefix`].
fn default_invoice_prefix() -> String {
    String::from("INV")
}
/// Serde fallback for [`ReferenceTemplateConfig::po_prefix`].
fn default_po_prefix() -> String {
    String::from("PO")
}
/// Serde fallback for [`ReferenceTemplateConfig::so_prefix`].
fn default_so_prefix() -> String {
    String::from("SO")
}
2964
2965impl Default for ReferenceTemplateConfig {
2966    fn default() -> Self {
2967        Self {
2968            generate_references: true,
2969            invoice_prefix: default_invoice_prefix(),
2970            po_prefix: default_po_prefix(),
2971            so_prefix: default_so_prefix(),
2972        }
2973    }
2974}
2975
2976/// Approval workflow configuration.
2977#[derive(Debug, Clone, Serialize, Deserialize)]
2978pub struct ApprovalConfig {
2979    /// Enable approval workflow generation
2980    #[serde(default)]
2981    pub enabled: bool,
2982    /// Threshold below which transactions are auto-approved
2983    #[serde(default = "default_auto_approve_threshold")]
2984    pub auto_approve_threshold: f64,
2985    /// Rate at which approvals are rejected (0.0 to 1.0)
2986    #[serde(default = "default_rejection_rate")]
2987    pub rejection_rate: f64,
2988    /// Rate at which approvals require revision (0.0 to 1.0)
2989    #[serde(default = "default_revision_rate")]
2990    pub revision_rate: f64,
2991    /// Average delay in hours for approval processing
2992    #[serde(default = "default_approval_delay_hours")]
2993    pub average_approval_delay_hours: f64,
2994    /// Approval chain thresholds
2995    #[serde(default)]
2996    pub thresholds: Vec<ApprovalThresholdConfig>,
2997}
2998
/// Serde fallback for [`ApprovalConfig::auto_approve_threshold`] (1 000).
fn default_auto_approve_threshold() -> f64 {
    1000.0
}
/// Serde fallback for [`ApprovalConfig::rejection_rate`] (2 %).
fn default_rejection_rate() -> f64 {
    0.02
}
/// Serde fallback for [`ApprovalConfig::revision_rate`] (5 %).
fn default_revision_rate() -> f64 {
    0.05
}
/// Serde fallback for [`ApprovalConfig::average_approval_delay_hours`] (4 hours).
fn default_approval_delay_hours() -> f64 {
    4.0
}
3011
3012impl Default for ApprovalConfig {
3013    fn default() -> Self {
3014        Self {
3015            enabled: false,
3016            auto_approve_threshold: default_auto_approve_threshold(),
3017            rejection_rate: default_rejection_rate(),
3018            revision_rate: default_revision_rate(),
3019            average_approval_delay_hours: default_approval_delay_hours(),
3020            thresholds: vec![
3021                ApprovalThresholdConfig {
3022                    amount: 1000.0,
3023                    level: 1,
3024                    roles: vec!["senior_accountant".to_string()],
3025                },
3026                ApprovalThresholdConfig {
3027                    amount: 10000.0,
3028                    level: 2,
3029                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
3030                },
3031                ApprovalThresholdConfig {
3032                    amount: 100000.0,
3033                    level: 3,
3034                    roles: vec![
3035                        "senior_accountant".to_string(),
3036                        "controller".to_string(),
3037                        "manager".to_string(),
3038                    ],
3039                },
3040                ApprovalThresholdConfig {
3041                    amount: 500000.0,
3042                    level: 4,
3043                    roles: vec![
3044                        "senior_accountant".to_string(),
3045                        "controller".to_string(),
3046                        "manager".to_string(),
3047                        "executive".to_string(),
3048                    ],
3049                },
3050            ],
3051        }
3052    }
3053}
3054
/// Configuration for a single approval threshold.
///
/// None of the fields carries a serde default, so every entry in a
/// configured threshold list must supply all three.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalThresholdConfig {
    /// Amount threshold
    pub amount: f64,
    /// Approval level required
    pub level: u8,
    /// Roles that can approve at this level
    pub roles: Vec<String>,
}
3065
3066/// Department configuration.
3067#[derive(Debug, Clone, Serialize, Deserialize)]
3068pub struct DepartmentConfig {
3069    /// Enable department assignment
3070    #[serde(default)]
3071    pub enabled: bool,
3072    /// Multiplier for department headcounts
3073    #[serde(default = "default_headcount_multiplier")]
3074    pub headcount_multiplier: f64,
3075    /// Custom department definitions (optional)
3076    #[serde(default)]
3077    pub custom_departments: Vec<CustomDepartmentConfig>,
3078}
3079
/// Serde fallback for [`DepartmentConfig::headcount_multiplier`]: 1.0.
fn default_headcount_multiplier() -> f64 {
    1.0
}
3083
3084impl Default for DepartmentConfig {
3085    fn default() -> Self {
3086        Self {
3087            enabled: false,
3088            headcount_multiplier: default_headcount_multiplier(),
3089            custom_departments: Vec::new(),
3090        }
3091    }
3092}
3093
3094/// Custom department definition.
3095#[derive(Debug, Clone, Serialize, Deserialize)]
3096pub struct CustomDepartmentConfig {
3097    /// Department code
3098    pub code: String,
3099    /// Department name
3100    pub name: String,
3101    /// Associated cost center
3102    #[serde(default)]
3103    pub cost_center: Option<String>,
3104    /// Primary business processes
3105    #[serde(default)]
3106    pub primary_processes: Vec<String>,
3107    /// Parent department code
3108    #[serde(default)]
3109    pub parent_code: Option<String>,
3110}
3111
3112// ============================================================================
3113// Master Data Configuration
3114// ============================================================================
3115
3116/// Master data generation configuration.
3117#[derive(Debug, Clone, Default, Serialize, Deserialize)]
3118pub struct MasterDataConfig {
3119    /// Vendor master data settings
3120    #[serde(default)]
3121    pub vendors: VendorMasterConfig,
3122    /// Customer master data settings
3123    #[serde(default)]
3124    pub customers: CustomerMasterConfig,
3125    /// Material master data settings
3126    #[serde(default)]
3127    pub materials: MaterialMasterConfig,
3128    /// Fixed asset master data settings
3129    #[serde(default)]
3130    pub fixed_assets: FixedAssetMasterConfig,
3131    /// Employee master data settings
3132    #[serde(default)]
3133    pub employees: EmployeeMasterConfig,
3134    /// Cost center master data settings
3135    #[serde(default)]
3136    pub cost_centers: CostCenterMasterConfig,
3137}
3138
3139/// Vendor master data configuration.
3140#[derive(Debug, Clone, Serialize, Deserialize)]
3141pub struct VendorMasterConfig {
3142    /// Number of vendors to generate
3143    #[serde(default = "default_vendor_count")]
3144    pub count: usize,
3145    /// Percentage of vendors that are intercompany (0.0 to 1.0)
3146    #[serde(default = "default_intercompany_percent")]
3147    pub intercompany_percent: f64,
3148    /// Payment terms distribution
3149    #[serde(default)]
3150    pub payment_terms_distribution: PaymentTermsDistribution,
3151    /// Vendor behavior distribution
3152    #[serde(default)]
3153    pub behavior_distribution: VendorBehaviorDistribution,
3154    /// Generate bank account details
3155    #[serde(default = "default_true")]
3156    pub generate_bank_accounts: bool,
3157    /// Generate tax IDs
3158    #[serde(default = "default_true")]
3159    pub generate_tax_ids: bool,
3160}
3161
/// Serde fallback for [`VendorMasterConfig::count`]: 500 vendors.
fn default_vendor_count() -> usize {
    500
}

/// Serde fallback for the `intercompany_percent` field of both
/// [`VendorMasterConfig`] and [`CustomerMasterConfig`]: 5 %.
fn default_intercompany_percent() -> f64 {
    0.05
}
3169
3170impl Default for VendorMasterConfig {
3171    fn default() -> Self {
3172        Self {
3173            count: default_vendor_count(),
3174            intercompany_percent: default_intercompany_percent(),
3175            payment_terms_distribution: PaymentTermsDistribution::default(),
3176            behavior_distribution: VendorBehaviorDistribution::default(),
3177            generate_bank_accounts: true,
3178            generate_tax_ids: true,
3179        }
3180    }
3181}
3182
/// Payment terms distribution for vendors.
///
/// None of the fields carries a serde default, so when this section is
/// present in a config all six weights must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDistribution {
    /// Net 30 days
    pub net_30: f64,
    /// Net 60 days
    pub net_60: f64,
    /// Net 90 days
    pub net_90: f64,
    /// 2% 10 Net 30 (early payment discount)
    pub two_ten_net_30: f64,
    /// Due on receipt
    pub due_on_receipt: f64,
    /// End of month
    pub end_of_month: f64,
}
3199
3200impl Default for PaymentTermsDistribution {
3201    fn default() -> Self {
3202        Self {
3203            net_30: 0.40,
3204            net_60: 0.20,
3205            net_90: 0.10,
3206            two_ten_net_30: 0.15,
3207            due_on_receipt: 0.05,
3208            end_of_month: 0.10,
3209        }
3210    }
3211}
3212
/// Vendor behavior distribution.
///
/// All fields default to `0.0` if absent from the YAML, so partial
/// distributions are accepted; the validator (`validate_sum_to_one`)
/// then enforces that the populated weights sum to `1.0 ± 0.01`.
///
/// `deny_unknown_fields` is set, so a key that is not one of the fields
/// below is a deserialization error rather than being silently ignored.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct VendorBehaviorDistribution {
    /// Reliable vendors (consistent delivery, quality)
    #[serde(default)]
    pub reliable: f64,
    /// Sometimes late vendors
    #[serde(default)]
    pub sometimes_late: f64,
    /// Inconsistent quality vendors
    #[serde(default)]
    pub inconsistent_quality: f64,
    /// Premium vendors (high quality, premium pricing)
    #[serde(default)]
    pub premium: f64,
    /// Budget vendors (lower quality, lower pricing)
    #[serde(default)]
    pub budget: f64,
    /// Erratic vendors (variable behavior, unpredictable performance)
    #[serde(default)]
    pub erratic: f64,
    /// Problematic vendors (frequent issues, high risk for fraud scenarios)
    #[serde(default)]
    pub problematic: f64,
}
3243
3244impl Default for VendorBehaviorDistribution {
3245    fn default() -> Self {
3246        // Preserves the pre-extension default sum=1.0 over the original
3247        // five fields.  `erratic` and `problematic` default to 0.0 so
3248        // that existing configs/packs continue to merge to a 1.0 sum
3249        // without modification.
3250        Self {
3251            reliable: 0.50,
3252            sometimes_late: 0.20,
3253            inconsistent_quality: 0.10,
3254            premium: 0.10,
3255            budget: 0.10,
3256            erratic: 0.0,
3257            problematic: 0.0,
3258        }
3259    }
3260}
3261
3262/// Customer master data configuration.
3263#[derive(Debug, Clone, Serialize, Deserialize)]
3264pub struct CustomerMasterConfig {
3265    /// Number of customers to generate
3266    #[serde(default = "default_customer_count")]
3267    pub count: usize,
3268    /// Percentage of customers that are intercompany (0.0 to 1.0)
3269    #[serde(default = "default_intercompany_percent")]
3270    pub intercompany_percent: f64,
3271    /// Credit rating distribution
3272    #[serde(default)]
3273    pub credit_rating_distribution: CreditRatingDistribution,
3274    /// Payment behavior distribution
3275    #[serde(default)]
3276    pub payment_behavior_distribution: PaymentBehaviorDistribution,
3277    /// Generate credit limits based on rating
3278    #[serde(default = "default_true")]
3279    pub generate_credit_limits: bool,
3280}
3281
/// Serde fallback for `CustomerMasterConfig::count`: 2,000 customers.
fn default_customer_count() -> usize {
    2_000
}
3285
3286impl Default for CustomerMasterConfig {
3287    fn default() -> Self {
3288        Self {
3289            count: default_customer_count(),
3290            intercompany_percent: default_intercompany_percent(),
3291            credit_rating_distribution: CreditRatingDistribution::default(),
3292            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
3293            generate_credit_limits: true,
3294        }
3295    }
3296}
3297
3298/// Credit rating distribution for customers.
3299///
3300/// Two parallel vocabularies are accepted:
3301///   * Bond-grade tiers: `aaa`, `aa`, `a`, `bbb`, `bb`, `b`, `below_b`
3302///   * Plain-English tiers: `excellent`, `good`, `fair`, `poor`
3303///
3304/// All fields default to `0.0` if absent; mix and match as needed.
3305/// The validator enforces that the populated weights sum to `1.0`.
3306#[derive(Debug, Clone, Serialize, Deserialize)]
3307#[serde(deny_unknown_fields)]
3308pub struct CreditRatingDistribution {
3309    /// AAA rating
3310    #[serde(default)]
3311    pub aaa: f64,
3312    /// AA rating
3313    #[serde(default)]
3314    pub aa: f64,
3315    /// A rating
3316    #[serde(default)]
3317    pub a: f64,
3318    /// BBB rating
3319    #[serde(default)]
3320    pub bbb: f64,
3321    /// BB rating
3322    #[serde(default)]
3323    pub bb: f64,
3324    /// B rating
3325    #[serde(default)]
3326    pub b: f64,
3327    /// Below B rating
3328    #[serde(default)]
3329    pub below_b: f64,
3330    /// Plain-English: excellent credit (≈ AAA/AA tier)
3331    #[serde(default)]
3332    pub excellent: f64,
3333    /// Plain-English: good credit (≈ A tier)
3334    #[serde(default)]
3335    pub good: f64,
3336    /// Plain-English: fair credit (≈ BBB/BB tier)
3337    #[serde(default)]
3338    pub fair: f64,
3339    /// Plain-English: poor credit (≈ B/below tier)
3340    #[serde(default)]
3341    pub poor: f64,
3342}
3343
3344impl Default for CreditRatingDistribution {
3345    fn default() -> Self {
3346        Self {
3347            aaa: 0.05,
3348            aa: 0.10,
3349            a: 0.20,
3350            bbb: 0.30,
3351            bb: 0.20,
3352            b: 0.10,
3353            below_b: 0.05,
3354            excellent: 0.0,
3355            good: 0.0,
3356            fair: 0.0,
3357            poor: 0.0,
3358        }
3359    }
3360}
3361
3362/// Payment behavior distribution for customers.
3363///
3364/// All fields default to `0.0` if absent from the YAML.  Validator
3365/// enforces that populated weights sum to `1.0 ± 0.01`.
3366#[derive(Debug, Clone, Serialize, Deserialize)]
3367#[serde(deny_unknown_fields)]
3368pub struct PaymentBehaviorDistribution {
3369    /// Always pays early
3370    #[serde(default)]
3371    pub early_payer: f64,
3372    /// Pays on time
3373    #[serde(default)]
3374    pub on_time: f64,
3375    /// Occasionally late
3376    #[serde(default)]
3377    pub occasional_late: f64,
3378    /// Frequently late
3379    #[serde(default)]
3380    pub frequent_late: f64,
3381    /// Takes early payment discounts
3382    #[serde(default)]
3383    pub discount_taker: f64,
3384}
3385
3386impl Default for PaymentBehaviorDistribution {
3387    fn default() -> Self {
3388        Self {
3389            early_payer: 0.10,
3390            on_time: 0.50,
3391            occasional_late: 0.25,
3392            frequent_late: 0.10,
3393            discount_taker: 0.05,
3394        }
3395    }
3396}
3397
3398/// Material master data configuration.
3399#[derive(Debug, Clone, Serialize, Deserialize)]
3400pub struct MaterialMasterConfig {
3401    /// Number of materials to generate
3402    #[serde(default = "default_material_count")]
3403    pub count: usize,
3404    /// Material type distribution
3405    #[serde(default)]
3406    pub type_distribution: MaterialTypeDistribution,
3407    /// Valuation method distribution
3408    #[serde(default)]
3409    pub valuation_distribution: ValuationMethodDistribution,
3410    /// Percentage of materials with BOM (bill of materials)
3411    #[serde(default = "default_bom_percent")]
3412    pub bom_percent: f64,
3413    /// Maximum BOM depth
3414    #[serde(default = "default_max_bom_depth")]
3415    pub max_bom_depth: u8,
3416}
3417
/// Serde fallback for `MaterialMasterConfig::count`: 5,000 materials.
fn default_material_count() -> usize {
    5_000
}

/// Serde fallback for `MaterialMasterConfig::bom_percent`: 0.20.
fn default_bom_percent() -> f64 {
    0.20_f64
}

/// Serde fallback for `MaterialMasterConfig::max_bom_depth`: 3 levels.
fn default_max_bom_depth() -> u8 {
    3_u8
}
3429
3430impl Default for MaterialMasterConfig {
3431    fn default() -> Self {
3432        Self {
3433            count: default_material_count(),
3434            type_distribution: MaterialTypeDistribution::default(),
3435            valuation_distribution: ValuationMethodDistribution::default(),
3436            bom_percent: default_bom_percent(),
3437            max_bom_depth: default_max_bom_depth(),
3438        }
3439    }
3440}
3441
3442/// Material type distribution.
3443#[derive(Debug, Clone, Serialize, Deserialize)]
3444pub struct MaterialTypeDistribution {
3445    /// Raw materials
3446    pub raw_material: f64,
3447    /// Semi-finished goods
3448    pub semi_finished: f64,
3449    /// Finished goods
3450    pub finished_good: f64,
3451    /// Trading goods (purchased for resale)
3452    pub trading_good: f64,
3453    /// Operating supplies
3454    pub operating_supply: f64,
3455    /// Services
3456    pub service: f64,
3457}
3458
3459impl Default for MaterialTypeDistribution {
3460    fn default() -> Self {
3461        Self {
3462            raw_material: 0.30,
3463            semi_finished: 0.15,
3464            finished_good: 0.25,
3465            trading_good: 0.15,
3466            operating_supply: 0.10,
3467            service: 0.05,
3468        }
3469    }
3470}
3471
3472/// Valuation method distribution for materials.
3473#[derive(Debug, Clone, Serialize, Deserialize)]
3474pub struct ValuationMethodDistribution {
3475    /// Standard cost
3476    pub standard_cost: f64,
3477    /// Moving average
3478    pub moving_average: f64,
3479    /// FIFO (First In, First Out)
3480    pub fifo: f64,
3481    /// LIFO (Last In, First Out)
3482    pub lifo: f64,
3483}
3484
3485impl Default for ValuationMethodDistribution {
3486    fn default() -> Self {
3487        Self {
3488            standard_cost: 0.50,
3489            moving_average: 0.30,
3490            fifo: 0.15,
3491            lifo: 0.05,
3492        }
3493    }
3494}
3495
3496/// Fixed asset master data configuration.
3497#[derive(Debug, Clone, Serialize, Deserialize)]
3498pub struct FixedAssetMasterConfig {
3499    /// Number of fixed assets to generate
3500    #[serde(default = "default_asset_count")]
3501    pub count: usize,
3502    /// Asset class distribution
3503    #[serde(default)]
3504    pub class_distribution: AssetClassDistribution,
3505    /// Depreciation method distribution
3506    #[serde(default)]
3507    pub depreciation_distribution: DepreciationMethodDistribution,
3508    /// Percentage of assets that are fully depreciated
3509    #[serde(default = "default_fully_depreciated_percent")]
3510    pub fully_depreciated_percent: f64,
3511    /// Generate acquisition history
3512    #[serde(default = "default_true")]
3513    pub generate_acquisition_history: bool,
3514}
3515
/// Serde fallback for `FixedAssetMasterConfig::count`: 800 assets.
fn default_asset_count() -> usize {
    800_usize
}

/// Serde fallback for `FixedAssetMasterConfig::fully_depreciated_percent`: 0.15.
fn default_fully_depreciated_percent() -> f64 {
    0.15_f64
}
3523
3524impl Default for FixedAssetMasterConfig {
3525    fn default() -> Self {
3526        Self {
3527            count: default_asset_count(),
3528            class_distribution: AssetClassDistribution::default(),
3529            depreciation_distribution: DepreciationMethodDistribution::default(),
3530            fully_depreciated_percent: default_fully_depreciated_percent(),
3531            generate_acquisition_history: true,
3532        }
3533    }
3534}
3535
3536/// Asset class distribution.
3537#[derive(Debug, Clone, Serialize, Deserialize)]
3538pub struct AssetClassDistribution {
3539    /// Buildings and structures
3540    pub buildings: f64,
3541    /// Machinery and equipment
3542    pub machinery: f64,
3543    /// Vehicles
3544    pub vehicles: f64,
3545    /// IT equipment
3546    pub it_equipment: f64,
3547    /// Furniture and fixtures
3548    pub furniture: f64,
3549    /// Land (non-depreciable)
3550    pub land: f64,
3551    /// Leasehold improvements
3552    pub leasehold: f64,
3553}
3554
3555impl Default for AssetClassDistribution {
3556    fn default() -> Self {
3557        Self {
3558            buildings: 0.15,
3559            machinery: 0.30,
3560            vehicles: 0.15,
3561            it_equipment: 0.20,
3562            furniture: 0.10,
3563            land: 0.05,
3564            leasehold: 0.05,
3565        }
3566    }
3567}
3568
3569/// Depreciation method distribution.
3570#[derive(Debug, Clone, Serialize, Deserialize)]
3571pub struct DepreciationMethodDistribution {
3572    /// Straight line
3573    pub straight_line: f64,
3574    /// Declining balance
3575    pub declining_balance: f64,
3576    /// Double declining balance
3577    pub double_declining: f64,
3578    /// Sum of years' digits
3579    pub sum_of_years: f64,
3580    /// Units of production
3581    pub units_of_production: f64,
3582}
3583
3584impl Default for DepreciationMethodDistribution {
3585    fn default() -> Self {
3586        Self {
3587            straight_line: 0.60,
3588            declining_balance: 0.20,
3589            double_declining: 0.10,
3590            sum_of_years: 0.05,
3591            units_of_production: 0.05,
3592        }
3593    }
3594}
3595
3596/// Employee master data configuration.
3597#[derive(Debug, Clone, Serialize, Deserialize)]
3598pub struct EmployeeMasterConfig {
3599    /// Number of employees to generate
3600    #[serde(default = "default_employee_count")]
3601    pub count: usize,
3602    /// Generate organizational hierarchy
3603    #[serde(default = "default_true")]
3604    pub generate_hierarchy: bool,
3605    /// Maximum hierarchy depth
3606    #[serde(default = "default_hierarchy_depth")]
3607    pub max_hierarchy_depth: u8,
3608    /// Average span of control (direct reports per manager)
3609    #[serde(default = "default_span_of_control")]
3610    pub average_span_of_control: f64,
3611    /// Approval limit distribution by job level
3612    #[serde(default)]
3613    pub approval_limits: ApprovalLimitDistribution,
3614    /// Department distribution
3615    #[serde(default)]
3616    pub department_distribution: EmployeeDepartmentDistribution,
3617}
3618
/// Serde fallback for `EmployeeMasterConfig::count`: 1,500 employees.
fn default_employee_count() -> usize {
    1_500
}

/// Serde fallback for `EmployeeMasterConfig::max_hierarchy_depth`: 6 levels.
fn default_hierarchy_depth() -> u8 {
    6_u8
}

/// Serde fallback for `EmployeeMasterConfig::average_span_of_control`: 5.0.
fn default_span_of_control() -> f64 {
    5.0_f64
}
3630
3631impl Default for EmployeeMasterConfig {
3632    fn default() -> Self {
3633        Self {
3634            count: default_employee_count(),
3635            generate_hierarchy: true,
3636            max_hierarchy_depth: default_hierarchy_depth(),
3637            average_span_of_control: default_span_of_control(),
3638            approval_limits: ApprovalLimitDistribution::default(),
3639            department_distribution: EmployeeDepartmentDistribution::default(),
3640        }
3641    }
3642}
3643
3644/// Approval limit distribution by job level.
3645#[derive(Debug, Clone, Serialize, Deserialize)]
3646pub struct ApprovalLimitDistribution {
3647    /// Staff level approval limit
3648    #[serde(default = "default_staff_limit")]
3649    pub staff: f64,
3650    /// Senior staff approval limit
3651    #[serde(default = "default_senior_limit")]
3652    pub senior: f64,
3653    /// Manager approval limit
3654    #[serde(default = "default_manager_limit")]
3655    pub manager: f64,
3656    /// Director approval limit
3657    #[serde(default = "default_director_limit")]
3658    pub director: f64,
3659    /// VP approval limit
3660    #[serde(default = "default_vp_limit")]
3661    pub vp: f64,
3662    /// Executive approval limit
3663    #[serde(default = "default_executive_limit")]
3664    pub executive: f64,
3665}
3666
/// Serde fallback for `ApprovalLimitDistribution::staff`: 1,000.
fn default_staff_limit() -> f64 {
    1_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::senior`: 5,000.
fn default_senior_limit() -> f64 {
    5_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::manager`: 25,000.
fn default_manager_limit() -> f64 {
    25_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::director`: 100,000.
fn default_director_limit() -> f64 {
    100_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::vp`: 500,000.
fn default_vp_limit() -> f64 {
    500_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::executive`: positive infinity.
///
/// NOTE(review): serde_json is documented to write non-finite floats as
/// `null`, which would not read back into an `f64` field; confirm that a
/// default config survives a JSON round-trip before relying on it.
fn default_executive_limit() -> f64 {
    let unlimited = f64::INFINITY;
    unlimited
}
3685
3686impl Default for ApprovalLimitDistribution {
3687    fn default() -> Self {
3688        Self {
3689            staff: default_staff_limit(),
3690            senior: default_senior_limit(),
3691            manager: default_manager_limit(),
3692            director: default_director_limit(),
3693            vp: default_vp_limit(),
3694            executive: default_executive_limit(),
3695        }
3696    }
3697}
3698
3699/// Employee distribution across departments.
3700#[derive(Debug, Clone, Serialize, Deserialize)]
3701pub struct EmployeeDepartmentDistribution {
3702    /// Finance and Accounting
3703    pub finance: f64,
3704    /// Procurement
3705    pub procurement: f64,
3706    /// Sales
3707    pub sales: f64,
3708    /// Warehouse and Logistics
3709    pub warehouse: f64,
3710    /// IT
3711    pub it: f64,
3712    /// Human Resources
3713    pub hr: f64,
3714    /// Operations
3715    pub operations: f64,
3716    /// Executive
3717    pub executive: f64,
3718}
3719
3720impl Default for EmployeeDepartmentDistribution {
3721    fn default() -> Self {
3722        Self {
3723            finance: 0.12,
3724            procurement: 0.10,
3725            sales: 0.25,
3726            warehouse: 0.15,
3727            it: 0.10,
3728            hr: 0.05,
3729            operations: 0.20,
3730            executive: 0.03,
3731        }
3732    }
3733}
3734
3735/// Cost center master data configuration.
3736#[derive(Debug, Clone, Serialize, Deserialize)]
3737pub struct CostCenterMasterConfig {
3738    /// Number of cost centers to generate
3739    #[serde(default = "default_cost_center_count")]
3740    pub count: usize,
3741    /// Generate cost center hierarchy
3742    #[serde(default = "default_true")]
3743    pub generate_hierarchy: bool,
3744    /// Maximum hierarchy depth
3745    #[serde(default = "default_cc_hierarchy_depth")]
3746    pub max_hierarchy_depth: u8,
3747}
3748
/// Serde fallback for `CostCenterMasterConfig::count`: 50 cost centers.
fn default_cost_center_count() -> usize {
    50_usize
}

/// Serde fallback for `CostCenterMasterConfig::max_hierarchy_depth`: 3 levels.
fn default_cc_hierarchy_depth() -> u8 {
    3_u8
}
3756
3757impl Default for CostCenterMasterConfig {
3758    fn default() -> Self {
3759        Self {
3760            count: default_cost_center_count(),
3761            generate_hierarchy: true,
3762            max_hierarchy_depth: default_cc_hierarchy_depth(),
3763        }
3764    }
3765}
3766
3767// ============================================================================
3768// Document Flow Configuration
3769// ============================================================================
3770
3771/// Document flow generation configuration.
3772#[derive(Debug, Clone, Serialize, Deserialize)]
3773pub struct DocumentFlowConfig {
3774    /// P2P (Procure-to-Pay) flow configuration
3775    #[serde(default)]
3776    pub p2p: P2PFlowConfig,
3777    /// O2C (Order-to-Cash) flow configuration
3778    #[serde(default)]
3779    pub o2c: O2CFlowConfig,
3780    /// Generate document reference chains
3781    #[serde(default = "default_true")]
3782    pub generate_document_references: bool,
3783    /// Export document flow graph
3784    #[serde(default)]
3785    pub export_flow_graph: bool,
3786}
3787
3788impl Default for DocumentFlowConfig {
3789    fn default() -> Self {
3790        Self {
3791            p2p: P2PFlowConfig::default(),
3792            o2c: O2CFlowConfig::default(),
3793            generate_document_references: true,
3794            export_flow_graph: false,
3795        }
3796    }
3797}
3798
3799/// P2P (Procure-to-Pay) flow configuration.
3800#[derive(Debug, Clone, Serialize, Deserialize)]
3801pub struct P2PFlowConfig {
3802    /// Enable P2P document flow generation
3803    #[serde(default = "default_true")]
3804    pub enabled: bool,
3805    /// Three-way match success rate (PO-GR-Invoice)
3806    #[serde(default = "default_three_way_match_rate")]
3807    pub three_way_match_rate: f64,
3808    /// Rate of partial deliveries
3809    #[serde(default = "default_partial_delivery_rate")]
3810    pub partial_delivery_rate: f64,
3811    /// Rate of price variances between PO and Invoice
3812    #[serde(default = "default_price_variance_rate")]
3813    pub price_variance_rate: f64,
3814    /// Maximum price variance percentage
3815    #[serde(default = "default_max_price_variance")]
3816    pub max_price_variance_percent: f64,
3817    /// Rate of quantity variances between PO/GR and Invoice
3818    #[serde(default = "default_quantity_variance_rate")]
3819    pub quantity_variance_rate: f64,
3820    /// Average days from PO to goods receipt
3821    #[serde(default = "default_po_to_gr_days")]
3822    pub average_po_to_gr_days: u32,
3823    /// Average days from GR to invoice
3824    #[serde(default = "default_gr_to_invoice_days")]
3825    pub average_gr_to_invoice_days: u32,
3826    /// Average days from invoice to payment
3827    #[serde(default = "default_invoice_to_payment_days")]
3828    pub average_invoice_to_payment_days: u32,
3829    /// PO line count distribution
3830    #[serde(default)]
3831    pub line_count_distribution: DocumentLineCountDistribution,
3832    /// Payment behavior configuration
3833    #[serde(default)]
3834    pub payment_behavior: P2PPaymentBehaviorConfig,
3835    /// Rate of over-deliveries (quantity received exceeds PO quantity)
3836    #[serde(default)]
3837    pub over_delivery_rate: Option<f64>,
3838    /// Rate of early payment discounts being taken
3839    #[serde(default)]
3840    pub early_payment_discount_rate: Option<f64>,
3841}
3842
/// Serde fallback for `P2PFlowConfig::three_way_match_rate`: 0.95.
fn default_three_way_match_rate() -> f64 {
    0.95_f64
}

/// Serde fallback for `P2PFlowConfig::partial_delivery_rate`: 0.15.
fn default_partial_delivery_rate() -> f64 {
    0.15_f64
}

/// Serde fallback for `P2PFlowConfig::price_variance_rate`: 0.08.
fn default_price_variance_rate() -> f64 {
    0.08_f64
}

/// Serde fallback for `P2PFlowConfig::max_price_variance_percent`: 0.05.
fn default_max_price_variance() -> f64 {
    0.05_f64
}

/// Serde fallback for `P2PFlowConfig::quantity_variance_rate`: 0.05.
fn default_quantity_variance_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `P2PFlowConfig::average_po_to_gr_days`: 14 days.
fn default_po_to_gr_days() -> u32 {
    14_u32
}

/// Serde fallback for `P2PFlowConfig::average_gr_to_invoice_days`: 5 days.
fn default_gr_to_invoice_days() -> u32 {
    5_u32
}

/// Serde fallback for `P2PFlowConfig::average_invoice_to_payment_days`: 30 days.
fn default_invoice_to_payment_days() -> u32 {
    30_u32
}
3874
3875impl Default for P2PFlowConfig {
3876    fn default() -> Self {
3877        Self {
3878            enabled: true,
3879            three_way_match_rate: default_three_way_match_rate(),
3880            partial_delivery_rate: default_partial_delivery_rate(),
3881            price_variance_rate: default_price_variance_rate(),
3882            max_price_variance_percent: default_max_price_variance(),
3883            quantity_variance_rate: default_quantity_variance_rate(),
3884            average_po_to_gr_days: default_po_to_gr_days(),
3885            average_gr_to_invoice_days: default_gr_to_invoice_days(),
3886            average_invoice_to_payment_days: default_invoice_to_payment_days(),
3887            line_count_distribution: DocumentLineCountDistribution::default(),
3888            payment_behavior: P2PPaymentBehaviorConfig::default(),
3889            over_delivery_rate: None,
3890            early_payment_discount_rate: None,
3891        }
3892    }
3893}
3894
3895// ============================================================================
3896// P2P Payment Behavior Configuration
3897// ============================================================================
3898
3899/// P2P payment behavior configuration.
3900#[derive(Debug, Clone, Serialize, Deserialize)]
3901pub struct P2PPaymentBehaviorConfig {
3902    /// Rate of late payments (beyond due date)
3903    #[serde(default = "default_p2p_late_payment_rate")]
3904    pub late_payment_rate: f64,
3905    /// Distribution of late payment days
3906    #[serde(default)]
3907    pub late_payment_days_distribution: LatePaymentDaysDistribution,
3908    /// Rate of partial payments
3909    #[serde(default = "default_p2p_partial_payment_rate")]
3910    pub partial_payment_rate: f64,
3911    /// Rate of payment corrections (NSF, chargebacks, reversals)
3912    #[serde(default = "default_p2p_payment_correction_rate")]
3913    pub payment_correction_rate: f64,
3914    /// Average days until partial payment remainder is paid
3915    #[serde(default = "default_p2p_avg_days_until_remainder")]
3916    pub avg_days_until_remainder: u32,
3917}
3918
/// Serde fallback for `P2PPaymentBehaviorConfig::late_payment_rate`: 0.15.
fn default_p2p_late_payment_rate() -> f64 {
    0.15_f64
}

/// Serde fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`: 0.05.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`: 0.02.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `P2PPaymentBehaviorConfig::avg_days_until_remainder`: 30 days.
fn default_p2p_avg_days_until_remainder() -> u32 {
    30_u32
}
3934
3935impl Default for P2PPaymentBehaviorConfig {
3936    fn default() -> Self {
3937        Self {
3938            late_payment_rate: default_p2p_late_payment_rate(),
3939            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
3940            partial_payment_rate: default_p2p_partial_payment_rate(),
3941            payment_correction_rate: default_p2p_payment_correction_rate(),
3942            avg_days_until_remainder: default_p2p_avg_days_until_remainder(),
3943        }
3944    }
3945}
3946
3947/// Distribution of late payment days for P2P.
3948#[derive(Debug, Clone, Serialize, Deserialize)]
3949pub struct LatePaymentDaysDistribution {
3950    /// 1-7 days late (slightly late)
3951    #[serde(default = "default_slightly_late")]
3952    pub slightly_late_1_to_7: f64,
3953    /// 8-14 days late
3954    #[serde(default = "default_late_8_14")]
3955    pub late_8_to_14: f64,
3956    /// 15-30 days late (very late)
3957    #[serde(default = "default_very_late")]
3958    pub very_late_15_to_30: f64,
3959    /// 31-60 days late (severely late)
3960    #[serde(default = "default_severely_late")]
3961    pub severely_late_31_to_60: f64,
3962    /// Over 60 days late (extremely late)
3963    #[serde(default = "default_extremely_late")]
3964    pub extremely_late_over_60: f64,
3965}
3966
/// Serde fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`: 0.50.
fn default_slightly_late() -> f64 {
    0.50_f64
}

/// Serde fallback for `LatePaymentDaysDistribution::late_8_to_14`: 0.25.
fn default_late_8_14() -> f64 {
    0.25_f64
}

/// Serde fallback for `LatePaymentDaysDistribution::very_late_15_to_30`: 0.15.
fn default_very_late() -> f64 {
    0.15_f64
}

/// Serde fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`: 0.07.
fn default_severely_late() -> f64 {
    0.07_f64
}

/// Serde fallback for `LatePaymentDaysDistribution::extremely_late_over_60`: 0.03.
fn default_extremely_late() -> f64 {
    0.03_f64
}
3986
3987impl Default for LatePaymentDaysDistribution {
3988    fn default() -> Self {
3989        Self {
3990            slightly_late_1_to_7: default_slightly_late(),
3991            late_8_to_14: default_late_8_14(),
3992            very_late_15_to_30: default_very_late(),
3993            severely_late_31_to_60: default_severely_late(),
3994            extremely_late_over_60: default_extremely_late(),
3995        }
3996    }
3997}
3998
3999/// O2C (Order-to-Cash) flow configuration.
4000#[derive(Debug, Clone, Serialize, Deserialize)]
4001pub struct O2CFlowConfig {
4002    /// Enable O2C document flow generation
4003    #[serde(default = "default_true")]
4004    pub enabled: bool,
4005    /// Credit check failure rate
4006    #[serde(default = "default_credit_check_failure_rate")]
4007    pub credit_check_failure_rate: f64,
4008    /// Rate of partial shipments
4009    #[serde(default = "default_partial_shipment_rate")]
4010    pub partial_shipment_rate: f64,
4011    /// Rate of returns
4012    #[serde(default = "default_return_rate")]
4013    pub return_rate: f64,
4014    /// Bad debt write-off rate
4015    #[serde(default = "default_bad_debt_rate")]
4016    pub bad_debt_rate: f64,
4017    /// Average days from SO to delivery
4018    #[serde(default = "default_so_to_delivery_days")]
4019    pub average_so_to_delivery_days: u32,
4020    /// Average days from delivery to invoice
4021    #[serde(default = "default_delivery_to_invoice_days")]
4022    pub average_delivery_to_invoice_days: u32,
4023    /// Average days from invoice to receipt
4024    #[serde(default = "default_invoice_to_receipt_days")]
4025    pub average_invoice_to_receipt_days: u32,
4026    /// SO line count distribution
4027    #[serde(default)]
4028    pub line_count_distribution: DocumentLineCountDistribution,
4029    /// Cash discount configuration
4030    #[serde(default)]
4031    pub cash_discount: CashDiscountConfig,
4032    /// Payment behavior configuration
4033    #[serde(default)]
4034    pub payment_behavior: O2CPaymentBehaviorConfig,
4035    /// Rate of late payments
4036    #[serde(default)]
4037    pub late_payment_rate: Option<f64>,
4038}
4039
/// Serde fallback for `O2CFlowConfig::credit_check_failure_rate`: 0.02.
fn default_credit_check_failure_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `O2CFlowConfig::partial_shipment_rate`: 0.10.
fn default_partial_shipment_rate() -> f64 {
    0.10_f64
}

/// Serde fallback for `O2CFlowConfig::return_rate`: 0.03.
fn default_return_rate() -> f64 {
    0.03_f64
}

/// Serde fallback for `O2CFlowConfig::bad_debt_rate`: 0.01.
fn default_bad_debt_rate() -> f64 {
    0.01_f64
}

/// Serde fallback for `O2CFlowConfig::average_so_to_delivery_days`: 7 days.
fn default_so_to_delivery_days() -> u32 {
    7_u32
}

/// Serde fallback for `O2CFlowConfig::average_delivery_to_invoice_days`: 1 day.
fn default_delivery_to_invoice_days() -> u32 {
    1_u32
}

/// Serde fallback for `O2CFlowConfig::average_invoice_to_receipt_days`: 45 days.
fn default_invoice_to_receipt_days() -> u32 {
    45_u32
}
4067
4068impl Default for O2CFlowConfig {
4069    fn default() -> Self {
4070        Self {
4071            enabled: true,
4072            credit_check_failure_rate: default_credit_check_failure_rate(),
4073            partial_shipment_rate: default_partial_shipment_rate(),
4074            return_rate: default_return_rate(),
4075            bad_debt_rate: default_bad_debt_rate(),
4076            average_so_to_delivery_days: default_so_to_delivery_days(),
4077            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
4078            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
4079            line_count_distribution: DocumentLineCountDistribution::default(),
4080            cash_discount: CashDiscountConfig::default(),
4081            payment_behavior: O2CPaymentBehaviorConfig::default(),
4082            late_payment_rate: None,
4083        }
4084    }
4085}
4086
4087// ============================================================================
4088// O2C Payment Behavior Configuration
4089// ============================================================================
4090
4091/// O2C payment behavior configuration.
4092#[derive(Debug, Clone, Serialize, Deserialize, Default)]
4093pub struct O2CPaymentBehaviorConfig {
4094    /// Dunning (Mahnung) configuration
4095    #[serde(default)]
4096    pub dunning: DunningConfig,
4097    /// Partial payment configuration
4098    #[serde(default)]
4099    pub partial_payments: PartialPaymentConfig,
4100    /// Short payment configuration (unauthorized deductions)
4101    #[serde(default)]
4102    pub short_payments: ShortPaymentConfig,
4103    /// On-account payment configuration (unapplied payments)
4104    #[serde(default)]
4105    pub on_account_payments: OnAccountPaymentConfig,
4106    /// Payment correction configuration (NSF, chargebacks)
4107    #[serde(default)]
4108    pub payment_corrections: PaymentCorrectionConfig,
4109}
4110
4111/// Dunning (Mahnungen) configuration for AR collections.
4112#[derive(Debug, Clone, Serialize, Deserialize)]
4113pub struct DunningConfig {
4114    /// Enable dunning process
4115    #[serde(default)]
4116    pub enabled: bool,
4117    /// Days overdue for level 1 dunning (1st reminder)
4118    #[serde(default = "default_dunning_level_1_days")]
4119    pub level_1_days_overdue: u32,
4120    /// Days overdue for level 2 dunning (2nd reminder)
4121    #[serde(default = "default_dunning_level_2_days")]
4122    pub level_2_days_overdue: u32,
4123    /// Days overdue for level 3 dunning (final notice)
4124    #[serde(default = "default_dunning_level_3_days")]
4125    pub level_3_days_overdue: u32,
4126    /// Days overdue for collection handover
4127    #[serde(default = "default_collection_days")]
4128    pub collection_days_overdue: u32,
4129    /// Payment rates after each dunning level
4130    #[serde(default)]
4131    pub payment_after_dunning_rates: DunningPaymentRates,
4132    /// Rate of invoices blocked from dunning (disputes)
4133    #[serde(default = "default_dunning_block_rate")]
4134    pub dunning_block_rate: f64,
4135    /// Interest rate per year for overdue amounts
4136    #[serde(default = "default_dunning_interest_rate")]
4137    pub interest_rate_per_year: f64,
4138    /// Fixed dunning charge per letter
4139    #[serde(default = "default_dunning_charge")]
4140    pub dunning_charge: f64,
4141}
4142
/// Serde fallback for `DunningConfig::level_1_days_overdue`: 14 days.
fn default_dunning_level_1_days() -> u32 {
    14_u32
}

/// Serde fallback for `DunningConfig::level_2_days_overdue`: 28 days.
fn default_dunning_level_2_days() -> u32 {
    28_u32
}

/// Serde fallback for `DunningConfig::level_3_days_overdue`: 42 days.
fn default_dunning_level_3_days() -> u32 {
    42_u32
}

/// Serde fallback for `DunningConfig::collection_days_overdue`: 60 days.
fn default_collection_days() -> u32 {
    60_u32
}

/// Serde fallback for `DunningConfig::dunning_block_rate`: 0.05.
fn default_dunning_block_rate() -> f64 {
    0.05_f64
}

/// Serde fallback for `DunningConfig::interest_rate_per_year`: 0.09.
fn default_dunning_interest_rate() -> f64 {
    0.09_f64
}

/// Serde fallback for `DunningConfig::dunning_charge`: 25.0 per letter.
fn default_dunning_charge() -> f64 {
    25.0_f64
}
4170
4171impl Default for DunningConfig {
4172    fn default() -> Self {
4173        Self {
4174            enabled: false,
4175            level_1_days_overdue: default_dunning_level_1_days(),
4176            level_2_days_overdue: default_dunning_level_2_days(),
4177            level_3_days_overdue: default_dunning_level_3_days(),
4178            collection_days_overdue: default_collection_days(),
4179            payment_after_dunning_rates: DunningPaymentRates::default(),
4180            dunning_block_rate: default_dunning_block_rate(),
4181            interest_rate_per_year: default_dunning_interest_rate(),
4182            dunning_charge: default_dunning_charge(),
4183        }
4184    }
4185}
4186
4187/// Payment rates after each dunning level.
4188#[derive(Debug, Clone, Serialize, Deserialize)]
4189pub struct DunningPaymentRates {
4190    /// Rate that pays after level 1 reminder
4191    #[serde(default = "default_after_level_1")]
4192    pub after_level_1: f64,
4193    /// Rate that pays after level 2 reminder
4194    #[serde(default = "default_after_level_2")]
4195    pub after_level_2: f64,
4196    /// Rate that pays after level 3 final notice
4197    #[serde(default = "default_after_level_3")]
4198    pub after_level_3: f64,
4199    /// Rate that pays during collection
4200    #[serde(default = "default_during_collection")]
4201    pub during_collection: f64,
4202    /// Rate that never pays (becomes bad debt)
4203    #[serde(default = "default_never_pay")]
4204    pub never_pay: f64,
4205}
4206
/// Default for `DunningPaymentRates::after_level_1`: 0.40.
fn default_after_level_1() -> f64 {
    0.40_f64
}

/// Default for `DunningPaymentRates::after_level_2`: 0.30.
fn default_after_level_2() -> f64 {
    0.30_f64
}

/// Default for `DunningPaymentRates::after_level_3`: 0.15.
fn default_after_level_3() -> f64 {
    0.15_f64
}

/// Default for `DunningPaymentRates::during_collection`: 0.05.
fn default_during_collection() -> f64 {
    0.05_f64
}

/// Default for `DunningPaymentRates::never_pay`: 0.10.
fn default_never_pay() -> f64 {
    0.10_f64
}
4226
4227impl Default for DunningPaymentRates {
4228    fn default() -> Self {
4229        Self {
4230            after_level_1: default_after_level_1(),
4231            after_level_2: default_after_level_2(),
4232            after_level_3: default_after_level_3(),
4233            during_collection: default_during_collection(),
4234            never_pay: default_never_pay(),
4235        }
4236    }
4237}
4238
4239/// Partial payment configuration.
4240#[derive(Debug, Clone, Serialize, Deserialize)]
4241pub struct PartialPaymentConfig {
4242    /// Rate of invoices paid partially
4243    #[serde(default = "default_partial_payment_rate")]
4244    pub rate: f64,
4245    /// Distribution of partial payment percentages
4246    #[serde(default)]
4247    pub percentage_distribution: PartialPaymentPercentageDistribution,
4248    /// Average days until remainder is paid
4249    #[serde(default = "default_avg_days_until_remainder")]
4250    pub avg_days_until_remainder: u32,
4251}
4252
/// Default for `PartialPaymentConfig::rate`: 0.08.
fn default_partial_payment_rate() -> f64 {
    0.08_f64
}

/// Default for `PartialPaymentConfig::avg_days_until_remainder`: 30.
fn default_avg_days_until_remainder() -> u32 {
    30_u32
}
4260
4261impl Default for PartialPaymentConfig {
4262    fn default() -> Self {
4263        Self {
4264            rate: default_partial_payment_rate(),
4265            percentage_distribution: PartialPaymentPercentageDistribution::default(),
4266            avg_days_until_remainder: default_avg_days_until_remainder(),
4267        }
4268    }
4269}
4270
4271/// Distribution of partial payment percentages.
4272#[derive(Debug, Clone, Serialize, Deserialize)]
4273pub struct PartialPaymentPercentageDistribution {
4274    /// Pay 25% of invoice
4275    #[serde(default = "default_partial_25")]
4276    pub pay_25_percent: f64,
4277    /// Pay 50% of invoice
4278    #[serde(default = "default_partial_50")]
4279    pub pay_50_percent: f64,
4280    /// Pay 75% of invoice
4281    #[serde(default = "default_partial_75")]
4282    pub pay_75_percent: f64,
4283    /// Pay random percentage
4284    #[serde(default = "default_partial_random")]
4285    pub pay_random_percent: f64,
4286}
4287
/// Default for `PartialPaymentPercentageDistribution::pay_25_percent`: 0.15.
fn default_partial_25() -> f64 {
    0.15_f64
}

/// Default for `PartialPaymentPercentageDistribution::pay_50_percent`: 0.50.
fn default_partial_50() -> f64 {
    0.50_f64
}

/// Default for `PartialPaymentPercentageDistribution::pay_75_percent`: 0.25.
fn default_partial_75() -> f64 {
    0.25_f64
}

/// Default for `PartialPaymentPercentageDistribution::pay_random_percent`: 0.10.
fn default_partial_random() -> f64 {
    0.10_f64
}
4303
4304impl Default for PartialPaymentPercentageDistribution {
4305    fn default() -> Self {
4306        Self {
4307            pay_25_percent: default_partial_25(),
4308            pay_50_percent: default_partial_50(),
4309            pay_75_percent: default_partial_75(),
4310            pay_random_percent: default_partial_random(),
4311        }
4312    }
4313}
4314
4315/// Short payment configuration (unauthorized deductions).
4316#[derive(Debug, Clone, Serialize, Deserialize)]
4317pub struct ShortPaymentConfig {
4318    /// Rate of payments that are short
4319    #[serde(default = "default_short_payment_rate")]
4320    pub rate: f64,
4321    /// Distribution of short payment reasons
4322    #[serde(default)]
4323    pub reason_distribution: ShortPaymentReasonDistribution,
4324    /// Maximum percentage that can be short
4325    #[serde(default = "default_max_short_percent")]
4326    pub max_short_percent: f64,
4327}
4328
/// Default for `ShortPaymentConfig::rate`: 0.03.
fn default_short_payment_rate() -> f64 {
    0.03_f64
}

/// Default for `ShortPaymentConfig::max_short_percent`: 0.10.
fn default_max_short_percent() -> f64 {
    0.10_f64
}
4336
4337impl Default for ShortPaymentConfig {
4338    fn default() -> Self {
4339        Self {
4340            rate: default_short_payment_rate(),
4341            reason_distribution: ShortPaymentReasonDistribution::default(),
4342            max_short_percent: default_max_short_percent(),
4343        }
4344    }
4345}
4346
4347/// Distribution of short payment reasons.
4348#[derive(Debug, Clone, Serialize, Deserialize)]
4349pub struct ShortPaymentReasonDistribution {
4350    /// Pricing dispute
4351    #[serde(default = "default_pricing_dispute")]
4352    pub pricing_dispute: f64,
4353    /// Quality issue
4354    #[serde(default = "default_quality_issue")]
4355    pub quality_issue: f64,
4356    /// Quantity discrepancy
4357    #[serde(default = "default_quantity_discrepancy")]
4358    pub quantity_discrepancy: f64,
4359    /// Unauthorized deduction
4360    #[serde(default = "default_unauthorized_deduction")]
4361    pub unauthorized_deduction: f64,
4362    /// Early payment discount taken incorrectly
4363    #[serde(default = "default_incorrect_discount")]
4364    pub incorrect_discount: f64,
4365}
4366
/// Default for `ShortPaymentReasonDistribution::pricing_dispute`: 0.30.
fn default_pricing_dispute() -> f64 {
    0.30_f64
}

/// Default for `ShortPaymentReasonDistribution::quality_issue`: 0.20.
fn default_quality_issue() -> f64 {
    0.20_f64
}

/// Default for `ShortPaymentReasonDistribution::quantity_discrepancy`: 0.20.
fn default_quantity_discrepancy() -> f64 {
    0.20_f64
}

/// Default for `ShortPaymentReasonDistribution::unauthorized_deduction`: 0.15.
fn default_unauthorized_deduction() -> f64 {
    0.15_f64
}

/// Default for `ShortPaymentReasonDistribution::incorrect_discount`: 0.15.
fn default_incorrect_discount() -> f64 {
    0.15_f64
}
4386
4387impl Default for ShortPaymentReasonDistribution {
4388    fn default() -> Self {
4389        Self {
4390            pricing_dispute: default_pricing_dispute(),
4391            quality_issue: default_quality_issue(),
4392            quantity_discrepancy: default_quantity_discrepancy(),
4393            unauthorized_deduction: default_unauthorized_deduction(),
4394            incorrect_discount: default_incorrect_discount(),
4395        }
4396    }
4397}
4398
4399/// On-account payment configuration (unapplied payments).
4400#[derive(Debug, Clone, Serialize, Deserialize)]
4401pub struct OnAccountPaymentConfig {
4402    /// Rate of payments that are on-account (unapplied)
4403    #[serde(default = "default_on_account_rate")]
4404    pub rate: f64,
4405    /// Average days until on-account payments are applied
4406    #[serde(default = "default_avg_days_until_applied")]
4407    pub avg_days_until_applied: u32,
4408}
4409
/// Default for `OnAccountPaymentConfig::rate`: 0.02.
fn default_on_account_rate() -> f64 {
    0.02_f64
}

/// Default for `OnAccountPaymentConfig::avg_days_until_applied`: 14.
fn default_avg_days_until_applied() -> u32 {
    14_u32
}
4417
4418impl Default for OnAccountPaymentConfig {
4419    fn default() -> Self {
4420        Self {
4421            rate: default_on_account_rate(),
4422            avg_days_until_applied: default_avg_days_until_applied(),
4423        }
4424    }
4425}
4426
4427/// Payment correction configuration.
4428#[derive(Debug, Clone, Serialize, Deserialize)]
4429pub struct PaymentCorrectionConfig {
4430    /// Rate of payments requiring correction
4431    #[serde(default = "default_payment_correction_rate")]
4432    pub rate: f64,
4433    /// Distribution of correction types
4434    #[serde(default)]
4435    pub type_distribution: PaymentCorrectionTypeDistribution,
4436}
4437
/// Default for `PaymentCorrectionConfig::rate`: 0.02.
fn default_payment_correction_rate() -> f64 {
    0.02_f64
}
4441
4442impl Default for PaymentCorrectionConfig {
4443    fn default() -> Self {
4444        Self {
4445            rate: default_payment_correction_rate(),
4446            type_distribution: PaymentCorrectionTypeDistribution::default(),
4447        }
4448    }
4449}
4450
4451/// Distribution of payment correction types.
4452#[derive(Debug, Clone, Serialize, Deserialize)]
4453pub struct PaymentCorrectionTypeDistribution {
4454    /// NSF (Non-sufficient funds) / bounced check
4455    #[serde(default = "default_nsf_rate")]
4456    pub nsf: f64,
4457    /// Chargeback
4458    #[serde(default = "default_chargeback_rate")]
4459    pub chargeback: f64,
4460    /// Wrong amount applied
4461    #[serde(default = "default_wrong_amount_rate")]
4462    pub wrong_amount: f64,
4463    /// Wrong customer applied
4464    #[serde(default = "default_wrong_customer_rate")]
4465    pub wrong_customer: f64,
4466    /// Duplicate payment
4467    #[serde(default = "default_duplicate_payment_rate")]
4468    pub duplicate_payment: f64,
4469}
4470
/// Default for `PaymentCorrectionTypeDistribution::nsf`: 0.30.
fn default_nsf_rate() -> f64 {
    0.30_f64
}

/// Default for `PaymentCorrectionTypeDistribution::chargeback`: 0.20.
fn default_chargeback_rate() -> f64 {
    0.20_f64
}

/// Default for `PaymentCorrectionTypeDistribution::wrong_amount`: 0.20.
fn default_wrong_amount_rate() -> f64 {
    0.20_f64
}

/// Default for `PaymentCorrectionTypeDistribution::wrong_customer`: 0.15.
fn default_wrong_customer_rate() -> f64 {
    0.15_f64
}

/// Default for `PaymentCorrectionTypeDistribution::duplicate_payment`: 0.15.
fn default_duplicate_payment_rate() -> f64 {
    0.15_f64
}
4490
4491impl Default for PaymentCorrectionTypeDistribution {
4492    fn default() -> Self {
4493        Self {
4494            nsf: default_nsf_rate(),
4495            chargeback: default_chargeback_rate(),
4496            wrong_amount: default_wrong_amount_rate(),
4497            wrong_customer: default_wrong_customer_rate(),
4498            duplicate_payment: default_duplicate_payment_rate(),
4499        }
4500    }
4501}
4502
4503/// Document line count distribution.
4504#[derive(Debug, Clone, Serialize, Deserialize)]
4505pub struct DocumentLineCountDistribution {
4506    /// Minimum number of lines
4507    #[serde(default = "default_min_lines")]
4508    pub min_lines: u32,
4509    /// Maximum number of lines
4510    #[serde(default = "default_max_lines")]
4511    pub max_lines: u32,
4512    /// Most common line count (mode)
4513    #[serde(default = "default_mode_lines")]
4514    pub mode_lines: u32,
4515}
4516
/// Default for `DocumentLineCountDistribution::min_lines`: 1.
fn default_min_lines() -> u32 {
    1_u32
}

/// Default for `DocumentLineCountDistribution::max_lines`: 20.
fn default_max_lines() -> u32 {
    20_u32
}

/// Default for `DocumentLineCountDistribution::mode_lines`: 3.
fn default_mode_lines() -> u32 {
    3_u32
}
4528
4529impl Default for DocumentLineCountDistribution {
4530    fn default() -> Self {
4531        Self {
4532            min_lines: default_min_lines(),
4533            max_lines: default_max_lines(),
4534            mode_lines: default_mode_lines(),
4535        }
4536    }
4537}
4538
4539/// Cash discount configuration.
4540#[derive(Debug, Clone, Serialize, Deserialize)]
4541pub struct CashDiscountConfig {
4542    /// Percentage of invoices eligible for cash discount
4543    #[serde(default = "default_discount_eligible_rate")]
4544    pub eligible_rate: f64,
4545    /// Rate at which customers take the discount
4546    #[serde(default = "default_discount_taken_rate")]
4547    pub taken_rate: f64,
4548    /// Standard discount percentage
4549    #[serde(default = "default_discount_percent")]
4550    pub discount_percent: f64,
4551    /// Days within which discount must be taken
4552    #[serde(default = "default_discount_days")]
4553    pub discount_days: u32,
4554}
4555
/// Default for `CashDiscountConfig::eligible_rate`: 0.30.
fn default_discount_eligible_rate() -> f64 {
    0.30_f64
}

/// Default for `CashDiscountConfig::taken_rate`: 0.60.
fn default_discount_taken_rate() -> f64 {
    0.60_f64
}

/// Default for `CashDiscountConfig::discount_percent`: 0.02.
fn default_discount_percent() -> f64 {
    0.02_f64
}

/// Default for `CashDiscountConfig::discount_days`: 10.
fn default_discount_days() -> u32 {
    10_u32
}
4571
4572impl Default for CashDiscountConfig {
4573    fn default() -> Self {
4574        Self {
4575            eligible_rate: default_discount_eligible_rate(),
4576            taken_rate: default_discount_taken_rate(),
4577            discount_percent: default_discount_percent(),
4578            discount_days: default_discount_days(),
4579        }
4580    }
4581}
4582
4583// ============================================================================
4584// Intercompany Configuration
4585// ============================================================================
4586
4587/// Intercompany transaction configuration.
4588#[derive(Debug, Clone, Serialize, Deserialize)]
4589pub struct IntercompanyConfig {
4590    /// Enable intercompany transaction generation
4591    #[serde(default)]
4592    pub enabled: bool,
4593    /// Rate of transactions that are intercompany
4594    #[serde(default = "default_ic_transaction_rate")]
4595    pub ic_transaction_rate: f64,
4596    /// Transfer pricing method
4597    #[serde(default)]
4598    pub transfer_pricing_method: TransferPricingMethod,
4599    /// Transfer pricing markup percentage (for cost-plus)
4600    #[serde(default = "default_markup_percent")]
4601    pub markup_percent: f64,
4602    /// Generate matched IC pairs (offsetting entries)
4603    #[serde(default = "default_true")]
4604    pub generate_matched_pairs: bool,
4605    /// IC transaction type distribution
4606    #[serde(default)]
4607    pub transaction_type_distribution: ICTransactionTypeDistribution,
4608    /// Generate elimination entries for consolidation
4609    #[serde(default)]
4610    pub generate_eliminations: bool,
4611}
4612
/// Default for `IntercompanyConfig::ic_transaction_rate`: 0.15.
fn default_ic_transaction_rate() -> f64 {
    0.15_f64
}

/// Default for `IntercompanyConfig::markup_percent`: 0.05.
fn default_markup_percent() -> f64 {
    0.05_f64
}
4620
4621impl Default for IntercompanyConfig {
4622    fn default() -> Self {
4623        Self {
4624            enabled: false,
4625            ic_transaction_rate: default_ic_transaction_rate(),
4626            transfer_pricing_method: TransferPricingMethod::default(),
4627            markup_percent: default_markup_percent(),
4628            generate_matched_pairs: true,
4629            transaction_type_distribution: ICTransactionTypeDistribution::default(),
4630            generate_eliminations: false,
4631        }
4632    }
4633}
4634
4635/// Transfer pricing method.
4636#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
4637#[serde(rename_all = "snake_case")]
4638pub enum TransferPricingMethod {
4639    /// Cost plus a markup
4640    #[default]
4641    CostPlus,
4642    /// Comparable uncontrolled price
4643    ComparableUncontrolled,
4644    /// Resale price method
4645    ResalePrice,
4646    /// Transactional net margin method
4647    TransactionalNetMargin,
4648    /// Profit split method
4649    ProfitSplit,
4650}
4651
4652/// IC transaction type distribution.
4653#[derive(Debug, Clone, Serialize, Deserialize)]
4654pub struct ICTransactionTypeDistribution {
4655    /// Goods sales between entities
4656    pub goods_sale: f64,
4657    /// Services provided
4658    pub service_provided: f64,
4659    /// Intercompany loans
4660    pub loan: f64,
4661    /// Dividends
4662    pub dividend: f64,
4663    /// Management fees
4664    pub management_fee: f64,
4665    /// Royalties
4666    pub royalty: f64,
4667    /// Cost sharing
4668    pub cost_sharing: f64,
4669}
4670
4671impl Default for ICTransactionTypeDistribution {
4672    fn default() -> Self {
4673        Self {
4674            goods_sale: 0.35,
4675            service_provided: 0.20,
4676            loan: 0.10,
4677            dividend: 0.05,
4678            management_fee: 0.15,
4679            royalty: 0.10,
4680            cost_sharing: 0.05,
4681        }
4682    }
4683}
4684
4685// ============================================================================
4686// Balance Configuration
4687// ============================================================================
4688
4689/// Balance and trial balance configuration.
4690#[derive(Debug, Clone, Serialize, Deserialize)]
4691pub struct BalanceConfig {
4692    /// Generate opening balances
4693    #[serde(default)]
4694    pub generate_opening_balances: bool,
4695    /// Generate trial balances
4696    #[serde(default = "default_true")]
4697    pub generate_trial_balances: bool,
4698    /// Target gross margin (for revenue/COGS coherence)
4699    #[serde(default = "default_gross_margin")]
4700    pub target_gross_margin: f64,
4701    /// Target DSO (Days Sales Outstanding)
4702    #[serde(default = "default_dso")]
4703    pub target_dso_days: u32,
4704    /// Target DPO (Days Payable Outstanding)
4705    #[serde(default = "default_dpo")]
4706    pub target_dpo_days: u32,
4707    /// Target current ratio
4708    #[serde(default = "default_current_ratio")]
4709    pub target_current_ratio: f64,
4710    /// Target debt-to-equity ratio
4711    #[serde(default = "default_debt_equity")]
4712    pub target_debt_to_equity: f64,
4713    /// Validate balance sheet equation (A = L + E)
4714    #[serde(default = "default_true")]
4715    pub validate_balance_equation: bool,
4716    /// Reconcile subledgers to GL control accounts
4717    #[serde(default = "default_true")]
4718    pub reconcile_subledgers: bool,
4719}
4720
/// Default for `BalanceConfig::target_gross_margin`: 0.35.
fn default_gross_margin() -> f64 {
    0.35_f64
}

/// Default for `BalanceConfig::target_dso_days`: 45.
fn default_dso() -> u32 {
    45_u32
}

/// Default for `BalanceConfig::target_dpo_days`: 30.
fn default_dpo() -> u32 {
    30_u32
}

/// Default for `BalanceConfig::target_current_ratio`: 1.5.
fn default_current_ratio() -> f64 {
    1.5_f64
}

/// Default for `BalanceConfig::target_debt_to_equity`: 0.5.
fn default_debt_equity() -> f64 {
    0.5_f64
}
4740
4741impl Default for BalanceConfig {
4742    fn default() -> Self {
4743        Self {
4744            generate_opening_balances: false,
4745            generate_trial_balances: true,
4746            target_gross_margin: default_gross_margin(),
4747            target_dso_days: default_dso(),
4748            target_dpo_days: default_dpo(),
4749            target_current_ratio: default_current_ratio(),
4750            target_debt_to_equity: default_debt_equity(),
4751            validate_balance_equation: true,
4752            reconcile_subledgers: true,
4753        }
4754    }
4755}
4756
4757// ==========================================================================
4758// OCPM (Object-Centric Process Mining) Configuration
4759// ==========================================================================
4760
4761/// OCPM (Object-Centric Process Mining) configuration.
4762///
4763/// Controls generation of OCEL 2.0 compatible event logs with
4764/// many-to-many event-to-object relationships.
4765#[derive(Debug, Clone, Serialize, Deserialize)]
4766pub struct OcpmConfig {
4767    /// Enable OCPM event log generation
4768    #[serde(default)]
4769    pub enabled: bool,
4770
4771    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
4772    #[serde(default = "default_true")]
4773    pub generate_lifecycle_events: bool,
4774
4775    /// Include object-to-object relationships in output
4776    #[serde(default = "default_true")]
4777    pub include_object_relationships: bool,
4778
4779    /// Compute and export process variants
4780    #[serde(default = "default_true")]
4781    pub compute_variants: bool,
4782
4783    /// Maximum variants to track (0 = unlimited)
4784    #[serde(default)]
4785    pub max_variants: usize,
4786
4787    /// P2P process configuration
4788    #[serde(default)]
4789    pub p2p_process: OcpmProcessConfig,
4790
4791    /// O2C process configuration
4792    #[serde(default)]
4793    pub o2c_process: OcpmProcessConfig,
4794
4795    /// Output format configuration
4796    #[serde(default)]
4797    pub output: OcpmOutputConfig,
4798}
4799
4800impl Default for OcpmConfig {
4801    fn default() -> Self {
4802        Self {
4803            enabled: false,
4804            generate_lifecycle_events: true,
4805            include_object_relationships: true,
4806            compute_variants: true,
4807            max_variants: 0,
4808            p2p_process: OcpmProcessConfig::default(),
4809            o2c_process: OcpmProcessConfig::default(),
4810            output: OcpmOutputConfig::default(),
4811        }
4812    }
4813}
4814
4815/// Process-specific OCPM configuration.
4816#[derive(Debug, Clone, Serialize, Deserialize)]
4817pub struct OcpmProcessConfig {
4818    /// Rework probability (0.0-1.0)
4819    #[serde(default = "default_rework_probability")]
4820    pub rework_probability: f64,
4821
4822    /// Skip step probability (0.0-1.0)
4823    #[serde(default = "default_skip_probability")]
4824    pub skip_step_probability: f64,
4825
4826    /// Out-of-order step probability (0.0-1.0)
4827    #[serde(default = "default_out_of_order_probability")]
4828    pub out_of_order_probability: f64,
4829}
4830
// The three defaults below are deliberately non-zero: they are chosen to
// produce variant counts and Inductive-Miner fitness in the range seen in
// real ERP data (dozens of variants, ~0.7–0.9 fitness). Lowering them all to
// 0 yields a single-variant happy-path log.

/// Default for `OcpmProcessConfig::rework_probability`: 0.15.
fn default_rework_probability() -> f64 {
    0.15_f64
}

/// Default for `OcpmProcessConfig::skip_step_probability`: 0.10.
fn default_skip_probability() -> f64 {
    0.10_f64
}

/// Default for `OcpmProcessConfig::out_of_order_probability`: 0.08.
fn default_out_of_order_probability() -> f64 {
    0.08_f64
}
4845
4846impl Default for OcpmProcessConfig {
4847    fn default() -> Self {
4848        Self {
4849            rework_probability: default_rework_probability(),
4850            skip_step_probability: default_skip_probability(),
4851            out_of_order_probability: default_out_of_order_probability(),
4852        }
4853    }
4854}
4855
4856/// OCPM output format configuration.
4857#[derive(Debug, Clone, Serialize, Deserialize)]
4858pub struct OcpmOutputConfig {
4859    /// Export OCEL 2.0 JSON format
4860    #[serde(default = "default_true")]
4861    pub ocel_json: bool,
4862
4863    /// Export OCEL 2.0 XML format
4864    #[serde(default)]
4865    pub ocel_xml: bool,
4866
4867    /// Export XES 2.0 XML format (IEEE standard for process mining tools)
4868    #[serde(default)]
4869    pub xes: bool,
4870
4871    /// Include lifecycle transitions in XES output (start/complete pairs)
4872    #[serde(default = "default_true")]
4873    pub xes_include_lifecycle: bool,
4874
4875    /// Include resource attributes in XES output
4876    #[serde(default = "default_true")]
4877    pub xes_include_resources: bool,
4878
4879    /// Export flattened CSV for each object type
4880    #[serde(default = "default_true")]
4881    pub flattened_csv: bool,
4882
4883    /// Export event-object relationship table
4884    #[serde(default = "default_true")]
4885    pub event_object_csv: bool,
4886
4887    /// Export object-object relationship table
4888    #[serde(default = "default_true")]
4889    pub object_relationship_csv: bool,
4890
4891    /// Export process variants summary
4892    #[serde(default = "default_true")]
4893    pub variants_csv: bool,
4894
4895    /// Export reference process models (canonical P2P, O2C, R2R)
4896    #[serde(default)]
4897    pub export_reference_models: bool,
4898}
4899
4900impl Default for OcpmOutputConfig {
4901    fn default() -> Self {
4902        Self {
4903            ocel_json: true,
4904            ocel_xml: false,
4905            xes: false,
4906            xes_include_lifecycle: true,
4907            xes_include_resources: true,
4908            flattened_csv: true,
4909            event_object_csv: true,
4910            object_relationship_csv: true,
4911            variants_csv: true,
4912            export_reference_models: false,
4913        }
4914    }
4915}
4916
4917/// Audit engagement and workpaper generation configuration.
4918#[derive(Debug, Clone, Serialize, Deserialize)]
4919pub struct AuditGenerationConfig {
4920    /// Enable audit engagement generation
4921    #[serde(default)]
4922    pub enabled: bool,
4923
4924    /// Gate for workpaper generation (v3.3.2+).
4925    /// When `false`, workpapers and dependent evidence are skipped
4926    /// while engagements / risk assessments / findings still generate.
4927    #[serde(default = "default_true")]
4928    pub generate_workpapers: bool,
4929
4930    /// Engagement type distribution (v3.3.2+). Drives per-engagement
4931    /// type draw via `AuditEngagementGenerator::draw_engagement_type`.
4932    #[serde(default)]
4933    pub engagement_types: AuditEngagementTypesConfig,
4934
4935    /// Workpaper configuration (v3.3.2+). `average_per_phase` maps onto
4936    /// `WorkpaperGenerator.workpapers_per_section` as a ±50% band
4937    /// around the average. Sampling / ISA / cross-reference flags are
4938    /// surfaced for downstream formatting overlays.
4939    #[serde(default)]
4940    pub workpapers: WorkpaperConfig,
4941
4942    /// Audit team configuration (v3.3.2+). `min_team_size` /
4943    /// `max_team_size` map directly onto
4944    /// `AuditEngagementGenerator.team_size_range`.
4945    /// `specialist_probability` is reserved for v3.4 (explicit
4946    /// specialist-role support).
4947    #[serde(default)]
4948    pub team: AuditTeamConfig,
4949
4950    /// Review workflow configuration (v3.3.2+).
4951    /// `average_review_delay_days` drives both
4952    /// `first_review_delay_range` and `second_review_delay_range` as
4953    /// a ±1-day band around the average. `rework_probability` and
4954    /// `require_partner_signoff` are reserved for v3.4 workflow
4955    /// modeling.
4956    #[serde(default)]
4957    pub review: ReviewWorkflowConfig,
4958
4959    /// FSM-driven audit generation configuration.
4960    #[serde(default)]
4961    pub fsm: Option<AuditFsmConfig>,
4962
4963    /// v3.3.0: IT general controls (access logs, change management
4964    /// records) emitted alongside audit engagements. Requires both
4965    /// `audit.enabled = true` and `audit.it_controls.enabled = true`
4966    /// to take effect — the latter defaults to `false` so current
4967    /// archives are byte-identical to v3.2.1.
4968    #[serde(default)]
4969    pub it_controls: ItControlsConfig,
4970}
4971
4972/// IT general controls config (v3.3.0+).
4973#[derive(Debug, Clone, Serialize, Deserialize)]
4974pub struct ItControlsConfig {
4975    /// Master switch — when `false`, no access logs or change records
4976    /// are generated.
4977    #[serde(default)]
4978    pub enabled: bool,
4979    /// Number of access-log entries per engagement (approximate — the
4980    /// generator may round or scale based on company size).
4981    #[serde(default = "default_access_log_count")]
4982    pub access_logs_per_engagement: usize,
4983    /// Number of change-management records per engagement.
4984    #[serde(default = "default_change_record_count")]
4985    pub change_records_per_engagement: usize,
4986}
4987
/// Default for `ItControlsConfig::access_logs_per_engagement`: 500.
fn default_access_log_count() -> usize {
    500_usize
}

/// Default for `ItControlsConfig::change_records_per_engagement`: 50.
fn default_change_record_count() -> usize {
    50_usize
}
4994
4995impl Default for ItControlsConfig {
4996    fn default() -> Self {
4997        Self {
4998            enabled: false,
4999            access_logs_per_engagement: default_access_log_count(),
5000            change_records_per_engagement: default_change_record_count(),
5001        }
5002    }
5003}
5004
5005impl Default for AuditGenerationConfig {
5006    fn default() -> Self {
5007        Self {
5008            enabled: false,
5009            generate_workpapers: true,
5010            engagement_types: AuditEngagementTypesConfig::default(),
5011            workpapers: WorkpaperConfig::default(),
5012            team: AuditTeamConfig::default(),
5013            review: ReviewWorkflowConfig::default(),
5014            fsm: None,
5015            it_controls: ItControlsConfig::default(),
5016        }
5017    }
5018}
5019
5020/// FSM-driven audit generation configuration.
5021#[derive(Debug, Clone, Serialize, Deserialize)]
5022pub struct AuditFsmConfig {
5023    /// Enable FSM-driven audit generation.
5024    #[serde(default)]
5025    pub enabled: bool,
5026
5027    /// Blueprint source: "builtin:fsa", "builtin:ia", or a file path.
5028    #[serde(default = "default_audit_fsm_blueprint")]
5029    pub blueprint: String,
5030
5031    /// Overlay source: "builtin:default", "builtin:thorough", "builtin:rushed", or a file path.
5032    #[serde(default = "default_audit_fsm_overlay")]
5033    pub overlay: String,
5034
5035    /// Depth level override.
5036    #[serde(default)]
5037    pub depth: Option<String>,
5038
5039    /// Discriminator filter.
5040    #[serde(default)]
5041    pub discriminators: std::collections::HashMap<String, Vec<String>>,
5042
5043    /// Event trail output config.
5044    #[serde(default)]
5045    pub event_trail: AuditEventTrailConfig,
5046
5047    /// RNG seed override.
5048    #[serde(default)]
5049    pub seed: Option<u64>,
5050}
5051
5052impl Default for AuditFsmConfig {
5053    fn default() -> Self {
5054        Self {
5055            enabled: false,
5056            blueprint: default_audit_fsm_blueprint(),
5057            overlay: default_audit_fsm_overlay(),
5058            depth: None,
5059            discriminators: std::collections::HashMap::new(),
5060            event_trail: AuditEventTrailConfig::default(),
5061            seed: None,
5062        }
5063    }
5064}
5065
/// Default for `AuditFsmConfig::blueprint`: `"builtin:fsa"`.
fn default_audit_fsm_blueprint() -> String {
    String::from("builtin:fsa")
}

/// Default for `AuditFsmConfig::overlay`: `"builtin:default"`.
fn default_audit_fsm_overlay() -> String {
    String::from("builtin:default")
}
5073
5074/// Event trail output configuration for FSM-driven audit generation.
5075#[derive(Debug, Clone, Serialize, Deserialize)]
5076pub struct AuditEventTrailConfig {
5077    /// Emit a flat event log.
5078    #[serde(default = "default_true")]
5079    pub flat_log: bool,
5080    /// Project events to OCEL 2.0 format.
5081    #[serde(default)]
5082    pub ocel_projection: bool,
5083}
5084
5085impl Default for AuditEventTrailConfig {
5086    fn default() -> Self {
5087        Self {
5088            flat_log: true,
5089            ocel_projection: false,
5090        }
5091    }
5092}
5093
5094/// Engagement type distribution configuration.
5095#[derive(Debug, Clone, Serialize, Deserialize)]
5096pub struct AuditEngagementTypesConfig {
5097    /// Financial statement audit probability
5098    #[serde(default = "default_financial_audit_prob")]
5099    pub financial_statement: f64,
5100    /// SOX/ICFR audit probability
5101    #[serde(default = "default_sox_audit_prob")]
5102    pub sox_icfr: f64,
5103    /// Integrated audit probability
5104    #[serde(default = "default_integrated_audit_prob")]
5105    pub integrated: f64,
5106    /// Review engagement probability
5107    #[serde(default = "default_review_prob")]
5108    pub review: f64,
5109    /// Agreed-upon procedures probability
5110    #[serde(default = "default_aup_prob")]
5111    pub agreed_upon_procedures: f64,
5112}
5113
/// Default for `AuditEngagementTypesConfig::financial_statement`: 0.40.
fn default_financial_audit_prob() -> f64 {
    0.40_f64
}

/// Default for `AuditEngagementTypesConfig::sox_icfr`: 0.20.
fn default_sox_audit_prob() -> f64 {
    0.20_f64
}

/// Default for `AuditEngagementTypesConfig::integrated`: 0.25.
fn default_integrated_audit_prob() -> f64 {
    0.25_f64
}

/// Default for `AuditEngagementTypesConfig::review`: 0.10.
fn default_review_prob() -> f64 {
    0.10_f64
}

/// Default for `AuditEngagementTypesConfig::agreed_upon_procedures`: 0.05.
fn default_aup_prob() -> f64 {
    0.05_f64
}
5129
5130impl Default for AuditEngagementTypesConfig {
5131    fn default() -> Self {
5132        Self {
5133            financial_statement: default_financial_audit_prob(),
5134            sox_icfr: default_sox_audit_prob(),
5135            integrated: default_integrated_audit_prob(),
5136            review: default_review_prob(),
5137            agreed_upon_procedures: default_aup_prob(),
5138        }
5139    }
5140}
5141
5142/// Workpaper generation configuration.
5143#[derive(Debug, Clone, Serialize, Deserialize)]
5144pub struct WorkpaperConfig {
5145    /// Average workpapers per engagement phase
5146    #[serde(default = "default_workpapers_per_phase")]
5147    pub average_per_phase: usize,
5148
5149    /// Include ISA compliance references
5150    #[serde(default = "default_true")]
5151    pub include_isa_references: bool,
5152
5153    /// Generate sample details
5154    #[serde(default = "default_true")]
5155    pub include_sample_details: bool,
5156
5157    /// Include cross-references between workpapers
5158    #[serde(default = "default_true")]
5159    pub include_cross_references: bool,
5160
5161    /// Sampling configuration
5162    #[serde(default)]
5163    pub sampling: SamplingConfig,
5164}
5165
/// Default for `WorkpaperConfig::average_per_phase`: 5.
fn default_workpapers_per_phase() -> usize {
    5_usize
}
5169
5170impl Default for WorkpaperConfig {
5171    fn default() -> Self {
5172        Self {
5173            average_per_phase: default_workpapers_per_phase(),
5174            include_isa_references: true,
5175            include_sample_details: true,
5176            include_cross_references: true,
5177            sampling: SamplingConfig::default(),
5178        }
5179    }
5180}
5181
/// Sampling method configuration.
///
/// One rate per sampling method. The shipped defaults
/// (0.40 / 0.30 / 0.20 / 0.10) add up to 1.0.
/// NOTE(review): this type performs no validation of the documented
/// 0.0-1.0 range or of the sum — confirm where that is enforced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SamplingConfig {
    /// Statistical sampling rate (0.0-1.0); defaults to 0.40.
    #[serde(default = "default_statistical_rate")]
    pub statistical_rate: f64,
    /// Judgmental sampling rate (0.0-1.0); defaults to 0.30.
    #[serde(default = "default_judgmental_rate")]
    pub judgmental_rate: f64,
    /// Haphazard sampling rate (0.0-1.0); defaults to 0.20.
    #[serde(default = "default_haphazard_rate")]
    pub haphazard_rate: f64,
    /// 100% examination rate (0.0-1.0); defaults to 0.10.
    #[serde(default = "default_complete_examination_rate")]
    pub complete_examination_rate: f64,
}
5198
/// Serde/`Default` fallback for `SamplingConfig::statistical_rate`.
fn default_statistical_rate() -> f64 {
    0.40_f64
}

/// Serde/`Default` fallback for `SamplingConfig::judgmental_rate`.
fn default_judgmental_rate() -> f64 {
    0.30_f64
}

/// Serde/`Default` fallback for `SamplingConfig::haphazard_rate`.
fn default_haphazard_rate() -> f64 {
    0.20_f64
}

/// Serde/`Default` fallback for `SamplingConfig::complete_examination_rate`.
fn default_complete_examination_rate() -> f64 {
    0.10_f64
}
5211
5212impl Default for SamplingConfig {
5213    fn default() -> Self {
5214        Self {
5215            statistical_rate: default_statistical_rate(),
5216            judgmental_rate: default_judgmental_rate(),
5217            haphazard_rate: default_haphazard_rate(),
5218            complete_examination_rate: default_complete_examination_rate(),
5219        }
5220    }
5221}
5222
/// Audit team configuration.
///
/// NOTE(review): nothing in this type checks that
/// `min_team_size <= max_team_size` — confirm where that is validated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditTeamConfig {
    /// Minimum team size (defaults to 3).
    #[serde(default = "default_min_team_size")]
    pub min_team_size: usize,
    /// Maximum team size (defaults to 8).
    #[serde(default = "default_max_team_size")]
    pub max_team_size: usize,
    /// Probability of having a specialist on the team (defaults to 0.30).
    #[serde(default = "default_specialist_probability")]
    pub specialist_probability: f64,
}
5236
/// Serde/`Default` fallback for `AuditTeamConfig::min_team_size`.
fn default_min_team_size() -> usize {
    3_usize
}

/// Serde/`Default` fallback for `AuditTeamConfig::max_team_size`.
fn default_max_team_size() -> usize {
    8_usize
}

/// Serde/`Default` fallback for `AuditTeamConfig::specialist_probability`.
fn default_specialist_probability() -> f64 {
    0.30_f64
}
5246
5247impl Default for AuditTeamConfig {
5248    fn default() -> Self {
5249        Self {
5250            min_team_size: default_min_team_size(),
5251            max_team_size: default_max_team_size(),
5252            specialist_probability: default_specialist_probability(),
5253        }
5254    }
5255}
5256
/// Review workflow configuration.
///
/// Every field has a serde default, so an empty section deserializes
/// successfully.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewWorkflowConfig {
    /// Average days between preparer completion and first review
    /// (defaults to 2).
    #[serde(default = "default_review_delay_days")]
    pub average_review_delay_days: u32,
    /// Probability of review notes requiring rework (defaults to 0.15).
    #[serde(default = "default_rework_probability_review")]
    pub rework_probability: f64,
    /// Require partner sign-off for all workpapers (`true` in `Default`).
    #[serde(default = "default_true")]
    pub require_partner_signoff: bool,
}
5270
/// Serde/`Default` fallback for `ReviewWorkflowConfig::average_review_delay_days`.
fn default_review_delay_days() -> u32 {
    2_u32
}

/// Serde/`Default` fallback for `ReviewWorkflowConfig::rework_probability`.
fn default_rework_probability_review() -> f64 {
    0.15_f64
}
5277
5278impl Default for ReviewWorkflowConfig {
5279    fn default() -> Self {
5280        Self {
5281            average_review_delay_days: default_review_delay_days(),
5282            rework_probability: default_rework_probability_review(),
5283            require_partner_signoff: true,
5284        }
5285    }
5286}
5287
5288// =============================================================================
5289// Data Quality Configuration
5290// =============================================================================
5291
5292/// Data quality variation settings for realistic flakiness injection.
5293#[derive(Debug, Clone, Serialize, Deserialize)]
5294pub struct DataQualitySchemaConfig {
5295    /// Enable data quality variations
5296    #[serde(default)]
5297    pub enabled: bool,
5298    /// Preset to use (overrides individual settings if set)
5299    #[serde(default)]
5300    pub preset: DataQualityPreset,
5301    /// Missing value injection settings
5302    #[serde(default)]
5303    pub missing_values: MissingValuesSchemaConfig,
5304    /// Typo injection settings
5305    #[serde(default)]
5306    pub typos: TypoSchemaConfig,
5307    /// Format variation settings
5308    #[serde(default)]
5309    pub format_variations: FormatVariationSchemaConfig,
5310    /// Duplicate injection settings
5311    #[serde(default)]
5312    pub duplicates: DuplicateSchemaConfig,
5313    /// Encoding issue settings
5314    #[serde(default)]
5315    pub encoding_issues: EncodingIssueSchemaConfig,
5316    /// Generate quality issue labels for ML training
5317    #[serde(default)]
5318    pub generate_labels: bool,
5319    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
5320    #[serde(default)]
5321    pub sink_profiles: SinkQualityProfiles,
5322}
5323
5324impl Default for DataQualitySchemaConfig {
5325    fn default() -> Self {
5326        Self {
5327            enabled: false,
5328            preset: DataQualityPreset::None,
5329            missing_values: MissingValuesSchemaConfig::default(),
5330            typos: TypoSchemaConfig::default(),
5331            format_variations: FormatVariationSchemaConfig::default(),
5332            duplicates: DuplicateSchemaConfig::default(),
5333            encoding_issues: EncodingIssueSchemaConfig::default(),
5334            generate_labels: true,
5335            sink_profiles: SinkQualityProfiles::default(),
5336        }
5337    }
5338}
5339
5340impl DataQualitySchemaConfig {
5341    /// Creates a config for a specific preset profile.
5342    pub fn with_preset(preset: DataQualityPreset) -> Self {
5343        let mut config = Self {
5344            preset,
5345            ..Default::default()
5346        };
5347        config.apply_preset();
5348        config
5349    }
5350
5351    /// Applies the preset settings to the individual configuration fields.
5352    /// Call this after deserializing if preset is not Custom or None.
5353    pub fn apply_preset(&mut self) {
5354        if !self.preset.overrides_settings() {
5355            return;
5356        }
5357
5358        self.enabled = true;
5359
5360        // Missing values
5361        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
5362        self.missing_values.rate = self.preset.missing_rate();
5363
5364        // Typos
5365        self.typos.enabled = self.preset.typo_rate() > 0.0;
5366        self.typos.char_error_rate = self.preset.typo_rate();
5367
5368        // Duplicates
5369        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
5370        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5371        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5372        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
5373
5374        // Format variations
5375        self.format_variations.enabled = self.preset.format_variations_enabled();
5376
5377        // Encoding issues
5378        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
5379        self.encoding_issues.rate = self.preset.encoding_issue_rate();
5380
5381        // OCR errors for typos in legacy preset
5382        if self.preset.ocr_errors_enabled() {
5383            self.typos.type_weights.ocr_errors = 0.3;
5384        }
5385    }
5386
5387    /// Returns the effective missing value rate (considering preset).
5388    pub fn effective_missing_rate(&self) -> f64 {
5389        if self.preset.overrides_settings() {
5390            self.preset.missing_rate()
5391        } else {
5392            self.missing_values.rate
5393        }
5394    }
5395
5396    /// Returns the effective typo rate (considering preset).
5397    pub fn effective_typo_rate(&self) -> f64 {
5398        if self.preset.overrides_settings() {
5399            self.preset.typo_rate()
5400        } else {
5401            self.typos.char_error_rate
5402        }
5403    }
5404
5405    /// Returns the effective duplicate rate (considering preset).
5406    pub fn effective_duplicate_rate(&self) -> f64 {
5407        if self.preset.overrides_settings() {
5408            self.preset.duplicate_rate()
5409        } else {
5410            self.duplicates.exact_duplicate_ratio
5411                + self.duplicates.near_duplicate_ratio
5412                + self.duplicates.fuzzy_duplicate_ratio
5413        }
5414    }
5415
5416    /// Creates a clean profile config.
5417    pub fn clean() -> Self {
5418        Self::with_preset(DataQualityPreset::Clean)
5419    }
5420
5421    /// Creates a noisy profile config.
5422    pub fn noisy() -> Self {
5423        Self::with_preset(DataQualityPreset::Noisy)
5424    }
5425
5426    /// Creates a legacy profile config.
5427    pub fn legacy() -> Self {
5428        Self::with_preset(DataQualityPreset::Legacy)
5429    }
5430}
5431
/// Preset configurations for common data quality scenarios.
///
/// Serialized in `snake_case` (e.g. `none`, `noisy`). The concrete rates
/// behind each preset are returned by the methods on this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DataQualityPreset {
    /// No data quality variations (clean data). This is the default and does
    /// not override individual settings.
    #[default]
    None,
    /// Minimal variations (very clean data with rare issues)
    /// Missing: 0.5%, Typos: 0.05%, Duplicates: 0.1%
    Minimal,
    /// Normal variations (realistic enterprise data quality)
    /// Missing: 2%, Typos: 0.2%, Duplicates: 0.5%
    Normal,
    /// High variations (messy data for stress testing)
    /// Missing: 8%, Typos: 1%, Duplicates: 2%
    High,
    /// Custom (use individual settings); does not override them.
    Custom,

    // ========================================
    // ML-Oriented Profiles (Phase 2.1)
    // ========================================
    /// Clean profile for ML training - minimal data quality issues
    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
    Clean,
    /// Noisy profile simulating typical production data issues
    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
    Noisy,
    /// Legacy profile simulating migrated/OCR'd historical data
    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
    Legacy,
}
5461
5462impl DataQualityPreset {
5463    /// Returns the missing value rate for this preset.
5464    pub fn missing_rate(&self) -> f64 {
5465        match self {
5466            DataQualityPreset::None => 0.0,
5467            DataQualityPreset::Minimal => 0.005,
5468            DataQualityPreset::Normal => 0.02,
5469            DataQualityPreset::High => 0.08,
5470            DataQualityPreset::Custom => 0.01, // Use config value
5471            DataQualityPreset::Clean => 0.001,
5472            DataQualityPreset::Noisy => 0.05,
5473            DataQualityPreset::Legacy => 0.10,
5474        }
5475    }
5476
5477    /// Returns the typo rate for this preset.
5478    pub fn typo_rate(&self) -> f64 {
5479        match self {
5480            DataQualityPreset::None => 0.0,
5481            DataQualityPreset::Minimal => 0.0005,
5482            DataQualityPreset::Normal => 0.002,
5483            DataQualityPreset::High => 0.01,
5484            DataQualityPreset::Custom => 0.001, // Use config value
5485            DataQualityPreset::Clean => 0.0005,
5486            DataQualityPreset::Noisy => 0.02,
5487            DataQualityPreset::Legacy => 0.05,
5488        }
5489    }
5490
5491    /// Returns the duplicate rate for this preset.
5492    pub fn duplicate_rate(&self) -> f64 {
5493        match self {
5494            DataQualityPreset::None => 0.0,
5495            DataQualityPreset::Minimal => 0.001,
5496            DataQualityPreset::Normal => 0.005,
5497            DataQualityPreset::High => 0.02,
5498            DataQualityPreset::Custom => 0.0, // Use config value
5499            DataQualityPreset::Clean => 0.0,
5500            DataQualityPreset::Noisy => 0.01,
5501            DataQualityPreset::Legacy => 0.03,
5502        }
5503    }
5504
5505    /// Returns whether format variations are enabled for this preset.
5506    pub fn format_variations_enabled(&self) -> bool {
5507        match self {
5508            DataQualityPreset::None | DataQualityPreset::Clean => false,
5509            DataQualityPreset::Minimal => true,
5510            DataQualityPreset::Normal => true,
5511            DataQualityPreset::High => true,
5512            DataQualityPreset::Custom => true,
5513            DataQualityPreset::Noisy => true,
5514            DataQualityPreset::Legacy => true,
5515        }
5516    }
5517
5518    /// Returns whether OCR-style errors are enabled for this preset.
5519    pub fn ocr_errors_enabled(&self) -> bool {
5520        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
5521    }
5522
5523    /// Returns whether encoding issues are enabled for this preset.
5524    pub fn encoding_issues_enabled(&self) -> bool {
5525        matches!(
5526            self,
5527            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
5528        )
5529    }
5530
5531    /// Returns the encoding issue rate for this preset.
5532    pub fn encoding_issue_rate(&self) -> f64 {
5533        match self {
5534            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
5535            DataQualityPreset::Normal => 0.002,
5536            DataQualityPreset::High => 0.01,
5537            DataQualityPreset::Custom => 0.0,
5538            DataQualityPreset::Noisy => 0.005,
5539            DataQualityPreset::Legacy => 0.02,
5540        }
5541    }
5542
5543    /// Returns true if this preset overrides individual settings.
5544    pub fn overrides_settings(&self) -> bool {
5545        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
5546    }
5547
5548    /// Returns a human-readable description of this preset.
5549    pub fn description(&self) -> &'static str {
5550        match self {
5551            DataQualityPreset::None => "No data quality issues (pristine data)",
5552            DataQualityPreset::Minimal => "Very rare data quality issues",
5553            DataQualityPreset::Normal => "Realistic enterprise data quality",
5554            DataQualityPreset::High => "Messy data for stress testing",
5555            DataQualityPreset::Custom => "Custom settings from configuration",
5556            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
5557            DataQualityPreset::Noisy => "Typical production data with moderate issues",
5558            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
5559        }
5560    }
5561}
5562
5563/// Missing value injection configuration.
5564#[derive(Debug, Clone, Serialize, Deserialize)]
5565pub struct MissingValuesSchemaConfig {
5566    /// Enable missing value injection
5567    #[serde(default)]
5568    pub enabled: bool,
5569    /// Global missing rate (0.0 to 1.0)
5570    #[serde(default = "default_missing_rate")]
5571    pub rate: f64,
5572    /// Missing value strategy
5573    #[serde(default)]
5574    pub strategy: MissingValueStrategy,
5575    /// Field-specific rates (field name -> rate)
5576    #[serde(default)]
5577    pub field_rates: std::collections::HashMap<String, f64>,
5578    /// Fields that should never have missing values
5579    #[serde(default)]
5580    pub protected_fields: Vec<String>,
5581}
5582
/// Serde/`Default` fallback for `MissingValuesSchemaConfig::rate`.
fn default_missing_rate() -> f64 {
    0.01_f64
}
5586
5587impl Default for MissingValuesSchemaConfig {
5588    fn default() -> Self {
5589        Self {
5590            enabled: false,
5591            rate: default_missing_rate(),
5592            strategy: MissingValueStrategy::Mcar,
5593            field_rates: std::collections::HashMap::new(),
5594            protected_fields: vec![
5595                "document_id".to_string(),
5596                "company_code".to_string(),
5597                "posting_date".to_string(),
5598            ],
5599        }
5600    }
5601}
5602
/// Missing value strategy types.
///
/// Serialized in `snake_case` (`mcar`, `mar`, `mnar`, `systematic`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MissingValueStrategy {
    /// Missing Completely At Random - equal probability for all values.
    /// This is the default strategy.
    #[default]
    Mcar,
    /// Missing At Random - depends on other observed values
    Mar,
    /// Missing Not At Random - depends on the value itself
    Mnar,
    /// Systematic - entire field groups missing together
    Systematic,
}
5617
5618/// Typo injection configuration.
5619#[derive(Debug, Clone, Serialize, Deserialize)]
5620pub struct TypoSchemaConfig {
5621    /// Enable typo injection
5622    #[serde(default)]
5623    pub enabled: bool,
5624    /// Character error rate (per character, not per field)
5625    #[serde(default = "default_typo_rate")]
5626    pub char_error_rate: f64,
5627    /// Typo type weights
5628    #[serde(default)]
5629    pub type_weights: TypoTypeWeights,
5630    /// Fields that should never have typos
5631    #[serde(default)]
5632    pub protected_fields: Vec<String>,
5633}
5634
/// Serde/`Default` fallback for `TypoSchemaConfig::char_error_rate`.
fn default_typo_rate() -> f64 {
    0.001_f64
}
5638
5639impl Default for TypoSchemaConfig {
5640    fn default() -> Self {
5641        Self {
5642            enabled: false,
5643            char_error_rate: default_typo_rate(),
5644            type_weights: TypoTypeWeights::default(),
5645            protected_fields: vec![
5646                "document_id".to_string(),
5647                "gl_account".to_string(),
5648                "company_code".to_string(),
5649            ],
5650        }
5651    }
5652}
5653
/// Weights for different typo types.
///
/// The shipped defaults (0.35 / 0.25 / 0.10 / 0.15 / 0.10 / 0.05) add up
/// to 1.0.
/// NOTE(review): this type does not normalize or validate the weights —
/// confirm how the consumer treats sums other than 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypoTypeWeights {
    /// Keyboard-adjacent substitution (e.g., 'a' -> 's'); defaults to 0.35.
    #[serde(default = "default_substitution_weight")]
    pub substitution: f64,
    /// Adjacent character transposition (e.g., 'ab' -> 'ba'); defaults to 0.25.
    #[serde(default = "default_transposition_weight")]
    pub transposition: f64,
    /// Character insertion; defaults to 0.10.
    #[serde(default = "default_insertion_weight")]
    pub insertion: f64,
    /// Character deletion; defaults to 0.15.
    #[serde(default = "default_deletion_weight")]
    pub deletion: f64,
    /// OCR-style errors (e.g., '0' -> 'O'); defaults to 0.10.
    #[serde(default = "default_ocr_weight")]
    pub ocr_errors: f64,
    /// Homophone substitution (e.g., 'their' -> 'there'); defaults to 0.05.
    #[serde(default = "default_homophone_weight")]
    pub homophones: f64,
}
5676
/// Serde/`Default` fallback for `TypoTypeWeights::substitution`.
fn default_substitution_weight() -> f64 {
    0.35_f64
}

/// Serde/`Default` fallback for `TypoTypeWeights::transposition`.
fn default_transposition_weight() -> f64 {
    0.25_f64
}

/// Serde/`Default` fallback for `TypoTypeWeights::insertion`.
fn default_insertion_weight() -> f64 {
    0.10_f64
}

/// Serde/`Default` fallback for `TypoTypeWeights::deletion`.
fn default_deletion_weight() -> f64 {
    0.15_f64
}

/// Serde/`Default` fallback for `TypoTypeWeights::ocr_errors`.
fn default_ocr_weight() -> f64 {
    0.10_f64
}

/// Serde/`Default` fallback for `TypoTypeWeights::homophones`.
fn default_homophone_weight() -> f64 {
    0.05_f64
}
5695
5696impl Default for TypoTypeWeights {
5697    fn default() -> Self {
5698        Self {
5699            substitution: default_substitution_weight(),
5700            transposition: default_transposition_weight(),
5701            insertion: default_insertion_weight(),
5702            deletion: default_deletion_weight(),
5703            ocr_errors: default_ocr_weight(),
5704            homophones: default_homophone_weight(),
5705        }
5706    }
5707}
5708
/// Format variation configuration.
///
/// Groups the date, amount and identifier variation settings under one
/// switch. `Default` is derived, so `enabled` starts as `false` and each
/// sub-section starts from its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FormatVariationSchemaConfig {
    /// Enable format variations
    #[serde(default)]
    pub enabled: bool,
    /// Date format variation settings
    #[serde(default)]
    pub dates: DateFormatVariationConfig,
    /// Amount format variation settings
    #[serde(default)]
    pub amounts: AmountFormatVariationConfig,
    /// Identifier format variation settings
    #[serde(default)]
    pub identifiers: IdentifierFormatVariationConfig,
}
5725
/// Date format variation configuration.
///
/// By default variations are disabled and only the ISO format is selected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateFormatVariationConfig {
    /// Enable date format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate (defaults to 0.05)
    #[serde(default = "default_date_variation_rate")]
    pub rate: f64,
    /// Include ISO format (2024-01-15); `true` in `Default`
    #[serde(default = "default_true")]
    pub iso_format: bool,
    /// Include US format (01/15/2024)
    #[serde(default)]
    pub us_format: bool,
    /// Include EU format (15.01.2024)
    #[serde(default)]
    pub eu_format: bool,
    /// Include long format (January 15, 2024)
    #[serde(default)]
    pub long_format: bool,
}
5748
/// Serde/`Default` fallback for `DateFormatVariationConfig::rate`.
fn default_date_variation_rate() -> f64 {
    0.05_f64
}
5752
5753impl Default for DateFormatVariationConfig {
5754    fn default() -> Self {
5755        Self {
5756            enabled: false,
5757            rate: default_date_variation_rate(),
5758            iso_format: true,
5759            us_format: false,
5760            eu_format: false,
5761            long_format: false,
5762        }
5763    }
5764}
5765
/// Amount format variation configuration.
///
/// By default variations are disabled and no alternative format is selected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AmountFormatVariationConfig {
    /// Enable amount format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate (defaults to 0.02)
    #[serde(default = "default_amount_variation_rate")]
    pub rate: f64,
    /// Include US comma format (1,234.56)
    #[serde(default)]
    pub us_comma_format: bool,
    /// Include EU format (1.234,56)
    #[serde(default)]
    pub eu_format: bool,
    /// Include currency prefix ($1,234.56)
    #[serde(default)]
    pub currency_prefix: bool,
    /// Include accounting format with parentheses for negatives
    #[serde(default)]
    pub accounting_format: bool,
}
5788
/// Serde/`Default` fallback for `AmountFormatVariationConfig::rate`.
fn default_amount_variation_rate() -> f64 {
    0.02_f64
}
5792
5793impl Default for AmountFormatVariationConfig {
5794    fn default() -> Self {
5795        Self {
5796            enabled: false,
5797            rate: default_amount_variation_rate(),
5798            us_comma_format: false,
5799            eu_format: false,
5800            currency_prefix: false,
5801            accounting_format: false,
5802        }
5803    }
5804}
5805
/// Identifier format variation configuration.
///
/// By default variations are disabled and no variation kind is selected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdentifierFormatVariationConfig {
    /// Enable identifier format variations
    #[serde(default)]
    pub enabled: bool,
    /// Overall variation rate (defaults to 0.02)
    #[serde(default = "default_identifier_variation_rate")]
    pub rate: f64,
    /// Case variations (uppercase, lowercase, mixed)
    #[serde(default)]
    pub case_variations: bool,
    /// Padding variations (leading zeros)
    #[serde(default)]
    pub padding_variations: bool,
    /// Separator variations (dash vs underscore)
    #[serde(default)]
    pub separator_variations: bool,
}
5825
/// Serde/`Default` fallback for `IdentifierFormatVariationConfig::rate`.
fn default_identifier_variation_rate() -> f64 {
    0.02_f64
}
5829
5830impl Default for IdentifierFormatVariationConfig {
5831    fn default() -> Self {
5832        Self {
5833            enabled: false,
5834            rate: default_identifier_variation_rate(),
5835            case_variations: false,
5836            padding_variations: false,
5837            separator_variations: false,
5838        }
5839    }
5840}
5841
/// Duplicate injection configuration.
///
/// `rate` is the overall duplicate rate; the three `*_duplicate_ratio`
/// fields describe how duplicates are split by kind. Their defaults
/// (0.4 / 0.35 / 0.25) add up to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateSchemaConfig {
    /// Enable duplicate injection
    #[serde(default)]
    pub enabled: bool,
    /// Overall duplicate rate (defaults to 0.005)
    #[serde(default = "default_duplicate_rate")]
    pub rate: f64,
    /// Exact duplicate proportion (out of duplicates); defaults to 0.4
    #[serde(default = "default_exact_duplicate_ratio")]
    pub exact_duplicate_ratio: f64,
    /// Near duplicate proportion (slight variations); defaults to 0.35
    #[serde(default = "default_near_duplicate_ratio")]
    pub near_duplicate_ratio: f64,
    /// Fuzzy duplicate proportion (typos in key fields); defaults to 0.25
    #[serde(default = "default_fuzzy_duplicate_ratio")]
    pub fuzzy_duplicate_ratio: f64,
    /// Maximum date offset for near/fuzzy duplicates (days); defaults to 3
    #[serde(default = "default_max_date_offset")]
    pub max_date_offset_days: u32,
    /// Maximum amount variance for near duplicates (fraction); defaults to 0.01
    #[serde(default = "default_max_amount_variance")]
    pub max_amount_variance: f64,
}
5867
/// Serde/`Default` fallback for `DuplicateSchemaConfig::rate`.
fn default_duplicate_rate() -> f64 {
    0.005_f64
}

/// Serde/`Default` fallback for `DuplicateSchemaConfig::exact_duplicate_ratio`.
fn default_exact_duplicate_ratio() -> f64 {
    0.4_f64
}

/// Serde/`Default` fallback for `DuplicateSchemaConfig::near_duplicate_ratio`.
fn default_near_duplicate_ratio() -> f64 {
    0.35_f64
}

/// Serde/`Default` fallback for `DuplicateSchemaConfig::fuzzy_duplicate_ratio`.
fn default_fuzzy_duplicate_ratio() -> f64 {
    0.25_f64
}

/// Serde/`Default` fallback for `DuplicateSchemaConfig::max_date_offset_days`.
fn default_max_date_offset() -> u32 {
    3_u32
}

/// Serde/`Default` fallback for `DuplicateSchemaConfig::max_amount_variance`.
fn default_max_amount_variance() -> f64 {
    0.01_f64
}
5886
5887impl Default for DuplicateSchemaConfig {
5888    fn default() -> Self {
5889        Self {
5890            enabled: false,
5891            rate: default_duplicate_rate(),
5892            exact_duplicate_ratio: default_exact_duplicate_ratio(),
5893            near_duplicate_ratio: default_near_duplicate_ratio(),
5894            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
5895            max_date_offset_days: default_max_date_offset(),
5896            max_amount_variance: default_max_amount_variance(),
5897        }
5898    }
5899}
5900
/// Encoding issue configuration.
///
/// By default injection is disabled and no issue kind is selected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodingIssueSchemaConfig {
    /// Enable encoding issue injection
    #[serde(default)]
    pub enabled: bool,
    /// Overall encoding issue rate (defaults to 0.001)
    #[serde(default = "default_encoding_rate")]
    pub rate: f64,
    /// Include mojibake (UTF-8/Latin-1 confusion)
    #[serde(default)]
    pub mojibake: bool,
    /// Include HTML entity corruption
    #[serde(default)]
    pub html_entities: bool,
    /// Include BOM issues
    #[serde(default)]
    pub bom_issues: bool,
}
5920
/// Serde/`Default` fallback for `EncodingIssueSchemaConfig::rate`.
fn default_encoding_rate() -> f64 {
    0.001_f64
}
5924
5925impl Default for EncodingIssueSchemaConfig {
5926    fn default() -> Self {
5927        Self {
5928            enabled: false,
5929            rate: default_encoding_rate(),
5930            mojibake: false,
5931            html_entities: false,
5932            bom_issues: false,
5933        }
5934    }
5935}
5936
/// Per-sink quality profiles for different output formats.
///
/// Each entry is optional; `Default` is derived, so all three start as
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SinkQualityProfiles {
    /// CSV-specific quality settings
    #[serde(default)]
    pub csv: Option<SinkQualityOverride>,
    /// JSON-specific quality settings
    #[serde(default)]
    pub json: Option<SinkQualityOverride>,
    /// Parquet-specific quality settings
    #[serde(default)]
    pub parquet: Option<SinkQualityOverride>,
}
5950
/// Quality setting overrides for a specific sink type.
///
/// Every field is an `Option`.
/// NOTE(review): presumably `None` means "inherit the global setting" —
/// confirm against the code that consumes these overrides.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SinkQualityOverride {
    /// Override enabled state
    pub enabled: Option<bool>,
    /// Override missing value rate
    pub missing_rate: Option<f64>,
    /// Override typo rate
    pub typo_rate: Option<f64>,
    /// Override format variation rate
    pub format_variation_rate: Option<f64>,
    /// Override duplicate rate
    pub duplicate_rate: Option<f64>,
}
5965
5966// =============================================================================
5967// Accounting Standards Configuration
5968// =============================================================================
5969
/// Accounting standards framework configuration for generating standards-compliant data.
///
/// Selectable frameworks are US GAAP, IFRS, dual reporting, French GAAP (PCG)
/// and German GAAP (HGB); see `AccountingFrameworkConfig`. Sub-sections cover
/// these standards:
/// - ASC 606/IFRS 15/PCG: Revenue Recognition
/// - ASC 842/IFRS 16/PCG: Leases
/// - ASC 820/IFRS 13/PCG: Fair Value Measurement
/// - ASC 360/IAS 36/PCG: Impairment
/// - IFRS 3/ASC 805: Business Combinations
/// - IFRS 9/ASC 326: Expected Credit Loss
///
/// `Default` is derived: generation is disabled, no framework is selected,
/// and every sub-section starts from its own `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AccountingStandardsConfig {
    /// Enable accounting standards generation
    #[serde(default)]
    pub enabled: bool,

    /// Accounting framework to use.
    /// When `None`, the country pack's `accounting.framework` is used as fallback;
    /// if that is also absent the orchestrator defaults to US GAAP.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub framework: Option<AccountingFrameworkConfig>,

    /// Revenue recognition configuration (ASC 606/IFRS 15)
    #[serde(default)]
    pub revenue_recognition: RevenueRecognitionConfig,

    /// Lease accounting configuration (ASC 842/IFRS 16)
    #[serde(default)]
    pub leases: LeaseAccountingConfig,

    /// Fair value measurement configuration (ASC 820/IFRS 13)
    #[serde(default)]
    pub fair_value: FairValueConfig,

    /// Impairment testing configuration (ASC 360/IAS 36)
    #[serde(default)]
    pub impairment: ImpairmentConfig,

    /// Business combination configuration (IFRS 3 / ASC 805)
    #[serde(default)]
    pub business_combinations: BusinessCombinationsConfig,

    /// Expected Credit Loss configuration (IFRS 9 / ASC 326)
    #[serde(default)]
    pub expected_credit_loss: EclConfig,

    /// Generate framework differences for dual reporting
    #[serde(default)]
    pub generate_differences: bool,
}
6017
/// Accounting framework selection.
///
/// Serialized in `snake_case` (`us_gaap`, `ifrs`, `dual_reporting`,
/// `french_gaap`, `german_gaap`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum AccountingFrameworkConfig {
    /// US Generally Accepted Accounting Principles (the `Default` variant)
    #[default]
    UsGaap,
    /// International Financial Reporting Standards
    Ifrs,
    /// Generate data for both frameworks with reconciliation
    DualReporting,
    /// French GAAP (Plan Comptable Général – PCG)
    FrenchGaap,
    /// German GAAP (Handelsgesetzbuch – HGB, §238-263)
    GermanGaap,
}
6034
/// Revenue recognition configuration (ASC 606/IFRS 15).
///
/// Every field has a serde default, so an empty section deserializes
/// successfully; generation is disabled by default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RevenueRecognitionConfig {
    /// Enable revenue recognition generation
    #[serde(default)]
    pub enabled: bool,

    /// Generate customer contracts (`true` in `Default`)
    #[serde(default = "default_true")]
    pub generate_contracts: bool,

    /// Average number of performance obligations per contract (defaults to 2.0)
    #[serde(default = "default_avg_obligations")]
    pub avg_obligations_per_contract: f64,

    /// Rate of contracts with variable consideration (defaults to 0.15)
    #[serde(default = "default_variable_consideration_rate")]
    pub variable_consideration_rate: f64,

    /// Rate of over-time revenue recognition (vs point-in-time); defaults to 0.30
    #[serde(default = "default_over_time_rate")]
    pub over_time_recognition_rate: f64,

    /// Number of contracts to generate (defaults to 100)
    #[serde(default = "default_contract_count")]
    pub contract_count: usize,
}
6062
/// Serde/`Default` fallback for `RevenueRecognitionConfig::avg_obligations_per_contract`.
fn default_avg_obligations() -> f64 {
    2.0_f64
}

/// Serde/`Default` fallback for `RevenueRecognitionConfig::variable_consideration_rate`.
fn default_variable_consideration_rate() -> f64 {
    0.15_f64
}

/// Serde/`Default` fallback for `RevenueRecognitionConfig::over_time_recognition_rate`.
fn default_over_time_rate() -> f64 {
    0.30_f64
}

/// Serde/`Default` fallback for `RevenueRecognitionConfig::contract_count`.
fn default_contract_count() -> usize {
    100_usize
}
6078
6079impl Default for RevenueRecognitionConfig {
6080    fn default() -> Self {
6081        Self {
6082            enabled: false,
6083            generate_contracts: true,
6084            avg_obligations_per_contract: default_avg_obligations(),
6085            variable_consideration_rate: default_variable_consideration_rate(),
6086            over_time_recognition_rate: default_over_time_rate(),
6087            contract_count: default_contract_count(),
6088        }
6089    }
6090}
6091
6092/// Lease accounting configuration (ASC 842/IFRS 16).
6093#[derive(Debug, Clone, Serialize, Deserialize)]
6094pub struct LeaseAccountingConfig {
6095    /// Enable lease accounting generation
6096    #[serde(default)]
6097    pub enabled: bool,
6098
6099    /// Number of leases to generate
6100    #[serde(default = "default_lease_count")]
6101    pub lease_count: usize,
6102
6103    /// Percentage of finance leases (vs operating)
6104    #[serde(default = "default_finance_lease_pct")]
6105    pub finance_lease_percent: f64,
6106
6107    /// Average lease term in months
6108    #[serde(default = "default_avg_lease_term")]
6109    pub avg_lease_term_months: u32,
6110
6111    /// Generate amortization schedules
6112    #[serde(default = "default_true")]
6113    pub generate_amortization: bool,
6114
6115    /// Real estate lease percentage
6116    #[serde(default = "default_real_estate_pct")]
6117    pub real_estate_percent: f64,
6118}
6119
/// Serde default for `LeaseAccountingConfig::lease_count`.
fn default_lease_count() -> usize {
    50_usize
}

/// Serde default for `LeaseAccountingConfig::finance_lease_percent`.
fn default_finance_lease_pct() -> f64 {
    0.30_f64
}

/// Serde default for `LeaseAccountingConfig::avg_lease_term_months`.
fn default_avg_lease_term() -> u32 {
    60_u32
}

/// Serde default for `LeaseAccountingConfig::real_estate_percent`.
fn default_real_estate_pct() -> f64 {
    0.40_f64
}
6135
6136impl Default for LeaseAccountingConfig {
6137    fn default() -> Self {
6138        Self {
6139            enabled: false,
6140            lease_count: default_lease_count(),
6141            finance_lease_percent: default_finance_lease_pct(),
6142            avg_lease_term_months: default_avg_lease_term(),
6143            generate_amortization: true,
6144            real_estate_percent: default_real_estate_pct(),
6145        }
6146    }
6147}
6148
6149/// Fair value measurement configuration (ASC 820/IFRS 13).
6150#[derive(Debug, Clone, Serialize, Deserialize)]
6151pub struct FairValueConfig {
6152    /// Enable fair value measurement generation
6153    #[serde(default)]
6154    pub enabled: bool,
6155
6156    /// Number of fair value measurements to generate
6157    #[serde(default = "default_fv_count")]
6158    pub measurement_count: usize,
6159
6160    /// Level 1 (quoted prices) percentage
6161    #[serde(default = "default_level1_pct")]
6162    pub level1_percent: f64,
6163
6164    /// Level 2 (observable inputs) percentage
6165    #[serde(default = "default_level2_pct")]
6166    pub level2_percent: f64,
6167
6168    /// Level 3 (unobservable inputs) percentage
6169    #[serde(default = "default_level3_pct")]
6170    pub level3_percent: f64,
6171
6172    /// Include sensitivity analysis for Level 3
6173    #[serde(default)]
6174    pub include_sensitivity_analysis: bool,
6175}
6176
/// Serde default for `FairValueConfig::measurement_count`.
fn default_fv_count() -> usize {
    25_usize
}

/// Serde default for `FairValueConfig::level1_percent`.
fn default_level1_pct() -> f64 {
    0.40_f64
}

/// Serde default for `FairValueConfig::level2_percent`.
fn default_level2_pct() -> f64 {
    0.35_f64
}

/// Serde default for `FairValueConfig::level3_percent`.
fn default_level3_pct() -> f64 {
    0.25_f64
}
6192
6193impl Default for FairValueConfig {
6194    fn default() -> Self {
6195        Self {
6196            enabled: false,
6197            measurement_count: default_fv_count(),
6198            level1_percent: default_level1_pct(),
6199            level2_percent: default_level2_pct(),
6200            level3_percent: default_level3_pct(),
6201            include_sensitivity_analysis: false,
6202        }
6203    }
6204}
6205
6206/// Impairment testing configuration (ASC 360/IAS 36).
6207#[derive(Debug, Clone, Serialize, Deserialize)]
6208pub struct ImpairmentConfig {
6209    /// Enable impairment testing generation
6210    #[serde(default)]
6211    pub enabled: bool,
6212
6213    /// Number of impairment tests to generate
6214    #[serde(default = "default_impairment_count")]
6215    pub test_count: usize,
6216
6217    /// Rate of tests resulting in impairment
6218    #[serde(default = "default_impairment_rate")]
6219    pub impairment_rate: f64,
6220
6221    /// Generate cash flow projections
6222    #[serde(default = "default_true")]
6223    pub generate_projections: bool,
6224
6225    /// Include goodwill impairment tests
6226    #[serde(default)]
6227    pub include_goodwill: bool,
6228}
6229
/// Serde default for `ImpairmentConfig::test_count`.
fn default_impairment_count() -> usize {
    15_usize
}

/// Serde default for `ImpairmentConfig::impairment_rate`.
fn default_impairment_rate() -> f64 {
    0.10_f64
}
6237
6238impl Default for ImpairmentConfig {
6239    fn default() -> Self {
6240        Self {
6241            enabled: false,
6242            test_count: default_impairment_count(),
6243            impairment_rate: default_impairment_rate(),
6244            generate_projections: true,
6245            include_goodwill: false,
6246        }
6247    }
6248}
6249
6250// =============================================================================
6251// Business Combinations Configuration (IFRS 3 / ASC 805)
6252// =============================================================================
6253
6254/// Configuration for generating business combination (acquisition) data.
6255#[derive(Debug, Clone, Serialize, Deserialize)]
6256pub struct BusinessCombinationsConfig {
6257    /// Enable business combination generation
6258    #[serde(default)]
6259    pub enabled: bool,
6260
6261    /// Number of acquisitions to generate per company (1-5)
6262    #[serde(default = "default_bc_acquisition_count")]
6263    pub acquisition_count: usize,
6264}
6265
/// Serde default for `BusinessCombinationsConfig::acquisition_count`.
fn default_bc_acquisition_count() -> usize {
    2_usize
}
6269
6270impl Default for BusinessCombinationsConfig {
6271    fn default() -> Self {
6272        Self {
6273            enabled: false,
6274            acquisition_count: default_bc_acquisition_count(),
6275        }
6276    }
6277}
6278
6279// =============================================================================
6280// ECL Configuration (IFRS 9 / ASC 326)
6281// =============================================================================
6282
6283/// Configuration for Expected Credit Loss generation.
6284#[derive(Debug, Clone, Serialize, Deserialize)]
6285pub struct EclConfig {
6286    /// Enable ECL generation.
6287    #[serde(default)]
6288    pub enabled: bool,
6289
6290    /// Weight for base economic scenario (0–1).
6291    #[serde(default = "default_ecl_base_weight")]
6292    pub base_scenario_weight: f64,
6293
6294    /// Multiplier for base scenario (typically 1.0).
6295    #[serde(default = "default_ecl_base_multiplier")]
6296    pub base_scenario_multiplier: f64,
6297
6298    /// Weight for optimistic economic scenario (0–1).
6299    #[serde(default = "default_ecl_optimistic_weight")]
6300    pub optimistic_scenario_weight: f64,
6301
6302    /// Multiplier for optimistic scenario (< 1.0 means lower losses).
6303    #[serde(default = "default_ecl_optimistic_multiplier")]
6304    pub optimistic_scenario_multiplier: f64,
6305
6306    /// Weight for pessimistic economic scenario (0–1).
6307    #[serde(default = "default_ecl_pessimistic_weight")]
6308    pub pessimistic_scenario_weight: f64,
6309
6310    /// Multiplier for pessimistic scenario (> 1.0 means higher losses).
6311    #[serde(default = "default_ecl_pessimistic_multiplier")]
6312    pub pessimistic_scenario_multiplier: f64,
6313}
6314
/// Serde default for `EclConfig::base_scenario_weight`.
fn default_ecl_base_weight() -> f64 {
    0.50_f64
}

/// Serde default for `EclConfig::base_scenario_multiplier`.
fn default_ecl_base_multiplier() -> f64 {
    1.0_f64
}

/// Serde default for `EclConfig::optimistic_scenario_weight`.
fn default_ecl_optimistic_weight() -> f64 {
    0.30_f64
}

/// Serde default for `EclConfig::optimistic_scenario_multiplier`.
fn default_ecl_optimistic_multiplier() -> f64 {
    0.8_f64
}

/// Serde default for `EclConfig::pessimistic_scenario_weight`.
fn default_ecl_pessimistic_weight() -> f64 {
    0.20_f64
}

/// Serde default for `EclConfig::pessimistic_scenario_multiplier`.
fn default_ecl_pessimistic_multiplier() -> f64 {
    1.4_f64
}
6333
6334impl Default for EclConfig {
6335    fn default() -> Self {
6336        Self {
6337            enabled: false,
6338            base_scenario_weight: default_ecl_base_weight(),
6339            base_scenario_multiplier: default_ecl_base_multiplier(),
6340            optimistic_scenario_weight: default_ecl_optimistic_weight(),
6341            optimistic_scenario_multiplier: default_ecl_optimistic_multiplier(),
6342            pessimistic_scenario_weight: default_ecl_pessimistic_weight(),
6343            pessimistic_scenario_multiplier: default_ecl_pessimistic_multiplier(),
6344        }
6345    }
6346}
6347
6348// =============================================================================
6349// Audit Standards Configuration
6350// =============================================================================
6351
6352/// Audit standards framework configuration for generating standards-compliant audit data.
6353///
6354/// Supports ISA (International Standards on Auditing) and PCAOB standards:
6355/// - ISA 200-720: Complete coverage of audit standards
6356/// - ISA 520: Analytical Procedures
6357/// - ISA 505: External Confirmations
6358/// - ISA 700/705/706/701: Audit Reports
6359/// - PCAOB AS 2201: ICFR Auditing
6360#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6361pub struct AuditStandardsConfig {
6362    /// Enable audit standards generation
6363    #[serde(default)]
6364    pub enabled: bool,
6365
6366    /// ISA compliance configuration
6367    #[serde(default)]
6368    pub isa_compliance: IsaComplianceConfig,
6369
6370    /// Analytical procedures configuration (ISA 520)
6371    #[serde(default)]
6372    pub analytical_procedures: AnalyticalProceduresConfig,
6373
6374    /// External confirmations configuration (ISA 505)
6375    #[serde(default)]
6376    pub confirmations: ConfirmationsConfig,
6377
6378    /// Audit opinion configuration (ISA 700/705/706/701)
6379    #[serde(default)]
6380    pub opinion: AuditOpinionConfig,
6381
6382    /// Generate complete audit trail with traceability
6383    #[serde(default)]
6384    pub generate_audit_trail: bool,
6385
6386    /// SOX 302/404 compliance configuration
6387    #[serde(default)]
6388    pub sox: SoxComplianceConfig,
6389
6390    /// PCAOB-specific configuration
6391    #[serde(default)]
6392    pub pcaob: PcaobConfig,
6393}
6394
6395/// ISA compliance level configuration.
6396#[derive(Debug, Clone, Serialize, Deserialize)]
6397pub struct IsaComplianceConfig {
6398    /// Enable ISA compliance tracking
6399    #[serde(default)]
6400    pub enabled: bool,
6401
6402    /// Compliance level: "basic", "standard", "comprehensive"
6403    #[serde(default = "default_compliance_level")]
6404    pub compliance_level: String,
6405
6406    /// Generate ISA requirement mappings
6407    #[serde(default = "default_true")]
6408    pub generate_isa_mappings: bool,
6409
6410    /// Generate ISA coverage summary
6411    #[serde(default = "default_true")]
6412    pub generate_coverage_summary: bool,
6413
6414    /// Include PCAOB standard mappings (for dual framework)
6415    #[serde(default)]
6416    pub include_pcaob: bool,
6417
6418    /// Framework to use: "isa", "pcaob", "dual"
6419    #[serde(default = "default_audit_framework")]
6420    pub framework: String,
6421}
6422
/// Serde default for `IsaComplianceConfig::compliance_level`.
fn default_compliance_level() -> String {
    String::from("standard")
}

/// Serde default for `IsaComplianceConfig::framework`.
fn default_audit_framework() -> String {
    String::from("isa")
}
6430
6431impl Default for IsaComplianceConfig {
6432    fn default() -> Self {
6433        Self {
6434            enabled: false,
6435            compliance_level: default_compliance_level(),
6436            generate_isa_mappings: true,
6437            generate_coverage_summary: true,
6438            include_pcaob: false,
6439            framework: default_audit_framework(),
6440        }
6441    }
6442}
6443
6444/// Analytical procedures configuration (ISA 520).
6445#[derive(Debug, Clone, Serialize, Deserialize)]
6446pub struct AnalyticalProceduresConfig {
6447    /// Enable analytical procedures generation
6448    #[serde(default)]
6449    pub enabled: bool,
6450
6451    /// Number of procedures per account/area
6452    #[serde(default = "default_procedures_per_account")]
6453    pub procedures_per_account: usize,
6454
6455    /// Probability of variance exceeding threshold
6456    #[serde(default = "default_variance_probability")]
6457    pub variance_probability: f64,
6458
6459    /// Include variance investigations
6460    #[serde(default = "default_true")]
6461    pub generate_investigations: bool,
6462
6463    /// Include financial ratio analysis
6464    #[serde(default = "default_true")]
6465    pub include_ratio_analysis: bool,
6466}
6467
/// Serde default for `AnalyticalProceduresConfig::procedures_per_account`.
fn default_procedures_per_account() -> usize {
    3_usize
}

/// Serde default for `AnalyticalProceduresConfig::variance_probability`.
fn default_variance_probability() -> f64 {
    0.20_f64
}
6475
6476impl Default for AnalyticalProceduresConfig {
6477    fn default() -> Self {
6478        Self {
6479            enabled: false,
6480            procedures_per_account: default_procedures_per_account(),
6481            variance_probability: default_variance_probability(),
6482            generate_investigations: true,
6483            include_ratio_analysis: true,
6484        }
6485    }
6486}
6487
6488/// External confirmations configuration (ISA 505).
6489#[derive(Debug, Clone, Serialize, Deserialize)]
6490pub struct ConfirmationsConfig {
6491    /// Enable confirmation generation
6492    #[serde(default)]
6493    pub enabled: bool,
6494
6495    /// Number of confirmations to generate
6496    #[serde(default = "default_confirmation_count")]
6497    pub confirmation_count: usize,
6498
6499    /// Positive response rate
6500    #[serde(default = "default_positive_response_rate")]
6501    pub positive_response_rate: f64,
6502
6503    /// Exception rate (responses with differences)
6504    #[serde(default = "default_exception_rate_confirm")]
6505    pub exception_rate: f64,
6506
6507    /// Non-response rate
6508    #[serde(default = "default_non_response_rate")]
6509    pub non_response_rate: f64,
6510
6511    /// Generate alternative procedures for non-responses
6512    #[serde(default = "default_true")]
6513    pub generate_alternative_procedures: bool,
6514}
6515
/// Serde default for `ConfirmationsConfig::confirmation_count`.
fn default_confirmation_count() -> usize {
    50_usize
}

/// Serde default for `ConfirmationsConfig::positive_response_rate`.
fn default_positive_response_rate() -> f64 {
    0.85_f64
}

/// Serde default for `ConfirmationsConfig::exception_rate`.
fn default_exception_rate_confirm() -> f64 {
    0.10_f64
}

/// Serde default for `ConfirmationsConfig::non_response_rate`.
fn default_non_response_rate() -> f64 {
    0.05_f64
}
6531
6532impl Default for ConfirmationsConfig {
6533    fn default() -> Self {
6534        Self {
6535            enabled: false,
6536            confirmation_count: default_confirmation_count(),
6537            positive_response_rate: default_positive_response_rate(),
6538            exception_rate: default_exception_rate_confirm(),
6539            non_response_rate: default_non_response_rate(),
6540            generate_alternative_procedures: true,
6541        }
6542    }
6543}
6544
6545/// Audit opinion configuration (ISA 700/705/706/701).
6546#[derive(Debug, Clone, Serialize, Deserialize)]
6547pub struct AuditOpinionConfig {
6548    /// Enable audit opinion generation
6549    #[serde(default)]
6550    pub enabled: bool,
6551
6552    /// Generate Key Audit Matters (KAM) / Critical Audit Matters (CAM)
6553    #[serde(default = "default_true")]
6554    pub generate_kam: bool,
6555
6556    /// Average number of KAMs/CAMs per opinion
6557    #[serde(default = "default_kam_count")]
6558    pub average_kam_count: usize,
6559
6560    /// Rate of modified opinions
6561    #[serde(default = "default_modified_opinion_rate")]
6562    pub modified_opinion_rate: f64,
6563
6564    /// Include emphasis of matter paragraphs
6565    #[serde(default)]
6566    pub include_emphasis_of_matter: bool,
6567
6568    /// Include going concern conclusions
6569    #[serde(default = "default_true")]
6570    pub include_going_concern: bool,
6571}
6572
/// Serde default for `AuditOpinionConfig::average_kam_count`.
fn default_kam_count() -> usize {
    3_usize
}

/// Serde default for `AuditOpinionConfig::modified_opinion_rate`.
fn default_modified_opinion_rate() -> f64 {
    0.05_f64
}
6580
6581impl Default for AuditOpinionConfig {
6582    fn default() -> Self {
6583        Self {
6584            enabled: false,
6585            generate_kam: true,
6586            average_kam_count: default_kam_count(),
6587            modified_opinion_rate: default_modified_opinion_rate(),
6588            include_emphasis_of_matter: false,
6589            include_going_concern: true,
6590        }
6591    }
6592}
6593
6594/// SOX compliance configuration (Sections 302/404).
6595#[derive(Debug, Clone, Serialize, Deserialize)]
6596pub struct SoxComplianceConfig {
6597    /// Enable SOX compliance generation
6598    #[serde(default)]
6599    pub enabled: bool,
6600
6601    /// Generate Section 302 CEO/CFO certifications
6602    #[serde(default = "default_true")]
6603    pub generate_302_certifications: bool,
6604
6605    /// Generate Section 404 ICFR assessments
6606    #[serde(default = "default_true")]
6607    pub generate_404_assessments: bool,
6608
6609    /// Materiality threshold for SOX testing
6610    #[serde(default = "default_sox_materiality_threshold")]
6611    pub materiality_threshold: f64,
6612
6613    /// Rate of material weaknesses
6614    #[serde(default = "default_material_weakness_rate")]
6615    pub material_weakness_rate: f64,
6616
6617    /// Rate of significant deficiencies
6618    #[serde(default = "default_significant_deficiency_rate")]
6619    pub significant_deficiency_rate: f64,
6620}
6621
/// Serde default for `SoxComplianceConfig::material_weakness_rate`.
fn default_material_weakness_rate() -> f64 {
    0.02_f64
}

/// Serde default for `SoxComplianceConfig::significant_deficiency_rate`.
fn default_significant_deficiency_rate() -> f64 {
    0.08_f64
}
6629
6630impl Default for SoxComplianceConfig {
6631    fn default() -> Self {
6632        Self {
6633            enabled: false,
6634            generate_302_certifications: true,
6635            generate_404_assessments: true,
6636            materiality_threshold: default_sox_materiality_threshold(),
6637            material_weakness_rate: default_material_weakness_rate(),
6638            significant_deficiency_rate: default_significant_deficiency_rate(),
6639        }
6640    }
6641}
6642
6643/// PCAOB-specific configuration.
6644#[derive(Debug, Clone, Serialize, Deserialize)]
6645pub struct PcaobConfig {
6646    /// Enable PCAOB-specific elements
6647    #[serde(default)]
6648    pub enabled: bool,
6649
6650    /// Treat as PCAOB audit (vs ISA-only)
6651    #[serde(default)]
6652    pub is_pcaob_audit: bool,
6653
6654    /// Generate Critical Audit Matters (CAM)
6655    #[serde(default = "default_true")]
6656    pub generate_cam: bool,
6657
6658    /// Include ICFR opinion (for integrated audits)
6659    #[serde(default)]
6660    pub include_icfr_opinion: bool,
6661
6662    /// Generate PCAOB-ISA standard mappings
6663    #[serde(default)]
6664    pub generate_standard_mappings: bool,
6665}
6666
6667impl Default for PcaobConfig {
6668    fn default() -> Self {
6669        Self {
6670            enabled: false,
6671            is_pcaob_audit: false,
6672            generate_cam: true,
6673            include_icfr_opinion: false,
6674            generate_standard_mappings: false,
6675        }
6676    }
6677}
6678
6679// =============================================================================
6680// Advanced Distribution Configuration
6681// =============================================================================
6682
6683/// Advanced distribution configuration for realistic data generation.
6684///
6685/// This section enables sophisticated distribution models including:
6686/// - Mixture models (multi-modal distributions)
6687/// - Cross-field correlations
6688/// - Conditional distributions
6689/// - Regime changes and economic cycles
6690/// - Statistical validation
6691#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6692pub struct AdvancedDistributionConfig {
6693    /// Enable advanced distribution features.
6694    #[serde(default)]
6695    pub enabled: bool,
6696
6697    /// Mixture model configuration for amounts.
6698    #[serde(default)]
6699    pub amounts: MixtureDistributionSchemaConfig,
6700
6701    /// Cross-field correlation configuration.
6702    #[serde(default)]
6703    pub correlations: CorrelationSchemaConfig,
6704
6705    /// Conditional distribution configurations.
6706    #[serde(default)]
6707    pub conditional: Vec<ConditionalDistributionSchemaConfig>,
6708
6709    /// Regime change configuration.
6710    #[serde(default)]
6711    pub regime_changes: RegimeChangeSchemaConfig,
6712
6713    /// Industry-specific distribution profile.
6714    ///
6715    /// Accepts either the legacy bare-name form (`industry_profile: retail`) or
6716    /// the SP3 extended struct form with optional `priors` sub-section.
6717    #[serde(default)]
6718    pub industry_profile: Option<IndustryProfileField>,
6719
6720    /// Statistical validation configuration.
6721    #[serde(default)]
6722    pub validation: StatisticalValidationSchemaConfig,
6723
6724    /// v3.4.4+ — Pareto heavy-tailed distribution for monetary amounts.
6725    /// When set and `enabled`, overrides `amounts` mixture model for the
6726    /// non-fraud amount-sampling path (fraud patterns remain orthogonal).
6727    /// Useful for capex, strategic contracts, and any domain where a small
6728    /// number of very large values dominates the tail.
6729    #[serde(default)]
6730    pub pareto: Option<ParetoSchemaConfig>,
6731}
6732
6733/// Schema-level Pareto distribution configuration (v3.4.4+).
6734///
6735/// Thin wrapper around `datasynth_core::distributions::ParetoConfig` that
6736/// adds an `enabled` gate and serde-friendly field names.
6737#[derive(Debug, Clone, Serialize, Deserialize)]
6738pub struct ParetoSchemaConfig {
6739    /// Enable Pareto sampling. When true, replaces the `amounts` mixture
6740    /// model for the non-fraud amount-sampling path.
6741    #[serde(default)]
6742    pub enabled: bool,
6743
6744    /// Shape parameter (tail heaviness). Lower values → heavier tail.
6745    /// Typical range: 1.5-3.0. Default: 2.0.
6746    #[serde(default = "default_pareto_alpha")]
6747    pub alpha: f64,
6748
6749    /// Scale / minimum value. All samples are >= x_min.
6750    /// Typical: 1000 (for capex) to 100,000 (for large contracts). Default: 100.
6751    #[serde(default = "default_pareto_x_min")]
6752    pub x_min: f64,
6753
6754    /// Optional upper clamp. `None` = unbounded (recommended for realistic
6755    /// heavy tails).
6756    #[serde(default)]
6757    pub max_value: Option<f64>,
6758
6759    /// Decimal places for rounding. Default: 2.
6760    #[serde(default = "default_pareto_decimal_places")]
6761    pub decimal_places: u8,
6762}
6763
/// Serde default for `ParetoSchemaConfig::alpha`.
fn default_pareto_alpha() -> f64 {
    2.0_f64
}

/// Serde default for `ParetoSchemaConfig::x_min`.
fn default_pareto_x_min() -> f64 {
    100.0_f64
}

/// Serde default for `ParetoSchemaConfig::decimal_places`.
fn default_pareto_decimal_places() -> u8 {
    2_u8
}
6775
6776impl Default for ParetoSchemaConfig {
6777    fn default() -> Self {
6778        Self {
6779            enabled: false,
6780            alpha: default_pareto_alpha(),
6781            x_min: default_pareto_x_min(),
6782            max_value: None,
6783            decimal_places: default_pareto_decimal_places(),
6784        }
6785    }
6786}
6787
6788impl ParetoSchemaConfig {
6789    /// Convert this schema config into a `datasynth_core::distributions::ParetoConfig`.
6790    pub fn to_core_config(&self) -> datasynth_core::distributions::ParetoConfig {
6791        datasynth_core::distributions::ParetoConfig {
6792            alpha: self.alpha,
6793            x_min: self.x_min,
6794            max_value: self.max_value,
6795            decimal_places: self.decimal_places,
6796        }
6797    }
6798}
6799
/// Industry profile types for pre-configured distribution settings.
///
/// Variants are (de)serialized in `snake_case`: `retail`, `manufacturing`,
/// `financial_services`, `healthcare`, `technology`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndustryProfileType {
    /// Retail industry profile (POS sales, inventory, seasonal)
    Retail,
    /// Manufacturing industry profile (raw materials, maintenance, capital)
    Manufacturing,
    /// Financial services profile (wire transfers, ACH, fee income)
    FinancialServices,
    /// Healthcare profile (claims, procedures, supplies)
    Healthcare,
    /// Technology profile (subscriptions, services, R&D)
    Technology,
}
6815
6816impl IndustryProfileType {
6817    /// Return the lowercase ASCII slug used for bundled-priors filenames.
6818    ///
6819    /// E.g. `IndustryProfileType::FinancialServices => "financial_services"`.
6820    pub fn slug(self) -> &'static str {
6821        match self {
6822            Self::Retail => "retail",
6823            Self::Manufacturing => "manufacturing",
6824            Self::FinancialServices => "financial_services",
6825            // Matches SP2's bundle naming (corpus uses "Health", not "Healthcare").
6826            Self::Healthcare => "health",
6827            Self::Technology => "technology",
6828        }
6829    }
6830}
6831
6832// ---------------------------------------------------------------------------
6833// SP3 — IndustryProfileField: backward-compatible wrapper
6834// ---------------------------------------------------------------------------
6835
/// The value of `distributions.industry_profile` in config YAML.
///
/// Accepts both the legacy bare-name form:
/// ```yaml
/// distributions:
///   industry_profile: retail
/// ```
/// and the new SP3 extended struct form with optional `priors` sub-section:
/// ```yaml
/// distributions:
///   industry_profile:
///     name: retail
///     priors:
///       enabled: true
///       source: bundled
/// ```
///
/// The enum is `#[serde(untagged)]`: no discriminator key is written, and on
/// input serde tries the variants in declaration order (`Name`, then `Full`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum IndustryProfileField {
    /// Legacy form: `industry_profile: retail`.
    Name(IndustryProfileType),
    /// New form: `industry_profile: { name: retail, priors: { ... } }`.
    Full(IndustryProfileFull),
}
6860
6861impl IndustryProfileField {
6862    /// Return the bare `IndustryProfileType` regardless of which form was used.
6863    pub fn profile_type(&self) -> IndustryProfileType {
6864        match self {
6865            IndustryProfileField::Name(t) => *t,
6866            IndustryProfileField::Full(f) => f.name,
6867        }
6868    }
6869
6870    /// Return the optional `priors` sub-section, if present.
6871    pub fn priors(&self) -> Option<&IndustryPriorsConfig> {
6872        match self {
6873            IndustryProfileField::Name(_) => None,
6874            IndustryProfileField::Full(f) => f.priors.as_ref(),
6875        }
6876    }
6877}
6878
/// Extended industry profile struct used when `priors` is needed (SP3).
///
/// `name` is required on input; `priors` may be omitted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndustryProfileFull {
    /// The industry variant (same values as the bare-name legacy form).
    pub name: IndustryProfileType,
    /// Optional SP3 priors sub-section. Defaults to `None` when absent and is
    /// left out of serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub priors: Option<IndustryPriorsConfig>,
}
6888
6889/// SP3 — configuration for industry-prior injection.
6890///
6891/// When `enabled = true`, the generator uses pre-baked statistical priors
6892/// for the given industry. `source` selects whether to use bundled priors or
6893/// load from a user-supplied file (requires `path`).
6894#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
6895pub struct IndustryPriorsConfig {
6896    /// Enable prior injection. When false the rest of the struct is ignored.
6897    #[serde(default)]
6898    pub enabled: bool,
6899
6900    /// Where to load the priors from.
6901    #[serde(default)]
6902    pub source: PriorsSource,
6903
6904    /// Path to the priors file. Required when `source = file`.
6905    #[serde(default, skip_serializing_if = "Option::is_none")]
6906    pub path: Option<std::path::PathBuf>,
6907
6908    /// SP3.4 — enable online velocity-rule calibrator. Adds per-line overhead
6909    /// when `true`; default `false` keeps v5.12/v5.13-without-calibration behavior.
6910    #[serde(default)]
6911    pub velocity_calibration: bool,
6912}
6913
6914/// Source of industry priors.
6915#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
6916#[serde(rename_all = "lowercase")]
6917pub enum PriorsSource {
6918    /// Use the priors bundled with the binary (default).
6919    #[default]
6920    Bundled,
6921    /// Load priors from a user-supplied file (requires `path`).
6922    File,
6923}
6924
6925/// Mixture model distribution configuration.
6926#[derive(Debug, Clone, Serialize, Deserialize)]
6927pub struct MixtureDistributionSchemaConfig {
6928    /// Enable mixture model for amount generation.
6929    #[serde(default)]
6930    pub enabled: bool,
6931
6932    /// Distribution type: "gaussian" or "lognormal".
6933    #[serde(default = "default_mixture_type")]
6934    pub distribution_type: MixtureDistributionType,
6935
6936    /// Mixture components with weights.
6937    #[serde(default)]
6938    pub components: Vec<MixtureComponentConfig>,
6939
6940    /// Minimum value constraint.
6941    #[serde(default = "default_min_amount")]
6942    pub min_value: f64,
6943
6944    /// Maximum value constraint (optional).
6945    #[serde(default)]
6946    pub max_value: Option<f64>,
6947
6948    /// Decimal places for rounding.
6949    #[serde(default = "default_decimal_places")]
6950    pub decimal_places: u8,
6951}
6952
/// Serde default for `MixtureDistributionSchemaConfig::distribution_type`:
/// the log-normal mixture.
fn default_mixture_type() -> MixtureDistributionType {
    MixtureDistributionType::LogNormal
}
6956
/// Serde default for `MixtureDistributionSchemaConfig::min_value`.
fn default_min_amount() -> f64 {
    0.01_f64
}

/// Serde default for `MixtureDistributionSchemaConfig::decimal_places`.
fn default_decimal_places() -> u8 {
    2_u8
}
6964
6965impl Default for MixtureDistributionSchemaConfig {
6966    fn default() -> Self {
6967        Self {
6968            enabled: false,
6969            distribution_type: MixtureDistributionType::LogNormal,
6970            components: Vec::new(),
6971            min_value: 0.01,
6972            max_value: None,
6973            decimal_places: 2,
6974        }
6975    }
6976}
6977
6978impl MixtureDistributionSchemaConfig {
6979    /// Convert this schema-level config into a `LogNormalMixtureConfig`
6980    /// suitable for `LogNormalMixtureSampler::new`. Returns `None` if there
6981    /// are no components (schema default is an empty list, which cannot
6982    /// drive a sampler).
6983    ///
6984    /// Callers should gate this with `self.enabled` before invoking.
6985    pub fn to_log_normal_config(
6986        &self,
6987    ) -> Option<datasynth_core::distributions::LogNormalMixtureConfig> {
6988        if self.components.is_empty() {
6989            return None;
6990        }
6991        Some(datasynth_core::distributions::LogNormalMixtureConfig {
6992            components: self
6993                .components
6994                .iter()
6995                .map(|c| match &c.label {
6996                    Some(lbl) => datasynth_core::distributions::LogNormalComponent::with_label(
6997                        c.weight,
6998                        c.mu,
6999                        c.sigma,
7000                        lbl.clone(),
7001                    ),
7002                    None => datasynth_core::distributions::LogNormalComponent::new(
7003                        c.weight, c.mu, c.sigma,
7004                    ),
7005                })
7006                .collect(),
7007            min_value: self.min_value,
7008            max_value: self.max_value,
7009            decimal_places: self.decimal_places,
7010        })
7011    }
7012
7013    /// Convert this schema-level config into a `GaussianMixtureConfig`.
7014    /// Returns `None` if there are no components.
7015    pub fn to_gaussian_config(
7016        &self,
7017    ) -> Option<datasynth_core::distributions::GaussianMixtureConfig> {
7018        if self.components.is_empty() {
7019            return None;
7020        }
7021        Some(datasynth_core::distributions::GaussianMixtureConfig {
7022            components: self
7023                .components
7024                .iter()
7025                .map(|c| {
7026                    datasynth_core::distributions::GaussianComponent::new(c.weight, c.mu, c.sigma)
7027                })
7028                .collect(),
7029            allow_negative: true,
7030            min_value: Some(self.min_value),
7031            max_value: self.max_value,
7032        })
7033    }
7034}
7035
7036/// Mixture distribution type.
7037#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7038#[serde(rename_all = "snake_case")]
7039pub enum MixtureDistributionType {
7040    /// Gaussian (normal) mixture
7041    Gaussian,
7042    /// Log-normal mixture (for positive amounts)
7043    #[default]
7044    LogNormal,
7045}
7046
7047/// Configuration for a single mixture component.
7048#[derive(Debug, Clone, Serialize, Deserialize)]
7049pub struct MixtureComponentConfig {
7050    /// Weight of this component (must sum to 1.0 across all components).
7051    pub weight: f64,
7052
7053    /// Location parameter (mean for Gaussian, mu for log-normal).
7054    pub mu: f64,
7055
7056    /// Scale parameter (std dev for Gaussian, sigma for log-normal).
7057    pub sigma: f64,
7058
7059    /// Optional label for this component (e.g., "routine", "significant", "major").
7060    #[serde(default)]
7061    pub label: Option<String>,
7062}
7063
7064/// Cross-field correlation configuration.
7065#[derive(Debug, Clone, Serialize, Deserialize)]
7066pub struct CorrelationSchemaConfig {
7067    /// Enable correlation modeling.
7068    #[serde(default)]
7069    pub enabled: bool,
7070
7071    /// Copula type for dependency modeling.
7072    #[serde(default)]
7073    pub copula_type: CopulaSchemaType,
7074
7075    /// Field definitions for correlation.
7076    #[serde(default)]
7077    pub fields: Vec<CorrelatedFieldConfig>,
7078
7079    /// Correlation matrix (upper triangular, row-major).
7080    /// For n fields, this should have n*(n-1)/2 values.
7081    #[serde(default)]
7082    pub matrix: Vec<f64>,
7083
7084    /// Expected correlations for validation.
7085    #[serde(default)]
7086    pub expected_correlations: Vec<ExpectedCorrelationConfig>,
7087}
7088
7089impl Default for CorrelationSchemaConfig {
7090    fn default() -> Self {
7091        Self {
7092            enabled: false,
7093            copula_type: CopulaSchemaType::Gaussian,
7094            fields: Vec::new(),
7095            matrix: Vec::new(),
7096            expected_correlations: Vec::new(),
7097        }
7098    }
7099}
7100
7101impl CorrelationSchemaConfig {
7102    /// v3.5.4+: extract the correlation for a specific field pair from
7103    /// either the upper-triangular flat matrix (n*(n-1)/2 values) or a
7104    /// full symmetric n×n matrix (n*n values). Returns `None` when the
7105    /// named fields aren't both present or the matrix shape doesn't
7106    /// match.
7107    pub fn correlation_between(&self, field_a: &str, field_b: &str) -> Option<f64> {
7108        let idx_a = self.fields.iter().position(|f| f.name == field_a)?;
7109        let idx_b = self.fields.iter().position(|f| f.name == field_b)?;
7110        if idx_a == idx_b {
7111            return Some(1.0);
7112        }
7113        let (i, j) = if idx_a < idx_b {
7114            (idx_a, idx_b)
7115        } else {
7116            (idx_b, idx_a)
7117        };
7118        let n = self.fields.len();
7119        // Full n×n symmetric matrix?
7120        if self.matrix.len() == n * n {
7121            return self.matrix.get(idx_a * n + idx_b).copied();
7122        }
7123        // Upper triangular flat (row-major, excluding diagonal)?
7124        let expected_tri = n * (n - 1) / 2;
7125        if self.matrix.len() == expected_tri {
7126            // Row i, col j where j > i: flat index is
7127            //   sum_{k=0..i}((n-1-k)) + (j - i - 1)
7128            // = i*(n-1) - i*(i-1)/2 + (j - i - 1)
7129            let flat = i * (n - 1) - i * (i.saturating_sub(1)) / 2 + (j - i - 1);
7130            return self.matrix.get(flat).copied();
7131        }
7132        None
7133    }
7134
7135    /// Convert this schema config to a core `CopulaConfig` when the
7136    /// declared field pair `(field_a, field_b)` has a valid correlation
7137    /// entry. Returns `None` when disabled, fields missing, or matrix
7138    /// malformed.
7139    pub fn to_core_config_for_pair(
7140        &self,
7141        field_a: &str,
7142        field_b: &str,
7143    ) -> Option<datasynth_core::distributions::CopulaConfig> {
7144        if !self.enabled {
7145            return None;
7146        }
7147        let rho = self.correlation_between(field_a, field_b)?;
7148        use datasynth_core::distributions::{CopulaConfig, CopulaType};
7149        let copula_type = match self.copula_type {
7150            CopulaSchemaType::Gaussian => CopulaType::Gaussian,
7151            CopulaSchemaType::Clayton => CopulaType::Clayton,
7152            CopulaSchemaType::Gumbel => CopulaType::Gumbel,
7153            CopulaSchemaType::Frank => CopulaType::Frank,
7154            CopulaSchemaType::StudentT => CopulaType::StudentT,
7155        };
7156        // Gaussian / StudentT interpret theta as correlation; others
7157        // as a shape parameter. Minimal v3.5.4 only wires Gaussian in
7158        // the runtime, but the converter is general so follow-ups can
7159        // light up the other copulas.
7160        let theta = rho.clamp(-0.999, 0.999);
7161        Some(CopulaConfig {
7162            copula_type,
7163            theta,
7164            degrees_of_freedom: 4.0,
7165        })
7166    }
7167}
7168
7169/// Copula type for dependency modeling.
7170#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7171#[serde(rename_all = "snake_case")]
7172pub enum CopulaSchemaType {
7173    /// Gaussian copula (symmetric, no tail dependence)
7174    #[default]
7175    Gaussian,
7176    /// Clayton copula (lower tail dependence)
7177    Clayton,
7178    /// Gumbel copula (upper tail dependence)
7179    Gumbel,
7180    /// Frank copula (symmetric, no tail dependence)
7181    Frank,
7182    /// Student-t copula (both tail dependencies)
7183    StudentT,
7184}
7185
7186/// Configuration for a correlated field.
7187#[derive(Debug, Clone, Serialize, Deserialize)]
7188pub struct CorrelatedFieldConfig {
7189    /// Field name.
7190    pub name: String,
7191
7192    /// Marginal distribution type.
7193    #[serde(default)]
7194    pub distribution: MarginalDistributionConfig,
7195}
7196
7197/// Marginal distribution configuration.
7198#[derive(Debug, Clone, Serialize, Deserialize)]
7199#[serde(tag = "type", rename_all = "snake_case")]
7200pub enum MarginalDistributionConfig {
7201    /// Normal distribution.
7202    Normal {
7203        /// Mean
7204        mu: f64,
7205        /// Standard deviation
7206        sigma: f64,
7207    },
7208    /// Log-normal distribution.
7209    LogNormal {
7210        /// Location parameter
7211        mu: f64,
7212        /// Scale parameter
7213        sigma: f64,
7214    },
7215    /// Uniform distribution.
7216    Uniform {
7217        /// Minimum value
7218        min: f64,
7219        /// Maximum value
7220        max: f64,
7221    },
7222    /// Discrete uniform distribution.
7223    DiscreteUniform {
7224        /// Minimum integer value
7225        min: i32,
7226        /// Maximum integer value
7227        max: i32,
7228    },
7229}
7230
7231impl Default for MarginalDistributionConfig {
7232    fn default() -> Self {
7233        Self::Normal {
7234            mu: 0.0,
7235            sigma: 1.0,
7236        }
7237    }
7238}
7239
7240/// Expected correlation for validation.
7241#[derive(Debug, Clone, Serialize, Deserialize)]
7242pub struct ExpectedCorrelationConfig {
7243    /// First field name.
7244    pub field1: String,
7245    /// Second field name.
7246    pub field2: String,
7247    /// Expected correlation coefficient.
7248    pub expected_r: f64,
7249    /// Acceptable tolerance.
7250    #[serde(default = "default_correlation_tolerance")]
7251    pub tolerance: f64,
7252}
7253
/// Serde default for `ExpectedCorrelationConfig::tolerance`.
fn default_correlation_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.10;
    TOLERANCE
}
7257
7258/// Conditional distribution configuration.
7259#[derive(Debug, Clone, Serialize, Deserialize)]
7260pub struct ConditionalDistributionSchemaConfig {
7261    /// Output field name to generate.
7262    pub output_field: String,
7263
7264    /// Input field name that conditions the distribution.
7265    pub input_field: String,
7266
7267    /// Breakpoints defining distribution changes.
7268    #[serde(default)]
7269    pub breakpoints: Vec<ConditionalBreakpointConfig>,
7270
7271    /// Default distribution when below all breakpoints.
7272    #[serde(default)]
7273    pub default_distribution: ConditionalDistributionParamsConfig,
7274
7275    /// Minimum output value constraint.
7276    #[serde(default)]
7277    pub min_value: Option<f64>,
7278
7279    /// Maximum output value constraint.
7280    #[serde(default)]
7281    pub max_value: Option<f64>,
7282
7283    /// Decimal places for output rounding.
7284    #[serde(default = "default_decimal_places")]
7285    pub decimal_places: u8,
7286}
7287
7288/// Breakpoint for conditional distribution.
7289#[derive(Debug, Clone, Serialize, Deserialize)]
7290pub struct ConditionalBreakpointConfig {
7291    /// Input value threshold.
7292    pub threshold: f64,
7293
7294    /// Distribution to use when input >= threshold.
7295    pub distribution: ConditionalDistributionParamsConfig,
7296}
7297
7298impl ConditionalDistributionSchemaConfig {
7299    /// Convert this schema config into a core
7300    /// `ConditionalDistributionConfig` suitable for
7301    /// `ConditionalSampler::new`. v3.5.3+.
7302    pub fn to_core_config(&self) -> datasynth_core::distributions::ConditionalDistributionConfig {
7303        use datasynth_core::distributions::{
7304            Breakpoint, ConditionalDistributionConfig, ConditionalDistributionParams,
7305        };
7306
7307        let default_distribution = convert_conditional_params(&self.default_distribution);
7308        let breakpoints: Vec<Breakpoint> = self
7309            .breakpoints
7310            .iter()
7311            .map(|bp| Breakpoint {
7312                threshold: bp.threshold,
7313                distribution: convert_conditional_params(&bp.distribution),
7314            })
7315            .collect();
7316
7317        // Use a sentinel default_distribution when the schema default is
7318        // its factory default (Fixed { value: 0.0 })  and we have
7319        // breakpoints — we don't want to clobber data for values below
7320        // the first breakpoint.
7321        let final_default = if breakpoints.is_empty() {
7322            default_distribution
7323        } else {
7324            match default_distribution {
7325                ConditionalDistributionParams::Fixed { value: 0.0 } => {
7326                    // Reuse the first breakpoint's distribution as the
7327                    // default to avoid surprising zeros.
7328                    breakpoints[0].distribution.clone()
7329                }
7330                other => other,
7331            }
7332        };
7333
7334        ConditionalDistributionConfig {
7335            output_field: self.output_field.clone(),
7336            input_field: self.input_field.clone(),
7337            breakpoints,
7338            default_distribution: final_default,
7339            min_value: self.min_value,
7340            max_value: self.max_value,
7341            decimal_places: self.decimal_places,
7342        }
7343    }
7344}
7345
7346fn convert_conditional_params(
7347    p: &ConditionalDistributionParamsConfig,
7348) -> datasynth_core::distributions::ConditionalDistributionParams {
7349    use datasynth_core::distributions::ConditionalDistributionParams as Core;
7350    match p {
7351        ConditionalDistributionParamsConfig::Fixed { value } => Core::Fixed { value: *value },
7352        ConditionalDistributionParamsConfig::Normal { mu, sigma } => Core::Normal {
7353            mu: *mu,
7354            sigma: *sigma,
7355        },
7356        ConditionalDistributionParamsConfig::LogNormal { mu, sigma } => Core::LogNormal {
7357            mu: *mu,
7358            sigma: *sigma,
7359        },
7360        ConditionalDistributionParamsConfig::Uniform { min, max } => Core::Uniform {
7361            min: *min,
7362            max: *max,
7363        },
7364        ConditionalDistributionParamsConfig::Beta {
7365            alpha,
7366            beta,
7367            min,
7368            max,
7369        } => Core::Beta {
7370            alpha: *alpha,
7371            beta: *beta,
7372            min: *min,
7373            max: *max,
7374        },
7375        ConditionalDistributionParamsConfig::Discrete { values, weights } => Core::Discrete {
7376            values: values.clone(),
7377            weights: weights.clone(),
7378        },
7379    }
7380}
7381
7382/// Distribution parameters for conditional distributions.
7383#[derive(Debug, Clone, Serialize, Deserialize)]
7384#[serde(tag = "type", rename_all = "snake_case")]
7385pub enum ConditionalDistributionParamsConfig {
7386    /// Fixed value.
7387    Fixed {
7388        /// The fixed value
7389        value: f64,
7390    },
7391    /// Normal distribution.
7392    Normal {
7393        /// Mean
7394        mu: f64,
7395        /// Standard deviation
7396        sigma: f64,
7397    },
7398    /// Log-normal distribution.
7399    LogNormal {
7400        /// Location parameter
7401        mu: f64,
7402        /// Scale parameter
7403        sigma: f64,
7404    },
7405    /// Uniform distribution.
7406    Uniform {
7407        /// Minimum
7408        min: f64,
7409        /// Maximum
7410        max: f64,
7411    },
7412    /// Beta distribution (scaled).
7413    Beta {
7414        /// Alpha parameter
7415        alpha: f64,
7416        /// Beta parameter
7417        beta: f64,
7418        /// Minimum output value
7419        min: f64,
7420        /// Maximum output value
7421        max: f64,
7422    },
7423    /// Discrete values with weights.
7424    Discrete {
7425        /// Possible values
7426        values: Vec<f64>,
7427        /// Weights (should sum to 1.0)
7428        weights: Vec<f64>,
7429    },
7430}
7431
7432impl Default for ConditionalDistributionParamsConfig {
7433    fn default() -> Self {
7434        Self::Normal {
7435            mu: 0.0,
7436            sigma: 1.0,
7437        }
7438    }
7439}
7440
7441/// Regime change configuration.
7442#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7443pub struct RegimeChangeSchemaConfig {
7444    /// Enable regime change modeling.
7445    #[serde(default)]
7446    pub enabled: bool,
7447
7448    /// List of regime changes.
7449    #[serde(default)]
7450    pub changes: Vec<RegimeChangeEventConfig>,
7451
7452    /// Economic cycle configuration.
7453    #[serde(default)]
7454    pub economic_cycle: Option<EconomicCycleSchemaConfig>,
7455
7456    /// Parameter drift configurations.
7457    #[serde(default)]
7458    pub parameter_drifts: Vec<ParameterDriftSchemaConfig>,
7459}
7460
7461/// A single regime change event.
7462#[derive(Debug, Clone, Serialize, Deserialize)]
7463pub struct RegimeChangeEventConfig {
7464    /// Date when the change occurs (ISO 8601 format).
7465    pub date: String,
7466
7467    /// Type of regime change.
7468    pub change_type: RegimeChangeTypeConfig,
7469
7470    /// Description of the change.
7471    #[serde(default)]
7472    pub description: Option<String>,
7473
7474    /// Effects of this regime change.
7475    #[serde(default)]
7476    pub effects: Vec<RegimeEffectConfig>,
7477}
7478
7479/// Type of regime change.
7480#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
7481#[serde(rename_all = "snake_case")]
7482pub enum RegimeChangeTypeConfig {
7483    /// Acquisition - sudden volume and amount increase
7484    Acquisition,
7485    /// Divestiture - sudden volume and amount decrease
7486    Divestiture,
7487    /// Price increase - amounts increase
7488    PriceIncrease,
7489    /// Price decrease - amounts decrease
7490    PriceDecrease,
7491    /// New product launch - volume ramp-up
7492    ProductLaunch,
7493    /// Product discontinuation - volume ramp-down
7494    ProductDiscontinuation,
7495    /// Policy change - affects patterns
7496    PolicyChange,
7497    /// Competitor entry - market disruption
7498    CompetitorEntry,
7499    /// Custom effect
7500    Custom,
7501}
7502
7503/// Effect of a regime change on a specific field.
7504#[derive(Debug, Clone, Serialize, Deserialize)]
7505pub struct RegimeEffectConfig {
7506    /// Field being affected.
7507    pub field: String,
7508
7509    /// Multiplier to apply (1.0 = no change, 1.5 = 50% increase).
7510    pub multiplier: f64,
7511}
7512
7513/// Economic cycle configuration.
7514#[derive(Debug, Clone, Serialize, Deserialize)]
7515pub struct EconomicCycleSchemaConfig {
7516    /// Enable economic cycle modeling.
7517    #[serde(default)]
7518    pub enabled: bool,
7519
7520    /// Cycle period in months (e.g., 48 for 4-year business cycle).
7521    #[serde(default = "default_cycle_period")]
7522    pub period_months: u32,
7523
7524    /// Amplitude of cycle effect (0.0-1.0).
7525    #[serde(default = "default_cycle_amplitude")]
7526    pub amplitude: f64,
7527
7528    /// Phase offset in months.
7529    #[serde(default)]
7530    pub phase_offset: u32,
7531
7532    /// Recession periods (start_month, duration_months).
7533    #[serde(default)]
7534    pub recessions: Vec<RecessionPeriodConfig>,
7535}
7536
/// Serde default for `EconomicCycleSchemaConfig::period_months`.
fn default_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}
7540
/// Serde default for `EconomicCycleSchemaConfig::amplitude`.
fn default_cycle_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
7544
7545impl Default for EconomicCycleSchemaConfig {
7546    fn default() -> Self {
7547        Self {
7548            enabled: false,
7549            period_months: 48,
7550            amplitude: 0.15,
7551            phase_offset: 0,
7552            recessions: Vec::new(),
7553        }
7554    }
7555}
7556
7557/// Recession period configuration.
7558#[derive(Debug, Clone, Serialize, Deserialize)]
7559pub struct RecessionPeriodConfig {
7560    /// Start month (0-indexed from generation start).
7561    pub start_month: u32,
7562
7563    /// Duration in months.
7564    pub duration_months: u32,
7565
7566    /// Severity (0.0-1.0, affects volume reduction).
7567    #[serde(default = "default_recession_severity")]
7568    pub severity: f64,
7569}
7570
7571impl RegimeChangeSchemaConfig {
7572    /// Populate the regime-change, economic-cycle, and parameter-drift
7573    /// slots on a `DriftConfig` from this schema config. v3.5.2+.
7574    ///
7575    /// `generation_start` must match `config.global.start_date` so that
7576    /// absolute regime-change dates can be mapped to 0-indexed periods.
7577    /// Unparseable / out-of-range dates are silently skipped to keep
7578    /// runtime robust against user typos.
7579    pub fn apply_to(
7580        &self,
7581        drift: &mut datasynth_core::distributions::DriftConfig,
7582        generation_start: chrono::NaiveDate,
7583    ) {
7584        if !self.enabled {
7585            return;
7586        }
7587
7588        // Enable drift if any regime-change feature wants it.
7589        drift.enabled = true;
7590
7591        // Regime-change events (absolute dates → period offsets).
7592        for event in &self.changes {
7593            let period = match chrono::NaiveDate::parse_from_str(&event.date, "%Y-%m-%d") {
7594                Ok(d) => {
7595                    let days = (d - generation_start).num_days();
7596                    if days < 0 {
7597                        continue;
7598                    }
7599                    // Approximate month by dividing by 30.4 so we don't
7600                    // need chrono::Months arithmetic.
7601                    (days as f64 / 30.4).round() as u32
7602                }
7603                Err(_) => continue,
7604            };
7605            let change_type = convert_regime_change_type(event.change_type);
7606            let core_effects = event
7607                .effects
7608                .iter()
7609                .map(|e| datasynth_core::distributions::RegimeEffect {
7610                    field: e.field.clone(),
7611                    multiplier: e.multiplier,
7612                })
7613                .collect();
7614            drift
7615                .regime_changes
7616                .push(datasynth_core::distributions::RegimeChange {
7617                    period,
7618                    change_type,
7619                    description: event.description.clone(),
7620                    effects: core_effects,
7621                    transition_periods: 0,
7622                });
7623        }
7624
7625        // Economic cycle.
7626        if let Some(ec) = &self.economic_cycle {
7627            if ec.enabled {
7628                let recession_periods: Vec<u32> = ec
7629                    .recessions
7630                    .iter()
7631                    .flat_map(|r| r.start_month..r.start_month + r.duration_months)
7632                    .collect();
7633                // Use the most-severe recession as the severity driver;
7634                // fall back to default when none declared.
7635                let severity = ec
7636                    .recessions
7637                    .iter()
7638                    .map(|r| 1.0 - r.severity)
7639                    .fold(0.75f64, f64::min);
7640                drift.economic_cycle = datasynth_core::distributions::EconomicCycleConfig {
7641                    enabled: true,
7642                    cycle_length: ec.period_months,
7643                    amplitude: ec.amplitude,
7644                    phase_offset: ec.phase_offset,
7645                    recession_periods,
7646                    recession_severity: severity,
7647                };
7648                drift.drift_type = datasynth_core::distributions::DriftType::Mixed;
7649            }
7650        }
7651
7652        // Parameter drifts.
7653        for pd in &self.parameter_drifts {
7654            let drift_type = match pd.drift_type {
7655                ParameterDriftTypeConfig::Linear => {
7656                    datasynth_core::distributions::ParameterDriftType::Linear
7657                }
7658                ParameterDriftTypeConfig::Exponential => {
7659                    datasynth_core::distributions::ParameterDriftType::Exponential
7660                }
7661                ParameterDriftTypeConfig::Logistic => {
7662                    datasynth_core::distributions::ParameterDriftType::Logistic
7663                }
7664                ParameterDriftTypeConfig::Step => {
7665                    datasynth_core::distributions::ParameterDriftType::Step
7666                }
7667            };
7668            drift
7669                .parameter_drifts
7670                .push(datasynth_core::distributions::ParameterDrift {
7671                    parameter: pd.parameter.clone(),
7672                    drift_type,
7673                    initial_value: pd.start_value,
7674                    target_or_rate: pd.end_value,
7675                    start_period: pd.start_period,
7676                    end_period: pd.end_period,
7677                    steepness: 1.0,
7678                });
7679        }
7680    }
7681}
7682
7683fn convert_regime_change_type(
7684    t: RegimeChangeTypeConfig,
7685) -> datasynth_core::distributions::RegimeChangeType {
7686    use datasynth_core::distributions::RegimeChangeType as Core;
7687    match t {
7688        RegimeChangeTypeConfig::Acquisition => Core::Acquisition,
7689        RegimeChangeTypeConfig::Divestiture => Core::Divestiture,
7690        RegimeChangeTypeConfig::PriceIncrease => Core::PriceIncrease,
7691        RegimeChangeTypeConfig::PriceDecrease => Core::PriceDecrease,
7692        RegimeChangeTypeConfig::ProductLaunch => Core::ProductLaunch,
7693        RegimeChangeTypeConfig::ProductDiscontinuation => Core::ProductDiscontinuation,
7694        RegimeChangeTypeConfig::PolicyChange => Core::PolicyChange,
7695        RegimeChangeTypeConfig::CompetitorEntry => Core::CompetitorEntry,
7696        RegimeChangeTypeConfig::Custom => Core::Custom,
7697    }
7698}
7699
/// Serde default for `RecessionPeriodConfig::severity`.
fn default_recession_severity() -> f64 {
    const SEVERITY: f64 = 0.20;
    SEVERITY
}
7703
7704/// Parameter drift configuration.
7705#[derive(Debug, Clone, Serialize, Deserialize)]
7706pub struct ParameterDriftSchemaConfig {
7707    /// Parameter being drifted.
7708    pub parameter: String,
7709
7710    /// Drift type.
7711    pub drift_type: ParameterDriftTypeConfig,
7712
7713    /// Start value.
7714    pub start_value: f64,
7715
7716    /// End value.
7717    pub end_value: f64,
7718
7719    /// Start period (month, 0-indexed).
7720    #[serde(default)]
7721    pub start_period: u32,
7722
7723    /// End period (month, optional - defaults to end of generation).
7724    #[serde(default)]
7725    pub end_period: Option<u32>,
7726}
7727
7728/// Parameter drift type.
7729#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7730#[serde(rename_all = "snake_case")]
7731pub enum ParameterDriftTypeConfig {
7732    /// Linear interpolation
7733    #[default]
7734    Linear,
7735    /// Exponential growth/decay
7736    Exponential,
7737    /// S-curve (logistic)
7738    Logistic,
7739    /// Step function
7740    Step,
7741}
7742
7743/// Statistical validation configuration.
7744#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7745pub struct StatisticalValidationSchemaConfig {
7746    /// Enable statistical validation.
7747    #[serde(default)]
7748    pub enabled: bool,
7749
7750    /// Statistical tests to run.
7751    #[serde(default)]
7752    pub tests: Vec<StatisticalTestConfig>,
7753
7754    /// Validation reporting configuration.
7755    #[serde(default)]
7756    pub reporting: ValidationReportingConfig,
7757}
7758
7759/// Statistical test configuration.
7760#[derive(Debug, Clone, Serialize, Deserialize)]
7761#[serde(tag = "type", rename_all = "snake_case")]
7762pub enum StatisticalTestConfig {
7763    /// Benford's Law first digit test.
7764    BenfordFirstDigit {
7765        /// Threshold MAD for failure.
7766        #[serde(default = "default_benford_threshold")]
7767        threshold_mad: f64,
7768        /// Warning MAD threshold.
7769        #[serde(default = "default_benford_warning")]
7770        warning_mad: f64,
7771    },
7772    /// Distribution fit test.
7773    DistributionFit {
7774        /// Target distribution to test.
7775        target: TargetDistributionConfig,
7776        /// K-S test significance level.
7777        #[serde(default = "default_ks_significance")]
7778        ks_significance: f64,
7779        /// Test method (ks, anderson_darling, chi_squared).
7780        #[serde(default)]
7781        method: DistributionFitMethod,
7782    },
7783    /// Correlation check.
7784    CorrelationCheck {
7785        /// Expected correlations to validate.
7786        expected_correlations: Vec<ExpectedCorrelationConfig>,
7787    },
7788    /// Chi-squared test.
7789    ChiSquared {
7790        /// Number of bins.
7791        #[serde(default = "default_chi_squared_bins")]
7792        bins: usize,
7793        /// Significance level.
7794        #[serde(default = "default_chi_squared_significance")]
7795        significance: f64,
7796    },
7797    /// Anderson-Darling test.
7798    AndersonDarling {
7799        /// Target distribution.
7800        target: TargetDistributionConfig,
7801        /// Significance level.
7802        #[serde(default = "default_ad_significance")]
7803        significance: f64,
7804    },
7805}
7806
/// Serde default for the `threshold_mad` of
/// `StatisticalTestConfig::BenfordFirstDigit`.
fn default_benford_threshold() -> f64 {
    const THRESHOLD_MAD: f64 = 0.015;
    THRESHOLD_MAD
}
7810
/// Serde default for the `warning_mad` of
/// `StatisticalTestConfig::BenfordFirstDigit`.
fn default_benford_warning() -> f64 {
    const WARNING_MAD: f64 = 0.010;
    WARNING_MAD
}
7814
/// Serde default for the `ks_significance` of
/// `StatisticalTestConfig::DistributionFit`.
fn default_ks_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7818
/// Serde default for the `bins` of `StatisticalTestConfig::ChiSquared`.
fn default_chi_squared_bins() -> usize {
    const BINS: usize = 10;
    BINS
}
7822
/// Serde default for the `significance` of
/// `StatisticalTestConfig::ChiSquared`.
fn default_chi_squared_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7826
/// Serde default for the `significance` of
/// `StatisticalTestConfig::AndersonDarling`.
fn default_ad_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7830
7831/// Target distribution for fit tests.
7832#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7833#[serde(rename_all = "snake_case")]
7834pub enum TargetDistributionConfig {
7835    /// Normal distribution
7836    Normal,
7837    /// Log-normal distribution
7838    #[default]
7839    LogNormal,
7840    /// Exponential distribution
7841    Exponential,
7842    /// Uniform distribution
7843    Uniform,
7844}
7845
7846/// Distribution fit test method.
7847#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7848#[serde(rename_all = "snake_case")]
7849pub enum DistributionFitMethod {
7850    /// Kolmogorov-Smirnov test
7851    #[default]
7852    KolmogorovSmirnov,
7853    /// Anderson-Darling test
7854    AndersonDarling,
7855    /// Chi-squared test
7856    ChiSquared,
7857}
7858
7859/// Validation reporting configuration.
7860#[derive(Debug, Clone, Serialize, Deserialize)]
7861pub struct ValidationReportingConfig {
7862    /// Output validation report to file.
7863    #[serde(default)]
7864    pub output_report: bool,
7865
7866    /// Report format.
7867    #[serde(default)]
7868    pub format: ValidationReportFormat,
7869
7870    /// Fail generation if validation fails.
7871    #[serde(default)]
7872    pub fail_on_error: bool,
7873
7874    /// Include detailed statistics in report.
7875    #[serde(default = "default_true")]
7876    pub include_details: bool,
7877}
7878
7879impl Default for ValidationReportingConfig {
7880    fn default() -> Self {
7881        Self {
7882            output_report: false,
7883            format: ValidationReportFormat::Json,
7884            fail_on_error: false,
7885            include_details: true,
7886        }
7887    }
7888}
7889
7890/// Validation report format.
7891#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7892#[serde(rename_all = "snake_case")]
7893pub enum ValidationReportFormat {
7894    /// JSON format
7895    #[default]
7896    Json,
7897    /// YAML format
7898    Yaml,
7899    /// HTML report
7900    Html,
7901}
7902
7903// =============================================================================
7904// Temporal Patterns Configuration
7905// =============================================================================
7906
7907/// Temporal patterns configuration for business days, period-end dynamics, and processing lags.
7908///
7909/// This section enables sophisticated temporal modeling including:
7910/// - Business day calculations and settlement dates
7911/// - Regional holiday calendars
7912/// - Period-end decay curves (non-flat volume spikes)
7913/// - Processing lag modeling (event-to-posting delays)
7914#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7915pub struct TemporalPatternsConfig {
7916    /// Enable temporal patterns features.
7917    #[serde(default)]
7918    pub enabled: bool,
7919
7920    /// Business day calculation configuration.
7921    #[serde(default)]
7922    pub business_days: BusinessDaySchemaConfig,
7923
7924    /// Regional calendar configuration.
7925    #[serde(default)]
7926    pub calendars: CalendarSchemaConfig,
7927
7928    /// Period-end dynamics configuration.
7929    #[serde(default)]
7930    pub period_end: PeriodEndSchemaConfig,
7931
7932    /// Processing lag configuration.
7933    #[serde(default)]
7934    pub processing_lags: ProcessingLagSchemaConfig,
7935
7936    /// Fiscal calendar configuration (custom year start, 4-4-5, 13-period).
7937    #[serde(default)]
7938    pub fiscal_calendar: FiscalCalendarSchemaConfig,
7939
7940    /// Intra-day patterns configuration (morning spike, lunch dip, EOD rush).
7941    #[serde(default)]
7942    pub intraday: IntraDaySchemaConfig,
7943
7944    /// Timezone handling configuration.
7945    #[serde(default)]
7946    pub timezones: TimezoneSchemaConfig,
7947}
7948
7949/// Business day calculation configuration.
7950#[derive(Debug, Clone, Serialize, Deserialize)]
7951pub struct BusinessDaySchemaConfig {
7952    /// Enable business day calculations.
7953    #[serde(default = "default_true")]
7954    pub enabled: bool,
7955
7956    /// Half-day policy: "full_day", "half_day", "non_business_day".
7957    #[serde(default = "default_half_day_policy")]
7958    pub half_day_policy: String,
7959
7960    /// Settlement rules configuration.
7961    #[serde(default)]
7962    pub settlement_rules: SettlementRulesSchemaConfig,
7963
7964    /// Month-end convention: "modified_following", "preceding", "following", "end_of_month".
7965    #[serde(default = "default_month_end_convention")]
7966    pub month_end_convention: String,
7967
7968    /// Weekend days (e.g., ["saturday", "sunday"] or ["friday", "saturday"] for Middle East).
7969    #[serde(default)]
7970    pub weekend_days: Option<Vec<String>>,
7971}
7972
/// Serde fallback for `BusinessDaySchemaConfig::half_day_policy`: `"half_day"`.
fn default_half_day_policy() -> String {
    String::from("half_day")
}
7976
/// Serde fallback for `BusinessDaySchemaConfig::month_end_convention`:
/// `"modified_following"`.
fn default_month_end_convention() -> String {
    String::from("modified_following")
}
7980
7981impl Default for BusinessDaySchemaConfig {
7982    fn default() -> Self {
7983        Self {
7984            enabled: true,
7985            half_day_policy: "half_day".to_string(),
7986            settlement_rules: SettlementRulesSchemaConfig::default(),
7987            month_end_convention: "modified_following".to_string(),
7988            weekend_days: None,
7989        }
7990    }
7991}
7992
7993/// Settlement rules configuration.
7994#[derive(Debug, Clone, Serialize, Deserialize)]
7995pub struct SettlementRulesSchemaConfig {
7996    /// Equity settlement days (T+N).
7997    #[serde(default = "default_settlement_2")]
7998    pub equity_days: i32,
7999
8000    /// Government bonds settlement days.
8001    #[serde(default = "default_settlement_1")]
8002    pub government_bonds_days: i32,
8003
8004    /// FX spot settlement days.
8005    #[serde(default = "default_settlement_2")]
8006    pub fx_spot_days: i32,
8007
8008    /// Corporate bonds settlement days.
8009    #[serde(default = "default_settlement_2")]
8010    pub corporate_bonds_days: i32,
8011
8012    /// Wire transfer cutoff time (HH:MM format).
8013    #[serde(default = "default_wire_cutoff")]
8014    pub wire_cutoff_time: String,
8015
8016    /// International wire settlement days.
8017    #[serde(default = "default_settlement_1")]
8018    pub wire_international_days: i32,
8019
8020    /// ACH settlement days.
8021    #[serde(default = "default_settlement_1")]
8022    pub ach_days: i32,
8023}
8024
/// Serde fallback for settlement fields that settle one day out.
fn default_settlement_1() -> i32 {
    const ONE_DAY: i32 = 1;
    ONE_DAY
}
8028
/// Serde fallback for settlement fields that settle two days out.
fn default_settlement_2() -> i32 {
    const TWO_DAYS: i32 = 2;
    TWO_DAYS
}
8032
/// Serde fallback for `SettlementRulesSchemaConfig::wire_cutoff_time`: `"14:00"`.
fn default_wire_cutoff() -> String {
    String::from("14:00")
}
8036
8037impl Default for SettlementRulesSchemaConfig {
8038    fn default() -> Self {
8039        Self {
8040            equity_days: 2,
8041            government_bonds_days: 1,
8042            fx_spot_days: 2,
8043            corporate_bonds_days: 2,
8044            wire_cutoff_time: "14:00".to_string(),
8045            wire_international_days: 1,
8046            ach_days: 1,
8047        }
8048    }
8049}
8050
8051/// Regional calendar configuration.
8052#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8053pub struct CalendarSchemaConfig {
8054    /// List of regions to include (e.g., ["US", "DE", "BR", "SG", "KR"]).
8055    #[serde(default)]
8056    pub regions: Vec<String>,
8057
8058    /// Custom holidays (in addition to regional calendars).
8059    #[serde(default)]
8060    pub custom_holidays: Vec<CustomHolidaySchemaConfig>,
8061}
8062
8063/// Custom holiday configuration.
8064#[derive(Debug, Clone, Serialize, Deserialize)]
8065pub struct CustomHolidaySchemaConfig {
8066    /// Holiday name.
8067    pub name: String,
8068    /// Month (1-12).
8069    pub month: u8,
8070    /// Day of month.
8071    pub day: u8,
8072    /// Activity multiplier (0.0-1.0, default 0.05).
8073    #[serde(default = "default_holiday_multiplier")]
8074    pub activity_multiplier: f64,
8075}
8076
/// Serde fallback for `CustomHolidaySchemaConfig::activity_multiplier`: `0.05`.
fn default_holiday_multiplier() -> f64 {
    const HOLIDAY_ACTIVITY: f64 = 0.05;
    HOLIDAY_ACTIVITY
}
8080
8081/// Period-end dynamics configuration.
8082#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8083pub struct PeriodEndSchemaConfig {
8084    /// Model type: "flat", "exponential", "extended_crunch", "daily_profile".
8085    #[serde(default)]
8086    pub model: Option<String>,
8087
8088    /// Month-end configuration.
8089    #[serde(default)]
8090    pub month_end: Option<PeriodEndModelSchemaConfig>,
8091
8092    /// Quarter-end configuration.
8093    #[serde(default)]
8094    pub quarter_end: Option<PeriodEndModelSchemaConfig>,
8095
8096    /// Year-end configuration.
8097    #[serde(default)]
8098    pub year_end: Option<PeriodEndModelSchemaConfig>,
8099}
8100
8101/// Period-end model configuration.
8102#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8103pub struct PeriodEndModelSchemaConfig {
8104    /// Inherit configuration from another period (e.g., "month_end").
8105    #[serde(default)]
8106    pub inherit_from: Option<String>,
8107
8108    /// Additional multiplier on top of inherited/base model.
8109    #[serde(default)]
8110    pub additional_multiplier: Option<f64>,
8111
8112    /// Days before period end to start acceleration (negative, e.g., -10).
8113    #[serde(default)]
8114    pub start_day: Option<i32>,
8115
8116    /// Base multiplier at start of acceleration.
8117    #[serde(default)]
8118    pub base_multiplier: Option<f64>,
8119
8120    /// Peak multiplier on last day.
8121    #[serde(default)]
8122    pub peak_multiplier: Option<f64>,
8123
8124    /// Decay rate for exponential model (0.1-0.5 typical).
8125    #[serde(default)]
8126    pub decay_rate: Option<f64>,
8127
8128    /// Sustained high days for crunch model.
8129    #[serde(default)]
8130    pub sustained_high_days: Option<i32>,
8131}
8132
8133/// Processing lag configuration.
8134#[derive(Debug, Clone, Serialize, Deserialize)]
8135pub struct ProcessingLagSchemaConfig {
8136    /// Enable processing lag calculations.
8137    #[serde(default = "default_true")]
8138    pub enabled: bool,
8139
8140    /// Sales order lag configuration (log-normal mu, sigma).
8141    #[serde(default)]
8142    pub sales_order_lag: Option<LagDistributionSchemaConfig>,
8143
8144    /// Purchase order lag configuration.
8145    #[serde(default)]
8146    pub purchase_order_lag: Option<LagDistributionSchemaConfig>,
8147
8148    /// Goods receipt lag configuration.
8149    #[serde(default)]
8150    pub goods_receipt_lag: Option<LagDistributionSchemaConfig>,
8151
8152    /// Invoice receipt lag configuration.
8153    #[serde(default)]
8154    pub invoice_receipt_lag: Option<LagDistributionSchemaConfig>,
8155
8156    /// Invoice issue lag configuration.
8157    #[serde(default)]
8158    pub invoice_issue_lag: Option<LagDistributionSchemaConfig>,
8159
8160    /// Payment lag configuration.
8161    #[serde(default)]
8162    pub payment_lag: Option<LagDistributionSchemaConfig>,
8163
8164    /// Journal entry lag configuration.
8165    #[serde(default)]
8166    pub journal_entry_lag: Option<LagDistributionSchemaConfig>,
8167
8168    /// Cross-day posting configuration.
8169    #[serde(default)]
8170    pub cross_day_posting: Option<CrossDayPostingSchemaConfig>,
8171}
8172
8173impl Default for ProcessingLagSchemaConfig {
8174    fn default() -> Self {
8175        Self {
8176            enabled: true,
8177            sales_order_lag: None,
8178            purchase_order_lag: None,
8179            goods_receipt_lag: None,
8180            invoice_receipt_lag: None,
8181            invoice_issue_lag: None,
8182            payment_lag: None,
8183            journal_entry_lag: None,
8184            cross_day_posting: None,
8185        }
8186    }
8187}
8188
8189/// Lag distribution configuration (log-normal parameters).
8190#[derive(Debug, Clone, Serialize, Deserialize)]
8191pub struct LagDistributionSchemaConfig {
8192    /// Log-scale mean (mu for log-normal).
8193    pub mu: f64,
8194    /// Log-scale standard deviation (sigma for log-normal).
8195    pub sigma: f64,
8196    /// Minimum lag in hours.
8197    #[serde(default)]
8198    pub min_hours: Option<f64>,
8199    /// Maximum lag in hours.
8200    #[serde(default)]
8201    pub max_hours: Option<f64>,
8202}
8203
8204/// Cross-day posting configuration.
8205#[derive(Debug, Clone, Serialize, Deserialize)]
8206pub struct CrossDayPostingSchemaConfig {
8207    /// Enable cross-day posting logic.
8208    #[serde(default = "default_true")]
8209    pub enabled: bool,
8210
8211    /// Probability of next-day posting by hour (map of hour -> probability).
8212    /// E.g., { 17: 0.7, 19: 0.9, 21: 0.99 }
8213    #[serde(default)]
8214    pub probability_by_hour: std::collections::HashMap<u8, f64>,
8215}
8216
8217impl Default for CrossDayPostingSchemaConfig {
8218    fn default() -> Self {
8219        let mut probability_by_hour = std::collections::HashMap::new();
8220        probability_by_hour.insert(17, 0.3);
8221        probability_by_hour.insert(18, 0.6);
8222        probability_by_hour.insert(19, 0.8);
8223        probability_by_hour.insert(20, 0.9);
8224        probability_by_hour.insert(21, 0.95);
8225        probability_by_hour.insert(22, 0.99);
8226
8227        Self {
8228            enabled: true,
8229            probability_by_hour,
8230        }
8231    }
8232}
8233
8234// =============================================================================
8235// Fiscal Calendar Configuration (P2)
8236// =============================================================================
8237
8238/// Fiscal calendar configuration.
8239///
8240/// Supports calendar year, custom year start, 4-4-5 retail calendar,
8241/// and 13-period calendars.
8242#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8243pub struct FiscalCalendarSchemaConfig {
8244    /// Enable non-standard fiscal calendar.
8245    #[serde(default)]
8246    pub enabled: bool,
8247
8248    /// Fiscal calendar type: "calendar_year", "custom", "four_four_five", "thirteen_period".
8249    #[serde(default = "default_fiscal_calendar_type")]
8250    pub calendar_type: String,
8251
8252    /// Month the fiscal year starts (1-12). Used for custom year start.
8253    #[serde(default)]
8254    pub year_start_month: Option<u8>,
8255
8256    /// Day the fiscal year starts (1-31). Used for custom year start.
8257    #[serde(default)]
8258    pub year_start_day: Option<u8>,
8259
8260    /// 4-4-5 calendar configuration (if calendar_type is "four_four_five").
8261    #[serde(default)]
8262    pub four_four_five: Option<FourFourFiveSchemaConfig>,
8263}
8264
8265fn default_fiscal_calendar_type() -> String {
8266    "calendar_year".to_string()
8267}
8268
8269/// 4-4-5 retail calendar configuration.
8270#[derive(Debug, Clone, Serialize, Deserialize)]
8271pub struct FourFourFiveSchemaConfig {
8272    /// Week pattern: "four_four_five", "four_five_four", "five_four_four".
8273    #[serde(default = "default_week_pattern")]
8274    pub pattern: String,
8275
8276    /// Anchor type: "first_sunday", "last_saturday", "nearest_saturday".
8277    #[serde(default = "default_anchor_type")]
8278    pub anchor_type: String,
8279
8280    /// Anchor month (1-12).
8281    #[serde(default = "default_anchor_month")]
8282    pub anchor_month: u8,
8283
8284    /// Where to place leap week: "q4_period3" or "q1_period1".
8285    #[serde(default = "default_leap_week_placement")]
8286    pub leap_week_placement: String,
8287}
8288
/// Serde fallback for `FourFourFiveSchemaConfig::pattern`: `"four_four_five"`.
fn default_week_pattern() -> String {
    String::from("four_four_five")
}
8292
/// Serde fallback for `FourFourFiveSchemaConfig::anchor_type`: `"last_saturday"`.
fn default_anchor_type() -> String {
    String::from("last_saturday")
}
8296
/// Serde fallback for `FourFourFiveSchemaConfig::anchor_month`: January.
fn default_anchor_month() -> u8 {
    const JANUARY: u8 = 1;
    JANUARY
}
8300
/// Serde fallback for `FourFourFiveSchemaConfig::leap_week_placement`:
/// `"q4_period3"`.
fn default_leap_week_placement() -> String {
    String::from("q4_period3")
}
8304
8305impl Default for FourFourFiveSchemaConfig {
8306    fn default() -> Self {
8307        Self {
8308            pattern: "four_four_five".to_string(),
8309            anchor_type: "last_saturday".to_string(),
8310            anchor_month: 1,
8311            leap_week_placement: "q4_period3".to_string(),
8312        }
8313    }
8314}
8315
8316// =============================================================================
8317// Intra-Day Patterns Configuration (P2)
8318// =============================================================================
8319
8320/// Intra-day patterns configuration.
8321///
8322/// Defines time-of-day segments with different activity multipliers
8323/// for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
8324#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8325pub struct IntraDaySchemaConfig {
8326    /// Enable intra-day patterns.
8327    #[serde(default)]
8328    pub enabled: bool,
8329
8330    /// Custom intra-day segments.
8331    #[serde(default)]
8332    pub segments: Vec<IntraDaySegmentSchemaConfig>,
8333}
8334
8335/// Intra-day segment configuration.
8336#[derive(Debug, Clone, Serialize, Deserialize)]
8337pub struct IntraDaySegmentSchemaConfig {
8338    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
8339    pub name: String,
8340
8341    /// Start time (HH:MM format).
8342    pub start: String,
8343
8344    /// End time (HH:MM format).
8345    pub end: String,
8346
8347    /// Activity multiplier (1.0 = normal).
8348    #[serde(default = "default_multiplier")]
8349    pub multiplier: f64,
8350
8351    /// Posting type: "human", "system", "both".
8352    #[serde(default = "default_posting_type")]
8353    pub posting_type: String,
8354}
8355
/// Serde fallback for `IntraDaySegmentSchemaConfig::multiplier`: `1.0` (normal activity).
fn default_multiplier() -> f64 {
    const NORMAL_ACTIVITY: f64 = 1.0;
    NORMAL_ACTIVITY
}
8359
/// Serde fallback for `IntraDaySegmentSchemaConfig::posting_type`: `"both"`.
fn default_posting_type() -> String {
    String::from("both")
}
8363
8364// =============================================================================
8365// Timezone Configuration
8366// =============================================================================
8367
8368/// Timezone handling configuration for multi-region entities.
8369#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8370pub struct TimezoneSchemaConfig {
8371    /// Enable timezone handling.
8372    #[serde(default)]
8373    pub enabled: bool,
8374
8375    /// Default timezone (IANA format, e.g., "America/New_York").
8376    #[serde(default = "default_timezone")]
8377    pub default_timezone: String,
8378
8379    /// Consolidation timezone for group reporting (IANA format).
8380    #[serde(default = "default_consolidation_timezone")]
8381    pub consolidation_timezone: String,
8382
8383    /// Entity-to-timezone mappings.
8384    /// Supports patterns like "EU_*" -> "Europe/London".
8385    #[serde(default)]
8386    pub entity_mappings: Vec<EntityTimezoneMapping>,
8387}
8388
8389fn default_timezone() -> String {
8390    "America/New_York".to_string()
8391}
8392
8393fn default_consolidation_timezone() -> String {
8394    "UTC".to_string()
8395}
8396
8397/// Mapping from entity pattern to timezone.
8398#[derive(Debug, Clone, Serialize, Deserialize)]
8399pub struct EntityTimezoneMapping {
8400    /// Entity code pattern (e.g., "EU_*", "*_APAC", "1000").
8401    pub pattern: String,
8402
8403    /// Timezone (IANA format, e.g., "Europe/London").
8404    pub timezone: String,
8405}
8406
8407// =============================================================================
8408// Vendor Network Configuration
8409// =============================================================================
8410
8411/// Configuration for multi-tier vendor network generation.
8412#[derive(Debug, Clone, Serialize, Deserialize)]
8413pub struct VendorNetworkSchemaConfig {
8414    /// Enable vendor network generation.
8415    #[serde(default)]
8416    pub enabled: bool,
8417
8418    /// Maximum depth of supply chain tiers (1-3).
8419    #[serde(default = "default_vendor_tier_depth")]
8420    pub depth: u8,
8421
8422    /// Tier 1 vendor count configuration.
8423    #[serde(default)]
8424    pub tier1: TierCountSchemaConfig,
8425
8426    /// Tier 2 vendors per Tier 1 parent.
8427    #[serde(default)]
8428    pub tier2_per_parent: TierCountSchemaConfig,
8429
8430    /// Tier 3 vendors per Tier 2 parent.
8431    #[serde(default)]
8432    pub tier3_per_parent: TierCountSchemaConfig,
8433
8434    /// Vendor cluster distribution.
8435    #[serde(default)]
8436    pub clusters: VendorClusterSchemaConfig,
8437
8438    /// Concentration limits.
8439    #[serde(default)]
8440    pub dependencies: DependencySchemaConfig,
8441}
8442
8443fn default_vendor_tier_depth() -> u8 {
8444    3
8445}
8446
8447impl Default for VendorNetworkSchemaConfig {
8448    fn default() -> Self {
8449        Self {
8450            enabled: false,
8451            depth: 3,
8452            tier1: TierCountSchemaConfig { min: 50, max: 100 },
8453            tier2_per_parent: TierCountSchemaConfig { min: 4, max: 10 },
8454            tier3_per_parent: TierCountSchemaConfig { min: 2, max: 5 },
8455            clusters: VendorClusterSchemaConfig::default(),
8456            dependencies: DependencySchemaConfig::default(),
8457        }
8458    }
8459}
8460
8461/// Tier count configuration.
8462#[derive(Debug, Clone, Serialize, Deserialize)]
8463pub struct TierCountSchemaConfig {
8464    /// Minimum count.
8465    #[serde(default = "default_tier_min")]
8466    pub min: usize,
8467
8468    /// Maximum count.
8469    #[serde(default = "default_tier_max")]
8470    pub max: usize,
8471}
8472
/// Serde fallback for `TierCountSchemaConfig::min`: `5`.
fn default_tier_min() -> usize {
    const MIN_COUNT: usize = 5;
    MIN_COUNT
}
8476
/// Serde fallback for `TierCountSchemaConfig::max`: `20`.
fn default_tier_max() -> usize {
    const MAX_COUNT: usize = 20;
    MAX_COUNT
}
8480
8481impl Default for TierCountSchemaConfig {
8482    fn default() -> Self {
8483        Self {
8484            min: default_tier_min(),
8485            max: default_tier_max(),
8486        }
8487    }
8488}
8489
8490/// Vendor cluster distribution configuration.
8491#[derive(Debug, Clone, Serialize, Deserialize)]
8492pub struct VendorClusterSchemaConfig {
8493    /// Reliable strategic vendors percentage (default: 0.20).
8494    #[serde(default = "default_reliable_strategic")]
8495    pub reliable_strategic: f64,
8496
8497    /// Standard operational vendors percentage (default: 0.50).
8498    #[serde(default = "default_standard_operational")]
8499    pub standard_operational: f64,
8500
8501    /// Transactional vendors percentage (default: 0.25).
8502    #[serde(default = "default_transactional")]
8503    pub transactional: f64,
8504
8505    /// Problematic vendors percentage (default: 0.05).
8506    #[serde(default = "default_problematic")]
8507    pub problematic: f64,
8508}
8509
/// Serde fallback for `VendorClusterSchemaConfig::reliable_strategic`: `0.20`.
fn default_reliable_strategic() -> f64 {
    const SHARE: f64 = 0.20;
    SHARE
}
8513
/// Serde fallback for `VendorClusterSchemaConfig::standard_operational`: `0.50`.
fn default_standard_operational() -> f64 {
    const SHARE: f64 = 0.50;
    SHARE
}
8517
/// Serde fallback for `VendorClusterSchemaConfig::transactional`: `0.25`.
fn default_transactional() -> f64 {
    const SHARE: f64 = 0.25;
    SHARE
}
8521
/// Serde fallback for `VendorClusterSchemaConfig::problematic`: `0.05`.
fn default_problematic() -> f64 {
    const SHARE: f64 = 0.05;
    SHARE
}
8525
8526impl Default for VendorClusterSchemaConfig {
8527    fn default() -> Self {
8528        Self {
8529            reliable_strategic: 0.20,
8530            standard_operational: 0.50,
8531            transactional: 0.25,
8532            problematic: 0.05,
8533        }
8534    }
8535}
8536
8537/// Dependency and concentration limits configuration.
8538#[derive(Debug, Clone, Serialize, Deserialize)]
8539pub struct DependencySchemaConfig {
8540    /// Maximum concentration for a single vendor (default: 0.15).
8541    #[serde(default = "default_max_single_vendor")]
8542    pub max_single_vendor_concentration: f64,
8543
8544    /// Maximum concentration for top 5 vendors (default: 0.45).
8545    #[serde(default = "default_max_top5")]
8546    pub top_5_concentration: f64,
8547
8548    /// Percentage of single-source vendors (default: 0.05).
8549    #[serde(default = "default_single_source_percent")]
8550    pub single_source_percent: f64,
8551}
8552
/// Serde fallback for `DependencySchemaConfig::max_single_vendor_concentration`: `0.15`.
fn default_max_single_vendor() -> f64 {
    const LIMIT: f64 = 0.15;
    LIMIT
}
8556
/// Serde fallback for `DependencySchemaConfig::top_5_concentration`: `0.45`.
fn default_max_top5() -> f64 {
    const LIMIT: f64 = 0.45;
    LIMIT
}
8560
/// Serde fallback for `DependencySchemaConfig::single_source_percent`: `0.05`.
fn default_single_source_percent() -> f64 {
    const SHARE: f64 = 0.05;
    SHARE
}
8564
8565impl Default for DependencySchemaConfig {
8566    fn default() -> Self {
8567        Self {
8568            max_single_vendor_concentration: 0.15,
8569            top_5_concentration: 0.45,
8570            single_source_percent: 0.05,
8571        }
8572    }
8573}
8574
8575// =============================================================================
8576// Customer Segmentation Configuration
8577// =============================================================================
8578
8579/// Configuration for customer segmentation generation.
8580#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8581pub struct CustomerSegmentationSchemaConfig {
8582    /// Enable customer segmentation generation.
8583    #[serde(default)]
8584    pub enabled: bool,
8585
8586    /// Value segment distribution.
8587    #[serde(default)]
8588    pub value_segments: ValueSegmentsSchemaConfig,
8589
8590    /// Lifecycle stage configuration.
8591    #[serde(default)]
8592    pub lifecycle: LifecycleSchemaConfig,
8593
8594    /// Network (referrals, hierarchies) configuration.
8595    #[serde(default)]
8596    pub networks: CustomerNetworksSchemaConfig,
8597}
8598
8599/// Customer value segments distribution configuration.
8600#[derive(Debug, Clone, Serialize, Deserialize)]
8601pub struct ValueSegmentsSchemaConfig {
8602    /// Enterprise segment configuration.
8603    #[serde(default)]
8604    pub enterprise: SegmentDetailSchemaConfig,
8605
8606    /// Mid-market segment configuration.
8607    #[serde(default)]
8608    pub mid_market: SegmentDetailSchemaConfig,
8609
8610    /// SMB segment configuration.
8611    #[serde(default)]
8612    pub smb: SegmentDetailSchemaConfig,
8613
8614    /// Consumer segment configuration.
8615    #[serde(default)]
8616    pub consumer: SegmentDetailSchemaConfig,
8617}
8618
8619impl Default for ValueSegmentsSchemaConfig {
8620    fn default() -> Self {
8621        Self {
8622            enterprise: SegmentDetailSchemaConfig {
8623                revenue_share: 0.40,
8624                customer_share: 0.05,
8625                avg_order_value_range: "50000+".to_string(),
8626            },
8627            mid_market: SegmentDetailSchemaConfig {
8628                revenue_share: 0.35,
8629                customer_share: 0.20,
8630                avg_order_value_range: "5000-50000".to_string(),
8631            },
8632            smb: SegmentDetailSchemaConfig {
8633                revenue_share: 0.20,
8634                customer_share: 0.50,
8635                avg_order_value_range: "500-5000".to_string(),
8636            },
8637            consumer: SegmentDetailSchemaConfig {
8638                revenue_share: 0.05,
8639                customer_share: 0.25,
8640                avg_order_value_range: "50-500".to_string(),
8641            },
8642        }
8643    }
8644}
8645
8646/// Individual segment detail configuration.
8647#[derive(Debug, Clone, Serialize, Deserialize)]
8648pub struct SegmentDetailSchemaConfig {
8649    /// Revenue share for this segment.
8650    #[serde(default)]
8651    pub revenue_share: f64,
8652
8653    /// Customer share for this segment.
8654    #[serde(default)]
8655    pub customer_share: f64,
8656
8657    /// Average order value range (e.g., "5000-50000" or "50000+").
8658    #[serde(default)]
8659    pub avg_order_value_range: String,
8660}
8661
8662impl Default for SegmentDetailSchemaConfig {
8663    fn default() -> Self {
8664        Self {
8665            revenue_share: 0.25,
8666            customer_share: 0.25,
8667            avg_order_value_range: "1000-10000".to_string(),
8668        }
8669    }
8670}
8671
8672/// Customer lifecycle stage configuration.
8673#[derive(Debug, Clone, Serialize, Deserialize)]
8674pub struct LifecycleSchemaConfig {
8675    /// Prospect stage rate.
8676    #[serde(default)]
8677    pub prospect_rate: f64,
8678
8679    /// New customer stage rate.
8680    #[serde(default = "default_new_rate")]
8681    pub new_rate: f64,
8682
8683    /// Growth stage rate.
8684    #[serde(default = "default_growth_rate")]
8685    pub growth_rate: f64,
8686
8687    /// Mature stage rate.
8688    #[serde(default = "default_mature_rate")]
8689    pub mature_rate: f64,
8690
8691    /// At-risk stage rate.
8692    #[serde(default = "default_at_risk_rate")]
8693    pub at_risk_rate: f64,
8694
8695    /// Churned stage rate.
8696    #[serde(default = "default_churned_rate")]
8697    pub churned_rate: f64,
8698
8699    /// Won-back stage rate (churned customers reacquired).
8700    #[serde(default)]
8701    pub won_back_rate: f64,
8702}
8703
/// Serde fallback for `LifecycleSchemaConfig::new_rate`: `0.10`.
fn default_new_rate() -> f64 {
    const RATE: f64 = 0.10;
    RATE
}
8707
/// Serde fallback for `LifecycleSchemaConfig::growth_rate`: `0.15`.
fn default_growth_rate() -> f64 {
    const RATE: f64 = 0.15;
    RATE
}
8711
/// Serde fallback for `LifecycleSchemaConfig::mature_rate`: `0.60`.
fn default_mature_rate() -> f64 {
    const RATE: f64 = 0.60;
    RATE
}
8715
/// Serde fallback for `LifecycleSchemaConfig::at_risk_rate`: `0.10`.
fn default_at_risk_rate() -> f64 {
    const RATE: f64 = 0.10;
    RATE
}
8719
/// Serde fallback for `LifecycleSchemaConfig::churned_rate`: `0.05`.
fn default_churned_rate() -> f64 {
    const RATE: f64 = 0.05;
    RATE
}
8723
8724impl Default for LifecycleSchemaConfig {
8725    fn default() -> Self {
8726        Self {
8727            prospect_rate: 0.0,
8728            new_rate: 0.10,
8729            growth_rate: 0.15,
8730            mature_rate: 0.60,
8731            at_risk_rate: 0.10,
8732            churned_rate: 0.05,
8733            won_back_rate: 0.0,
8734        }
8735    }
8736}
8737
8738/// Customer networks configuration (referrals, hierarchies).
8739#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8740pub struct CustomerNetworksSchemaConfig {
8741    /// Referral network configuration.
8742    #[serde(default)]
8743    pub referrals: ReferralSchemaConfig,
8744
8745    /// Corporate hierarchy configuration.
8746    #[serde(default)]
8747    pub corporate_hierarchies: HierarchySchemaConfig,
8748}
8749
8750/// Referral network configuration.
8751#[derive(Debug, Clone, Serialize, Deserialize)]
8752pub struct ReferralSchemaConfig {
8753    /// Enable referral generation.
8754    #[serde(default = "default_true")]
8755    pub enabled: bool,
8756
8757    /// Rate of customers acquired via referral.
8758    #[serde(default = "default_referral_rate")]
8759    pub referral_rate: f64,
8760}
8761
/// Serde fallback for `ReferralSchemaConfig::referral_rate`: `0.15`.
fn default_referral_rate() -> f64 {
    const RATE: f64 = 0.15;
    RATE
}
8765
8766impl Default for ReferralSchemaConfig {
8767    fn default() -> Self {
8768        Self {
8769            enabled: true,
8770            referral_rate: 0.15,
8771        }
8772    }
8773}
8774
8775/// Corporate hierarchy configuration.
8776#[derive(Debug, Clone, Serialize, Deserialize)]
8777pub struct HierarchySchemaConfig {
8778    /// Enable corporate hierarchy generation.
8779    #[serde(default = "default_true")]
8780    pub enabled: bool,
8781
8782    /// Rate of customers in hierarchies.
8783    #[serde(default = "default_hierarchy_rate")]
8784    pub probability: f64,
8785}
8786
/// Serde fallback for `HierarchySchemaConfig::probability`: `0.30`.
fn default_hierarchy_rate() -> f64 {
    const RATE: f64 = 0.30;
    RATE
}
8790
8791impl Default for HierarchySchemaConfig {
8792    fn default() -> Self {
8793        Self {
8794            enabled: true,
8795            probability: 0.30,
8796        }
8797    }
8798}
8799
8800// =============================================================================
8801// Relationship Strength Configuration
8802// =============================================================================
8803
8804/// Configuration for relationship strength calculation.
8805#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8806pub struct RelationshipStrengthSchemaConfig {
8807    /// Enable relationship strength calculation.
8808    #[serde(default)]
8809    pub enabled: bool,
8810
8811    /// Calculation weights.
8812    #[serde(default)]
8813    pub calculation: StrengthCalculationSchemaConfig,
8814
8815    /// Strength thresholds for classification.
8816    #[serde(default)]
8817    pub thresholds: StrengthThresholdsSchemaConfig,
8818}
8819
8820/// Strength calculation weights configuration.
8821#[derive(Debug, Clone, Serialize, Deserialize)]
8822pub struct StrengthCalculationSchemaConfig {
8823    /// Weight for transaction volume (default: 0.30).
8824    #[serde(default = "default_volume_weight")]
8825    pub transaction_volume_weight: f64,
8826
8827    /// Weight for transaction count (default: 0.25).
8828    #[serde(default = "default_count_weight")]
8829    pub transaction_count_weight: f64,
8830
8831    /// Weight for relationship duration (default: 0.20).
8832    #[serde(default = "default_duration_weight")]
8833    pub relationship_duration_weight: f64,
8834
8835    /// Weight for recency (default: 0.15).
8836    #[serde(default = "default_recency_weight")]
8837    pub recency_weight: f64,
8838
8839    /// Weight for mutual connections (default: 0.10).
8840    #[serde(default = "default_mutual_weight")]
8841    pub mutual_connections_weight: f64,
8842
8843    /// Recency half-life in days (default: 90).
8844    #[serde(default = "default_recency_half_life")]
8845    pub recency_half_life_days: u32,
8846}
8847
/// Serde fallback for `StrengthCalculationSchemaConfig::transaction_volume_weight`: `0.30`.
fn default_volume_weight() -> f64 {
    const WEIGHT: f64 = 0.30;
    WEIGHT
}
8851
/// Serde fallback for `StrengthCalculationSchemaConfig::transaction_count_weight`: `0.25`.
fn default_count_weight() -> f64 {
    const WEIGHT: f64 = 0.25;
    WEIGHT
}
8855
/// Serde fallback for `StrengthCalculationSchemaConfig::relationship_duration_weight`: `0.20`.
fn default_duration_weight() -> f64 {
    const WEIGHT: f64 = 0.20;
    WEIGHT
}
8859
/// Serde fallback for `StrengthCalculationSchemaConfig::recency_weight`: `0.15`.
fn default_recency_weight() -> f64 {
    const WEIGHT: f64 = 0.15;
    WEIGHT
}
8863
/// Serde fallback for `StrengthCalculationSchemaConfig::mutual_connections_weight`: `0.10`.
fn default_mutual_weight() -> f64 {
    const WEIGHT: f64 = 0.10;
    WEIGHT
}
8867
/// Serde fallback for `StrengthCalculationSchemaConfig::recency_half_life_days`: `90`.
fn default_recency_half_life() -> u32 {
    const HALF_LIFE_DAYS: u32 = 90;
    HALF_LIFE_DAYS
}
8871
8872impl Default for StrengthCalculationSchemaConfig {
8873    fn default() -> Self {
8874        Self {
8875            transaction_volume_weight: 0.30,
8876            transaction_count_weight: 0.25,
8877            relationship_duration_weight: 0.20,
8878            recency_weight: 0.15,
8879            mutual_connections_weight: 0.10,
8880            recency_half_life_days: 90,
8881        }
8882    }
8883}
8884
8885/// Strength thresholds for relationship classification.
8886#[derive(Debug, Clone, Serialize, Deserialize)]
8887pub struct StrengthThresholdsSchemaConfig {
8888    /// Threshold for strong relationships (default: 0.7).
8889    #[serde(default = "default_strong_threshold")]
8890    pub strong: f64,
8891
8892    /// Threshold for moderate relationships (default: 0.4).
8893    #[serde(default = "default_moderate_threshold")]
8894    pub moderate: f64,
8895
8896    /// Threshold for weak relationships (default: 0.1).
8897    #[serde(default = "default_weak_threshold")]
8898    pub weak: f64,
8899}
8900
/// Serde fallback for `StrengthThresholdsSchemaConfig::strong`: `0.7`.
fn default_strong_threshold() -> f64 {
    const THRESHOLD: f64 = 0.7;
    THRESHOLD
}
8904
/// Serde fallback for `StrengthThresholdsSchemaConfig::moderate`: `0.4`.
fn default_moderate_threshold() -> f64 {
    const THRESHOLD: f64 = 0.4;
    THRESHOLD
}
8908
/// Serde fallback for `StrengthThresholdsSchemaConfig::weak`: `0.1`.
fn default_weak_threshold() -> f64 {
    const THRESHOLD: f64 = 0.1;
    THRESHOLD
}
8912
8913impl Default for StrengthThresholdsSchemaConfig {
8914    fn default() -> Self {
8915        Self {
8916            strong: 0.7,
8917            moderate: 0.4,
8918            weak: 0.1,
8919        }
8920    }
8921}
8922
8923// =============================================================================
8924// Cross-Process Links Configuration
8925// =============================================================================
8926
8927/// Configuration for cross-process linkages.
8928#[derive(Debug, Clone, Serialize, Deserialize)]
8929pub struct CrossProcessLinksSchemaConfig {
8930    /// Enable cross-process link generation.
8931    #[serde(default)]
8932    pub enabled: bool,
8933
8934    /// Enable inventory links between P2P and O2C.
8935    #[serde(default = "default_true")]
8936    pub inventory_p2p_o2c: bool,
8937
8938    /// Enable payment to bank reconciliation links.
8939    #[serde(default = "default_true")]
8940    pub payment_bank_reconciliation: bool,
8941
8942    /// Enable intercompany bilateral matching.
8943    #[serde(default = "default_true")]
8944    pub intercompany_bilateral: bool,
8945
8946    /// Percentage of GR/Deliveries to link via inventory (0.0 - 1.0).
8947    #[serde(default = "default_inventory_link_rate")]
8948    pub inventory_link_rate: f64,
8949}
8950
/// Serde default for `CrossProcessLinksSchemaConfig::inventory_link_rate`.
fn default_inventory_link_rate() -> f64 {
    const LINK_RATE: f64 = 0.30;
    LINK_RATE
}
8954
8955impl Default for CrossProcessLinksSchemaConfig {
8956    fn default() -> Self {
8957        Self {
8958            enabled: false,
8959            inventory_p2p_o2c: true,
8960            payment_bank_reconciliation: true,
8961            intercompany_bilateral: true,
8962            inventory_link_rate: 0.30,
8963        }
8964    }
8965}
8966
8967// =============================================================================
8968// Organizational Events Configuration
8969// =============================================================================
8970
8971/// Configuration for organizational events (acquisitions, divestitures, etc.).
8972#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8973pub struct OrganizationalEventsSchemaConfig {
8974    /// Enable organizational events.
8975    #[serde(default)]
8976    pub enabled: bool,
8977
8978    /// Effect blending mode (multiplicative, additive, maximum, minimum).
8979    #[serde(default)]
8980    pub effect_blending: EffectBlendingModeConfig,
8981
8982    /// Organizational events (acquisitions, divestitures, reorganizations, etc.).
8983    #[serde(default)]
8984    pub events: Vec<OrganizationalEventSchemaConfig>,
8985
8986    /// Process evolution events.
8987    #[serde(default)]
8988    pub process_evolution: Vec<ProcessEvolutionSchemaConfig>,
8989
8990    /// Technology transition events.
8991    #[serde(default)]
8992    pub technology_transitions: Vec<TechnologyTransitionSchemaConfig>,
8993}
8994
/// Effect blending mode for combining multiple event effects.
///
/// Serialized in snake_case (`multiplicative`, `additive`, `maximum`,
/// `minimum`). `Multiplicative` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum EffectBlendingModeConfig {
    /// Multiply effects together.
    #[default]
    Multiplicative,
    /// Add effects together.
    Additive,
    /// Take the maximum effect.
    Maximum,
    /// Take the minimum effect.
    Minimum,
}
9009
9010/// Configuration for a single organizational event.
9011#[derive(Debug, Clone, Serialize, Deserialize)]
9012pub struct OrganizationalEventSchemaConfig {
9013    /// Event ID.
9014    pub id: String,
9015
9016    /// Event type and configuration.
9017    pub event_type: OrganizationalEventTypeSchemaConfig,
9018
9019    /// Effective date.
9020    pub effective_date: String,
9021
9022    /// Transition duration in months.
9023    #[serde(default = "default_org_transition_months")]
9024    pub transition_months: u32,
9025
9026    /// Description.
9027    #[serde(default)]
9028    pub description: Option<String>,
9029}
9030
/// Serde default for `OrganizationalEventSchemaConfig::transition_months`.
fn default_org_transition_months() -> u32 {
    const TRANSITION_MONTHS: u32 = 6;
    TRANSITION_MONTHS
}
9034
9035/// Organizational event type configuration.
9036#[derive(Debug, Clone, Serialize, Deserialize)]
9037#[serde(tag = "type", rename_all = "snake_case")]
9038pub enum OrganizationalEventTypeSchemaConfig {
9039    /// Acquisition event.
9040    Acquisition {
9041        /// Acquired entity code.
9042        acquired_entity: String,
9043        /// Volume increase multiplier.
9044        #[serde(default = "default_acquisition_volume")]
9045        volume_increase: f64,
9046        /// Integration error rate.
9047        #[serde(default = "default_acquisition_error")]
9048        integration_error_rate: f64,
9049        /// Parallel posting days.
9050        #[serde(default = "default_parallel_days")]
9051        parallel_posting_days: u32,
9052    },
9053    /// Divestiture event.
9054    Divestiture {
9055        /// Divested entity code.
9056        divested_entity: String,
9057        /// Volume reduction factor.
9058        #[serde(default = "default_divestiture_volume")]
9059        volume_reduction: f64,
9060        /// Remove entity from generation.
9061        #[serde(default = "default_true_val")]
9062        remove_entity: bool,
9063    },
9064    /// Reorganization event.
9065    Reorganization {
9066        /// Cost center remapping.
9067        #[serde(default)]
9068        cost_center_remapping: std::collections::HashMap<String, String>,
9069        /// Transition error rate.
9070        #[serde(default = "default_reorg_error")]
9071        transition_error_rate: f64,
9072    },
9073    /// Leadership change event.
9074    LeadershipChange {
9075        /// Role that changed.
9076        role: String,
9077        /// Policy changes.
9078        #[serde(default)]
9079        policy_changes: Vec<String>,
9080    },
9081    /// Workforce reduction event.
9082    WorkforceReduction {
9083        /// Reduction percentage.
9084        #[serde(default = "default_workforce_reduction")]
9085        reduction_percent: f64,
9086        /// Error rate increase.
9087        #[serde(default = "default_workforce_error")]
9088        error_rate_increase: f64,
9089    },
9090    /// Merger event.
9091    Merger {
9092        /// Merged entity code.
9093        merged_entity: String,
9094        /// Volume increase multiplier.
9095        #[serde(default = "default_merger_volume")]
9096        volume_increase: f64,
9097    },
9098}
9099
/// Serde default for `Acquisition::volume_increase`.
fn default_acquisition_volume() -> f64 {
    const VOLUME_INCREASE: f64 = 1.35;
    VOLUME_INCREASE
}

/// Serde default for `Acquisition::integration_error_rate`.
fn default_acquisition_error() -> f64 {
    const INTEGRATION_ERROR_RATE: f64 = 0.05;
    INTEGRATION_ERROR_RATE
}

/// Serde default for `Acquisition::parallel_posting_days`.
fn default_parallel_days() -> u32 {
    const PARALLEL_POSTING_DAYS: u32 = 30;
    PARALLEL_POSTING_DAYS
}

/// Serde default for `Divestiture::volume_reduction`.
fn default_divestiture_volume() -> f64 {
    const VOLUME_REDUCTION: f64 = 0.70;
    VOLUME_REDUCTION
}

/// Serde default for boolean fields that are on unless configured otherwise
/// (e.g. `Divestiture::remove_entity`).
///
/// NOTE(review): `default_true`, referenced by `CrossProcessLinksSchemaConfig`,
/// looks like it serves the same purpose; confirm and consider consolidating.
fn default_true_val() -> bool {
    const ON: bool = true;
    ON
}

/// Serde default for `Reorganization::transition_error_rate`.
fn default_reorg_error() -> f64 {
    const TRANSITION_ERROR_RATE: f64 = 0.04;
    TRANSITION_ERROR_RATE
}

/// Serde default for `WorkforceReduction::reduction_percent`.
fn default_workforce_reduction() -> f64 {
    const REDUCTION_PERCENT: f64 = 0.10;
    REDUCTION_PERCENT
}

/// Serde default for `WorkforceReduction::error_rate_increase`.
fn default_workforce_error() -> f64 {
    const ERROR_RATE_INCREASE: f64 = 0.05;
    ERROR_RATE_INCREASE
}

/// Serde default for `Merger::volume_increase`.
fn default_merger_volume() -> f64 {
    const VOLUME_INCREASE: f64 = 1.80;
    VOLUME_INCREASE
}
9135
9136/// Configuration for a process evolution event.
9137#[derive(Debug, Clone, Serialize, Deserialize)]
9138pub struct ProcessEvolutionSchemaConfig {
9139    /// Event ID.
9140    pub id: String,
9141
9142    /// Event type.
9143    pub event_type: ProcessEvolutionTypeSchemaConfig,
9144
9145    /// Effective date.
9146    pub effective_date: String,
9147
9148    /// Description.
9149    #[serde(default)]
9150    pub description: Option<String>,
9151}
9152
9153/// Process evolution type configuration.
9154#[derive(Debug, Clone, Serialize, Deserialize)]
9155#[serde(tag = "type", rename_all = "snake_case")]
9156pub enum ProcessEvolutionTypeSchemaConfig {
9157    /// Process automation.
9158    ProcessAutomation {
9159        /// Process name.
9160        process_name: String,
9161        /// Manual rate before.
9162        #[serde(default = "default_manual_before")]
9163        manual_rate_before: f64,
9164        /// Manual rate after.
9165        #[serde(default = "default_manual_after")]
9166        manual_rate_after: f64,
9167    },
9168    /// Approval workflow change.
9169    ApprovalWorkflowChange {
9170        /// Description.
9171        description: String,
9172    },
9173    /// Control enhancement.
9174    ControlEnhancement {
9175        /// Control ID.
9176        control_id: String,
9177        /// Error reduction.
9178        #[serde(default = "default_error_reduction")]
9179        error_reduction: f64,
9180    },
9181}
9182
/// Serde default for `ProcessAutomation::manual_rate_before`.
fn default_manual_before() -> f64 {
    const MANUAL_RATE_BEFORE: f64 = 0.80;
    MANUAL_RATE_BEFORE
}

/// Serde default for `ProcessAutomation::manual_rate_after`.
fn default_manual_after() -> f64 {
    const MANUAL_RATE_AFTER: f64 = 0.15;
    MANUAL_RATE_AFTER
}

/// Serde default for `ControlEnhancement::error_reduction`.
fn default_error_reduction() -> f64 {
    const ERROR_REDUCTION: f64 = 0.02;
    ERROR_REDUCTION
}
9194
9195/// Configuration for a technology transition event.
9196#[derive(Debug, Clone, Serialize, Deserialize)]
9197pub struct TechnologyTransitionSchemaConfig {
9198    /// Event ID.
9199    pub id: String,
9200
9201    /// Event type.
9202    pub event_type: TechnologyTransitionTypeSchemaConfig,
9203
9204    /// Description.
9205    #[serde(default)]
9206    pub description: Option<String>,
9207}
9208
9209/// Technology transition type configuration.
9210#[derive(Debug, Clone, Serialize, Deserialize)]
9211#[serde(tag = "type", rename_all = "snake_case")]
9212pub enum TechnologyTransitionTypeSchemaConfig {
9213    /// ERP migration.
9214    ErpMigration {
9215        /// Source system.
9216        source_system: String,
9217        /// Target system.
9218        target_system: String,
9219        /// Cutover date.
9220        cutover_date: String,
9221        /// Stabilization end date.
9222        stabilization_end: String,
9223        /// Duplicate rate during migration.
9224        #[serde(default = "default_erp_duplicate_rate")]
9225        duplicate_rate: f64,
9226        /// Format mismatch rate.
9227        #[serde(default = "default_format_mismatch")]
9228        format_mismatch_rate: f64,
9229    },
9230    /// Module implementation.
9231    ModuleImplementation {
9232        /// Module name.
9233        module_name: String,
9234        /// Go-live date.
9235        go_live_date: String,
9236    },
9237}
9238
/// Serde default for `ErpMigration::duplicate_rate`.
fn default_erp_duplicate_rate() -> f64 {
    const DUPLICATE_RATE: f64 = 0.02;
    DUPLICATE_RATE
}

/// Serde default for `ErpMigration::format_mismatch_rate`.
fn default_format_mismatch() -> f64 {
    const FORMAT_MISMATCH_RATE: f64 = 0.03;
    FORMAT_MISMATCH_RATE
}
9246
9247// =============================================================================
9248// Behavioral Drift Configuration
9249// =============================================================================
9250
/// Configuration for behavioral drift (vendor, customer, employee behavior).
///
/// **Deprecated (v4.1.2):** this schema section is currently
/// validated-but-inert — no runtime code consumes its fields. Users
/// who want behavioral drift-style effects should reach for
/// `distributions.regime_changes` (v3.5.2+), which drives the
/// `DriftController` via the parameter-drift path. The schema type
/// remains for backward-compatible YAML loading; it will be removed
/// in a future major version once `regime_changes` gains per-entity
/// (vendor / customer / employee) targeting.
///
/// Every field is optional; a missing key falls back to that field
/// type's `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BehavioralDriftSchemaConfig {
    /// Enable behavioral drift. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Vendor behavior drift.
    #[serde(default)]
    pub vendor_behavior: VendorBehaviorSchemaConfig,

    /// Customer behavior drift.
    #[serde(default)]
    pub customer_behavior: CustomerBehaviorSchemaConfig,

    /// Employee behavior drift.
    #[serde(default)]
    pub employee_behavior: EmployeeBehaviorSchemaConfig,

    /// Collective behavior drift.
    #[serde(default)]
    pub collective: CollectiveBehaviorSchemaConfig,
}
9283
/// Vendor behavior drift configuration.
///
/// Groups the vendor-side drift knobs; each sub-section falls back to its
/// own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VendorBehaviorSchemaConfig {
    /// Payment terms drift.
    #[serde(default)]
    pub payment_terms_drift: PaymentTermsDriftSchemaConfig,

    /// Quality drift.
    #[serde(default)]
    pub quality_drift: QualityDriftSchemaConfig,
}
9295
/// Payment terms drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PaymentTermsDriftSchemaConfig {
    /// Extension rate per year (days). Defaults to `2.5`.
    #[serde(default = "default_extension_rate")]
    pub extension_rate_per_year: f64,

    /// Economic sensitivity. Defaults to `1.0`.
    #[serde(default = "default_economic_sensitivity")]
    pub economic_sensitivity: f64,
}
9307
/// Serde default for `PaymentTermsDriftSchemaConfig::extension_rate_per_year`.
fn default_extension_rate() -> f64 {
    const EXTENSION_RATE_PER_YEAR: f64 = 2.5;
    EXTENSION_RATE_PER_YEAR
}

/// Serde default for `PaymentTermsDriftSchemaConfig::economic_sensitivity`.
fn default_economic_sensitivity() -> f64 {
    const ECONOMIC_SENSITIVITY: f64 = 1.0;
    ECONOMIC_SENSITIVITY
}
9315
9316impl Default for PaymentTermsDriftSchemaConfig {
9317    fn default() -> Self {
9318        Self {
9319            extension_rate_per_year: 2.5,
9320            economic_sensitivity: 1.0,
9321        }
9322    }
9323}
9324
/// Quality drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDriftSchemaConfig {
    /// New vendor improvement rate (per year). Defaults to `0.02`.
    #[serde(default = "default_improvement_rate")]
    pub new_vendor_improvement_rate: f64,

    /// Complacency decline rate (per year after first year). Defaults to `0.01`.
    #[serde(default = "default_decline_rate")]
    pub complacency_decline_rate: f64,
}
9336
/// Serde default for `QualityDriftSchemaConfig::new_vendor_improvement_rate`.
fn default_improvement_rate() -> f64 {
    const IMPROVEMENT_RATE: f64 = 0.02;
    IMPROVEMENT_RATE
}

/// Serde default for `QualityDriftSchemaConfig::complacency_decline_rate`.
fn default_decline_rate() -> f64 {
    const DECLINE_RATE: f64 = 0.01;
    DECLINE_RATE
}
9344
9345impl Default for QualityDriftSchemaConfig {
9346    fn default() -> Self {
9347        Self {
9348            new_vendor_improvement_rate: 0.02,
9349            complacency_decline_rate: 0.01,
9350        }
9351    }
9352}
9353
/// Customer behavior drift configuration.
///
/// Groups the customer-side drift knobs; each sub-section falls back to its
/// own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CustomerBehaviorSchemaConfig {
    /// Payment drift.
    #[serde(default)]
    pub payment_drift: CustomerPaymentDriftSchemaConfig,

    /// Order drift.
    #[serde(default)]
    pub order_drift: OrderDriftSchemaConfig,
}
9365
/// Customer payment drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomerPaymentDriftSchemaConfig {
    /// Days extension during downturn (min, max). Defaults to `(5, 15)`.
    #[serde(default = "default_downturn_extension")]
    pub downturn_days_extension: (u32, u32),

    /// Bad debt increase during downturn. Defaults to `0.02`.
    #[serde(default = "default_bad_debt_increase")]
    pub downturn_bad_debt_increase: f64,
}
9377
/// Serde default for `CustomerPaymentDriftSchemaConfig::downturn_days_extension`,
/// as a `(min, max)` pair of days.
fn default_downturn_extension() -> (u32, u32) {
    const MIN_DAYS: u32 = 5;
    const MAX_DAYS: u32 = 15;
    (MIN_DAYS, MAX_DAYS)
}

/// Serde default for `CustomerPaymentDriftSchemaConfig::downturn_bad_debt_increase`.
fn default_bad_debt_increase() -> f64 {
    const BAD_DEBT_INCREASE: f64 = 0.02;
    BAD_DEBT_INCREASE
}
9385
9386impl Default for CustomerPaymentDriftSchemaConfig {
9387    fn default() -> Self {
9388        Self {
9389            downturn_days_extension: (5, 15),
9390            downturn_bad_debt_increase: 0.02,
9391        }
9392    }
9393}
9394
/// Order drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrderDriftSchemaConfig {
    /// Digital shift rate (per year). Defaults to `0.05`.
    #[serde(default = "default_digital_shift")]
    pub digital_shift_rate: f64,
}
9402
/// Serde default for `OrderDriftSchemaConfig::digital_shift_rate`.
fn default_digital_shift() -> f64 {
    const DIGITAL_SHIFT_RATE: f64 = 0.05;
    DIGITAL_SHIFT_RATE
}
9406
9407impl Default for OrderDriftSchemaConfig {
9408    fn default() -> Self {
9409        Self {
9410            digital_shift_rate: 0.05,
9411        }
9412    }
9413}
9414
/// Employee behavior drift configuration.
///
/// Groups the employee-side drift knobs; each sub-section falls back to its
/// own `Default` when omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EmployeeBehaviorSchemaConfig {
    /// Approval drift.
    #[serde(default)]
    pub approval_drift: ApprovalDriftSchemaConfig,

    /// Error drift.
    #[serde(default)]
    pub error_drift: ErrorDriftSchemaConfig,
}
9426
/// Approval drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalDriftSchemaConfig {
    /// EOM intensity increase per year. Defaults to `0.05`.
    #[serde(default = "default_eom_intensity")]
    pub eom_intensity_increase_per_year: f64,

    /// Rubber stamp volume threshold. Defaults to `50`.
    #[serde(default = "default_rubber_stamp")]
    pub rubber_stamp_volume_threshold: u32,
}
9438
/// Serde default for `ApprovalDriftSchemaConfig::eom_intensity_increase_per_year`.
fn default_eom_intensity() -> f64 {
    const EOM_INTENSITY_INCREASE: f64 = 0.05;
    EOM_INTENSITY_INCREASE
}

/// Serde default for `ApprovalDriftSchemaConfig::rubber_stamp_volume_threshold`.
fn default_rubber_stamp() -> u32 {
    const VOLUME_THRESHOLD: u32 = 50;
    VOLUME_THRESHOLD
}
9446
9447impl Default for ApprovalDriftSchemaConfig {
9448    fn default() -> Self {
9449        Self {
9450            eom_intensity_increase_per_year: 0.05,
9451            rubber_stamp_volume_threshold: 50,
9452        }
9453    }
9454}
9455
/// Error drift configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorDriftSchemaConfig {
    /// New employee error rate. Defaults to `0.08`.
    #[serde(default = "default_new_error")]
    pub new_employee_error_rate: f64,

    /// Learning curve months. Defaults to `6`.
    #[serde(default = "default_learning_months")]
    pub learning_curve_months: u32,
}
9467
/// Serde default for `ErrorDriftSchemaConfig::new_employee_error_rate`.
fn default_new_error() -> f64 {
    const NEW_EMPLOYEE_ERROR_RATE: f64 = 0.08;
    NEW_EMPLOYEE_ERROR_RATE
}

/// Serde default for `ErrorDriftSchemaConfig::learning_curve_months`.
fn default_learning_months() -> u32 {
    const LEARNING_CURVE_MONTHS: u32 = 6;
    LEARNING_CURVE_MONTHS
}
9475
9476impl Default for ErrorDriftSchemaConfig {
9477    fn default() -> Self {
9478        Self {
9479            new_employee_error_rate: 0.08,
9480            learning_curve_months: 6,
9481        }
9482    }
9483}
9484
/// Collective behavior drift configuration.
///
/// Currently holds a single sub-section, which falls back to its own
/// `Default` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CollectiveBehaviorSchemaConfig {
    /// Automation adoption configuration.
    #[serde(default)]
    pub automation_adoption: AutomationAdoptionSchemaConfig,
}
9492
/// Automation adoption configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAdoptionSchemaConfig {
    /// Enable S-curve adoption model. Defaults to `false`.
    #[serde(default)]
    pub s_curve_enabled: bool,

    /// Adoption midpoint in months. Defaults to `24`.
    #[serde(default = "default_midpoint")]
    pub adoption_midpoint_months: u32,

    /// Steepness of adoption curve. Defaults to `0.15`.
    #[serde(default = "default_steepness")]
    pub steepness: f64,
}
9508
/// Serde default for `AutomationAdoptionSchemaConfig::adoption_midpoint_months`.
fn default_midpoint() -> u32 {
    const MIDPOINT_MONTHS: u32 = 24;
    MIDPOINT_MONTHS
}

/// Serde default for `AutomationAdoptionSchemaConfig::steepness`.
fn default_steepness() -> f64 {
    const STEEPNESS: f64 = 0.15;
    STEEPNESS
}
9516
9517impl Default for AutomationAdoptionSchemaConfig {
9518    fn default() -> Self {
9519        Self {
9520            s_curve_enabled: false,
9521            adoption_midpoint_months: 24,
9522            steepness: 0.15,
9523        }
9524    }
9525}
9526
9527// =============================================================================
9528// Market Drift Configuration
9529// =============================================================================
9530
/// Configuration for market drift (economic cycles, commodities, price shocks).
///
/// **Deprecated (v4.1.2):** validated-but-inert. Use
/// `distributions.regime_changes.economic_cycle` +
/// `distributions.regime_changes.parameter_drifts` for the
/// equivalent runtime behaviour (shipped in v3.5.2). The schema
/// type remains for backward-compatible YAML loading; will be
/// removed in v5.0.
///
/// Every field is optional; a missing key falls back to that field
/// type's `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct MarketDriftSchemaConfig {
    /// Enable market drift. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Economic cycle configuration.
    #[serde(default)]
    pub economic_cycle: MarketEconomicCycleSchemaConfig,

    /// Industry-specific cycles, keyed by a string identifier.
    /// Defaults to an empty map.
    #[serde(default)]
    pub industry_cycles: std::collections::HashMap<String, IndustryCycleSchemaConfig>,

    /// Commodity drift configuration.
    #[serde(default)]
    pub commodities: CommoditiesSchemaConfig,
}
9557
/// Market economic cycle configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarketEconomicCycleSchemaConfig {
    /// Enable economic cycle. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Cycle type. Defaults to `CycleTypeSchemaConfig::Sinusoidal`.
    #[serde(default)]
    pub cycle_type: CycleTypeSchemaConfig,

    /// Cycle period in months. Defaults to `48`.
    #[serde(default = "default_market_cycle_period")]
    pub period_months: u32,

    /// Amplitude. Defaults to `0.15`.
    #[serde(default = "default_market_amplitude")]
    pub amplitude: f64,

    /// Recession configuration.
    #[serde(default)]
    pub recession: RecessionSchemaConfig,
}
9581
/// Serde default for `MarketEconomicCycleSchemaConfig::period_months`.
fn default_market_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}

/// Serde default for `MarketEconomicCycleSchemaConfig::amplitude`.
fn default_market_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
9589
9590impl Default for MarketEconomicCycleSchemaConfig {
9591    fn default() -> Self {
9592        Self {
9593            enabled: false,
9594            cycle_type: CycleTypeSchemaConfig::Sinusoidal,
9595            period_months: 48,
9596            amplitude: 0.15,
9597            recession: RecessionSchemaConfig::default(),
9598        }
9599    }
9600}
9601
/// Cycle type configuration.
///
/// Serialized in snake_case (`sinusoidal`, `asymmetric`, `mean_reverting`).
/// `Sinusoidal` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CycleTypeSchemaConfig {
    /// Sinusoidal cycle.
    #[default]
    Sinusoidal,
    /// Asymmetric cycle.
    Asymmetric,
    /// Mean-reverting cycle.
    MeanReverting,
}
9614
/// Recession configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionSchemaConfig {
    /// Enable recession simulation. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Probability per year. Defaults to `0.10`.
    #[serde(default = "default_recession_prob")]
    pub probability_per_year: f64,

    /// Severity. Defaults to `RecessionSeveritySchemaConfig::Moderate`.
    #[serde(default)]
    pub severity: RecessionSeveritySchemaConfig,

    /// Specific recession periods. Defaults to an empty list.
    #[serde(default)]
    pub recession_periods: Vec<RecessionPeriodSchemaConfig>,
}
9634
/// Serde default for `RecessionSchemaConfig::probability_per_year`.
fn default_recession_prob() -> f64 {
    const PROBABILITY_PER_YEAR: f64 = 0.10;
    PROBABILITY_PER_YEAR
}
9638
9639impl Default for RecessionSchemaConfig {
9640    fn default() -> Self {
9641        Self {
9642            enabled: false,
9643            probability_per_year: 0.10,
9644            severity: RecessionSeveritySchemaConfig::Moderate,
9645            recession_periods: Vec::new(),
9646        }
9647    }
9648}
9649
/// Recession severity configuration.
///
/// Serialized in snake_case (`mild`, `moderate`, `severe`). `Moderate` is
/// the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RecessionSeveritySchemaConfig {
    /// Mild recession.
    Mild,
    /// Moderate recession.
    #[default]
    Moderate,
    /// Severe recession.
    Severe,
}
9662
/// Recession period configuration.
///
/// Both fields are required (neither carries a serde default).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodSchemaConfig {
    /// Start month.
    // NOTE(review): the reference point for the month count (0- vs 1-based,
    // relative to what start) is not visible here — confirm with the consumer.
    pub start_month: u32,
    /// Duration in months.
    pub duration_months: u32,
}
9671
/// Industry cycle configuration.
///
/// Used as the value type of `MarketDriftSchemaConfig::industry_cycles`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryCycleSchemaConfig {
    /// Period in months. Defaults to `36`.
    #[serde(default = "default_industry_period")]
    pub period_months: u32,

    /// Amplitude. Defaults to `0.20`.
    #[serde(default = "default_industry_amp")]
    pub amplitude: f64,
}
9683
/// Serde default for `IndustryCycleSchemaConfig::period_months`.
fn default_industry_period() -> u32 {
    const PERIOD_MONTHS: u32 = 36;
    PERIOD_MONTHS
}

/// Serde default for `IndustryCycleSchemaConfig::amplitude`.
fn default_industry_amp() -> f64 {
    const AMPLITUDE: f64 = 0.20;
    AMPLITUDE
}
9691
/// Commodities drift configuration.
///
/// An omitted section deserializes to disabled with no items.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CommoditiesSchemaConfig {
    /// Enable commodity drift. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Commodity items. Defaults to an empty list.
    #[serde(default)]
    pub items: Vec<CommodityItemSchemaConfig>,
}
9703
/// Commodity item configuration.
///
/// Only `name` is required; the numeric fields have serde defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommodityItemSchemaConfig {
    /// Commodity name.
    pub name: String,

    /// Volatility. Defaults to `0.20`.
    #[serde(default = "default_volatility")]
    pub volatility: f64,

    /// COGS pass-through. Defaults to `0.0`.
    #[serde(default)]
    pub cogs_pass_through: f64,

    /// Overhead pass-through. Defaults to `0.0`.
    #[serde(default)]
    pub overhead_pass_through: f64,
}
9722
/// Serde default for `CommodityItemSchemaConfig::volatility`.
fn default_volatility() -> f64 {
    const VOLATILITY: f64 = 0.20;
    VOLATILITY
}
9726
9727// =============================================================================
9728// Drift Labeling Configuration
9729// =============================================================================
9730
/// Configuration for drift ground truth labeling.
///
/// **Deprecated (v4.1.2):** validated-but-inert. The v3.3.0
/// analytics-metadata phase (`DriftEventGenerator` +
/// `AnalyticsMetadataSnapshot.drift_events`) produces drift labels
/// at runtime — configure it via `analytics_metadata.drift_events`
/// instead. The schema type remains for backward-compatible YAML
/// loading; will be removed in v5.0.
///
/// Note that the master `enabled` switch defaults to `false` while the
/// three per-kind sub-sections each default to enabled.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DriftLabelingSchemaConfig {
    /// Enable drift labeling. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Statistical drift labeling.
    #[serde(default)]
    pub statistical: StatisticalDriftLabelingSchemaConfig,

    /// Categorical drift labeling.
    #[serde(default)]
    pub categorical: CategoricalDriftLabelingSchemaConfig,

    /// Temporal drift labeling.
    #[serde(default)]
    pub temporal: TemporalDriftLabelingSchemaConfig,

    /// Regulatory calendar preset. Defaults to `None`.
    #[serde(default)]
    pub regulatory_calendar_preset: Option<String>,
}
9761
/// Statistical drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalDriftLabelingSchemaConfig {
    /// Enable statistical drift labeling. Defaults to `true`.
    #[serde(default = "default_true_val")]
    pub enabled: bool,

    /// Minimum magnitude threshold. Defaults to `0.05`.
    #[serde(default = "default_min_magnitude")]
    pub min_magnitude_threshold: f64,
}
9773
/// Serde default for `StatisticalDriftLabelingSchemaConfig::min_magnitude_threshold`.
fn default_min_magnitude() -> f64 {
    const MIN_MAGNITUDE: f64 = 0.05;
    MIN_MAGNITUDE
}
9777
9778impl Default for StatisticalDriftLabelingSchemaConfig {
9779    fn default() -> Self {
9780        Self {
9781            enabled: true,
9782            min_magnitude_threshold: 0.05,
9783        }
9784    }
9785}
9786
/// Categorical drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoricalDriftLabelingSchemaConfig {
    /// Enable categorical drift labeling. Defaults to `true`.
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9794
9795impl Default for CategoricalDriftLabelingSchemaConfig {
9796    fn default() -> Self {
9797        Self { enabled: true }
9798    }
9799}
9800
/// Temporal drift labeling configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalDriftLabelingSchemaConfig {
    /// Enable temporal drift labeling. Defaults to `true`.
    #[serde(default = "default_true_val")]
    pub enabled: bool,
}
9808
9809impl Default for TemporalDriftLabelingSchemaConfig {
9810    fn default() -> Self {
9811        Self { enabled: true }
9812    }
9813}
9814
9815// =============================================================================
9816// Enhanced Anomaly Injection Configuration
9817// =============================================================================
9818
9819/// Enhanced anomaly injection configuration.
9820///
9821/// Provides comprehensive anomaly injection capabilities including:
9822/// - Multi-stage fraud schemes (embezzlement, revenue manipulation, kickbacks)
9823/// - Correlated anomaly injection (co-occurrence patterns, error cascades)
9824/// - Near-miss generation for false positive reduction
9825/// - Detection difficulty classification
9826/// - Context-aware injection based on entity behavior
9827#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9828pub struct EnhancedAnomalyConfig {
9829    /// Enable enhanced anomaly injection.
9830    #[serde(default)]
9831    pub enabled: bool,
9832
9833    /// Base anomaly rates.
9834    #[serde(default)]
9835    pub rates: AnomalyRateConfig,
9836
9837    /// Multi-stage fraud scheme configuration.
9838    #[serde(default)]
9839    pub multi_stage_schemes: MultiStageSchemeConfig,
9840
9841    /// Correlated anomaly injection configuration.
9842    #[serde(default)]
9843    pub correlated_injection: CorrelatedInjectionConfig,
9844
9845    /// Near-miss generation configuration.
9846    #[serde(default)]
9847    pub near_miss: NearMissConfig,
9848
9849    /// Detection difficulty classification configuration.
9850    #[serde(default)]
9851    pub difficulty_classification: DifficultyClassificationConfig,
9852
9853    /// Context-aware injection configuration.
9854    #[serde(default)]
9855    pub context_aware: ContextAwareConfig,
9856
9857    /// Enhanced labeling configuration.
9858    #[serde(default)]
9859    pub labeling: EnhancedLabelingConfig,
9860
9861    /// SOTA-12 (#140, FINDINGS §13): post-process tagger that tags the top
9862    /// `rate × n_jes` JEs whose `(source, gl_account)` is rare under the
9863    /// per-source empirical PMF as `RelationalAnomalyType::SourceConditional-
9864    /// Rarity`. `None` = disabled (default); typical value `0.01` matches the
9865    /// audit-packet hot-list size. Runs AFTER per-entry strategies — additive,
9866    /// doesn't replace them.
9867    ///
9868    /// **Phase 1 deprecation note:** this key remains the source of truth for
9869    /// back-compat. If `concentration.source_conditional_rarity.rate` is also
9870    /// set, that value wins (it's an opt-in to the unified DSL).
9871    #[serde(default)]
9872    pub source_conditional_rarity_rate: Option<f64>,
9873}
9874
9875// ---------------------------------------------------------------------------
9876// ConcentrationConfig — central post-process pass pipeline (#143, Phase 1).
9877//
9878// Design reference:
9879//   docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md
9880//
9881// Phase 1 fields: SourceConditionalRarityPass (wrapping shipped SOTA-12) +
9882// TradingPartnerPoolPass (closes SOTA-11.1 / #142).
9883// Phase 2 will add: account_pair_substitution (closes SOTA-8.1 / #141).
9884// ---------------------------------------------------------------------------
9885
9886/// Top-level configuration for the post-generation concentration pipeline.
9887///
9888/// Each sub-field is `Option<_>`; presence enables the corresponding pass.
9889/// `enabled = false` (default) disables the pipeline regardless of sub-fields,
9890/// matching the parent proposal's "opt-in" guidance.
9891#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9892pub struct ConcentrationConfig {
9893    /// Master switch. `false` (default) → pipeline is no-op.
9894    #[serde(default)]
9895    pub enabled: bool,
9896
9897    /// Phase 1: source-conditional rarity tagger (wraps shipped SOTA-12).
9898    /// If also `anomaly_injection.source_conditional_rarity_rate` is set, this
9899    /// field wins.
9900    #[serde(default)]
9901    pub source_conditional_rarity: Option<SourceConditionalRarityPassConfig>,
9902
9903    /// Phase 1: trading-partner pool resizing (closes SOTA-11.1 / #142).
9904    #[serde(default)]
9905    pub trading_partner_pool: Option<TradingPartnerPoolPassConfig>,
9906
9907    /// Phase 2: account-pair substitution against a corpus-derived PMF
9908    /// (closes SOTA-8.1 / #141). Defers to Phase 2 when wired.
9909    #[serde(default)]
9910    pub account_pair_substitution: Option<AccountPairSubstitutionPassConfig>,
9911
9912    /// Phase 1.5: blank-source post-process (closes SOTA-7 / #132). Nulls
9913    /// `sap_source_code` on a configurable fraction of JEs to match the
9914    /// corpus's ~21% blank-source rate. Runs LAST in the pipeline so
9915    /// earlier passes (`SourceConditionalRarityPass`,
9916    /// `AccountPairSubstitutionPass`) see full source coverage.
9917    #[serde(default)]
9918    pub source_blanking: Option<SourceBlankingPassConfig>,
9919
9920    /// v5.30 B2 (#154) — heavy-tail consolidation outlier emission.
9921    /// Reshapes a small fraction of JEs (~0.001 typical) into
9922    /// multi-100-line postings touching bridge / suspense / clearing
9923    /// accounts. Lifts the synthetic relational_score p99/max
9924    /// percentiles toward the corpus's heavy tail without distorting
9925    /// the median. Honors `anomaly_injection.consolidation_outlier_rate`
9926    /// as a back-compat alias — if both are set, this DSL field wins.
9927    #[serde(default)]
9928    pub consolidation_outlier: Option<ConsolidationOutlierPassConfig>,
9929}
9930
9931/// Per-pass config for SourceConditionalRarityPass.
9932#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9933pub struct SourceConditionalRarityPassConfig {
9934    /// Fraction of input JEs to tag (typically `0.01`).
9935    pub rate: f64,
9936    /// Optional min surprise floor (Σ -log P(account|source)). Default `5.0`.
9937    #[serde(default)]
9938    pub min_surprise: Option<f64>,
9939    /// Per-source line-count floor (sources below have unreliable PMFs).
9940    /// Default `5`.
9941    #[serde(default)]
9942    pub min_per_source_lines: Option<u32>,
9943}
9944
9945/// Per-pass config for TradingPartnerPoolPass.
9946#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9947pub struct TradingPartnerPoolPassConfig {
9948    /// Target distinct trading-partner pool size. `0` is clamped to `1` at
9949    /// runtime. Typical corpus value `~12`; synthetic default `~40`.
9950    pub target_size: usize,
9951}
9952
9953/// Per-pass config for SourceBlankingPass (Phase 1.5 / SOTA-7).
9954#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9955pub struct SourceBlankingPassConfig {
9956    /// Fraction of JEs whose `sap_source_code` should be nulled. Typical
9957    /// corpus-matching value `0.21`. Clamped to `[0.0, 1.0]` at runtime.
9958    pub rate: f64,
9959}
9960
9961/// Per-pass config for ConsolidationOutlierPass (v5.30 B2 / #154).
9962///
9963/// Amounts are stored as `f64` here (schema layer) and converted to
9964/// `rust_decimal::Decimal` in the pass constructor. The synthetic
9965/// bridge-line amounts are log-uniformly distributed and the pp99
9966/// metric reads scale rather than exact value, so the f64 → Decimal
9967/// rounding is irrelevant for the heavy-tail signal we're trying to
9968/// emit. Keeping `rust_decimal` out of `datasynth-config`'s direct
9969/// dependency graph avoids a downstream crate-pull.
9970#[derive(Debug, Clone, Serialize, Deserialize)]
9971pub struct ConsolidationOutlierPassConfig {
9972    /// Fraction of JEs to reshape into multi-line bridge-account
9973    /// postings. Typical baseline `0.001` (one in a thousand).
9974    /// Clamped to `[0.0, 1.0]` at runtime.
9975    pub rate: f64,
9976    /// Minimum number of extra lines to append (always rounded up to
9977    /// an even number — lines are added in balanced DR/CR pairs).
9978    /// Default `50`.
9979    #[serde(default = "default_consolidation_outlier_min_lines")]
9980    pub min_extra_lines: usize,
9981    /// Maximum number of extra lines to append. Default `200`.
9982    #[serde(default = "default_consolidation_outlier_max_lines")]
9983    pub max_extra_lines: usize,
9984    /// Bridge / suspense / clearing accounts the appended lines use.
9985    /// Empty (default) → use the pass's built-in default list.
9986    #[serde(default)]
9987    pub bridge_accounts: Vec<String>,
9988    /// Minimum bridge-line amount (log-uniform draw). Default `100.0`.
9989    #[serde(default = "default_consolidation_outlier_min_amount")]
9990    pub line_amount_min: f64,
9991    /// Maximum bridge-line amount (log-uniform draw). Default `50_000.0`.
9992    #[serde(default = "default_consolidation_outlier_max_amount")]
9993    pub line_amount_max: f64,
9994}
9995
9996impl Default for ConsolidationOutlierPassConfig {
9997    fn default() -> Self {
9998        Self {
9999            rate: 0.0,
10000            min_extra_lines: default_consolidation_outlier_min_lines(),
10001            max_extra_lines: default_consolidation_outlier_max_lines(),
10002            bridge_accounts: Vec::new(),
10003            line_amount_min: default_consolidation_outlier_min_amount(),
10004            line_amount_max: default_consolidation_outlier_max_amount(),
10005        }
10006    }
10007}
10008
/// Serde fallback for `ConsolidationOutlierPassConfig::min_extra_lines`.
fn default_consolidation_outlier_min_lines() -> usize {
    const MIN_EXTRA_LINES: usize = 50;
    MIN_EXTRA_LINES
}

/// Serde fallback for `ConsolidationOutlierPassConfig::max_extra_lines`.
fn default_consolidation_outlier_max_lines() -> usize {
    const MAX_EXTRA_LINES: usize = 200;
    MAX_EXTRA_LINES
}

/// Serde fallback for `ConsolidationOutlierPassConfig::line_amount_min`.
fn default_consolidation_outlier_min_amount() -> f64 {
    const LINE_AMOUNT_MIN: f64 = 100.0;
    LINE_AMOUNT_MIN
}

/// Serde fallback for `ConsolidationOutlierPassConfig::line_amount_max`.
fn default_consolidation_outlier_max_amount() -> f64 {
    const LINE_AMOUNT_MAX: f64 = 50_000.0;
    LINE_AMOUNT_MAX
}
10021
10022/// Per-pass config for AccountPairSubstitutionPass (Phase 2).
10023#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10024pub struct AccountPairSubstitutionPassConfig {
10025    /// Path to a per-source pair-PMF JSON (produced by
10026    /// `corpus_vs_synth_gap.py --emit-pair-pmf`). Aggregate-only; never
10027    /// contains row content or client identifiers.
10028    pub pmf_path: String,
10029    /// JEs whose dominant (debit, credit) pair has corpus probability ≥ this
10030    /// threshold are left alone (they're already plausible). Default `0.005`.
10031    #[serde(default)]
10032    pub rarity_threshold: Option<f64>,
10033    /// When substituting, draw from the top-K corpus pairs (weighted by
10034    /// probability). Default `10`.
10035    #[serde(default)]
10036    pub top_k: Option<usize>,
10037}
10038
10039/// Base anomaly rate configuration.
10040#[derive(Debug, Clone, Serialize, Deserialize)]
10041pub struct AnomalyRateConfig {
10042    /// Total anomaly rate (0.0 to 1.0).
10043    #[serde(default = "default_total_anomaly_rate")]
10044    pub total_rate: f64,
10045
10046    /// Fraud anomaly rate.
10047    #[serde(default = "default_fraud_anomaly_rate")]
10048    pub fraud_rate: f64,
10049
10050    /// Error anomaly rate.
10051    #[serde(default = "default_error_anomaly_rate")]
10052    pub error_rate: f64,
10053
10054    /// Process issue rate.
10055    #[serde(default = "default_process_anomaly_rate")]
10056    pub process_rate: f64,
10057
10058    /// v5.30 B2 (#154) — heavy-tail outlier JE rate. Fraction of
10059    /// emitted JEs that get re-shaped into multi-100-line postings
10060    /// touching bridge accounts. Models real consolidation entries,
10061    /// period-end accruals, and manual reclasses. Default `0.0`
10062    /// preserves v5.29 byte-identical output; opt in (e.g. `0.001`)
10063    /// to lift synth p99/max relational_score percentiles toward the
10064    /// reference shard's heavy tail (~20× vs synth's default ~12×).
10065    #[serde(
10066        default = "default_consolidation_outlier_rate",
10067        alias = "consolidationOutlierRate"
10068    )]
10069    pub consolidation_outlier_rate: f64,
10070}
10071
/// Serde fallback for `AnomalyRateConfig::total_rate`.
fn default_total_anomaly_rate() -> f64 {
    const TOTAL_RATE: f64 = 0.03;
    TOTAL_RATE
}

/// Serde fallback for `AnomalyRateConfig::fraud_rate`.
fn default_fraud_anomaly_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.01;
    FRAUD_RATE
}

/// Serde fallback for `AnomalyRateConfig::error_rate`.
fn default_error_anomaly_rate() -> f64 {
    const ERROR_RATE: f64 = 0.015;
    ERROR_RATE
}

/// Serde fallback for `AnomalyRateConfig::process_rate`.
fn default_process_anomaly_rate() -> f64 {
    const PROCESS_RATE: f64 = 0.005;
    PROCESS_RATE
}

/// Serde fallback for `AnomalyRateConfig::consolidation_outlier_rate`.
fn default_consolidation_outlier_rate() -> f64 {
    // v5.30 B2 (#154): a deliberately small baseline. It nudges the
    // synth heavy tail toward the reference shard's p99 / max
    // relational_score (~20×) without overpowering downstream metrics.
    // At 0.001 roughly 1 in 1000 JEs becomes a multi-100-line
    // bridge-account posting, matching the observed corpus frequency
    // of period-close / manual reclass / consolidation entries.
    const OUTLIER_RATE: f64 = 0.001;
    OUTLIER_RATE
}
10093
10094impl Default for AnomalyRateConfig {
10095    fn default() -> Self {
10096        Self {
10097            total_rate: default_total_anomaly_rate(),
10098            fraud_rate: default_fraud_anomaly_rate(),
10099            error_rate: default_error_anomaly_rate(),
10100            process_rate: default_process_anomaly_rate(),
10101            consolidation_outlier_rate: default_consolidation_outlier_rate(),
10102        }
10103    }
10104}
10105
10106/// Multi-stage fraud scheme configuration.
10107#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10108pub struct MultiStageSchemeConfig {
10109    /// Enable multi-stage fraud schemes.
10110    #[serde(default)]
10111    pub enabled: bool,
10112
10113    /// Embezzlement scheme configuration.
10114    #[serde(default)]
10115    pub embezzlement: EmbezzlementSchemeConfig,
10116
10117    /// Revenue manipulation scheme configuration.
10118    #[serde(default)]
10119    pub revenue_manipulation: RevenueManipulationSchemeConfig,
10120
10121    /// Vendor kickback scheme configuration.
10122    #[serde(default)]
10123    pub kickback: KickbackSchemeConfig,
10124}
10125
10126/// Embezzlement scheme configuration.
10127#[derive(Debug, Clone, Serialize, Deserialize)]
10128pub struct EmbezzlementSchemeConfig {
10129    /// Probability of starting an embezzlement scheme per perpetrator per year.
10130    #[serde(default = "default_embezzlement_probability")]
10131    pub probability: f64,
10132
10133    /// Testing stage configuration.
10134    #[serde(default)]
10135    pub testing_stage: SchemeStageConfig,
10136
10137    /// Escalation stage configuration.
10138    #[serde(default)]
10139    pub escalation_stage: SchemeStageConfig,
10140
10141    /// Acceleration stage configuration.
10142    #[serde(default)]
10143    pub acceleration_stage: SchemeStageConfig,
10144
10145    /// Desperation stage configuration.
10146    #[serde(default)]
10147    pub desperation_stage: SchemeStageConfig,
10148}
10149
/// Serde fallback for `EmbezzlementSchemeConfig::probability`.
fn default_embezzlement_probability() -> f64 {
    const PROBABILITY: f64 = 0.02;
    PROBABILITY
}
10153
10154impl Default for EmbezzlementSchemeConfig {
10155    fn default() -> Self {
10156        Self {
10157            probability: default_embezzlement_probability(),
10158            testing_stage: SchemeStageConfig {
10159                duration_months: 2,
10160                amount_min: 100.0,
10161                amount_max: 500.0,
10162                transaction_count_min: 2,
10163                transaction_count_max: 5,
10164                difficulty: "hard".to_string(),
10165            },
10166            escalation_stage: SchemeStageConfig {
10167                duration_months: 6,
10168                amount_min: 500.0,
10169                amount_max: 2000.0,
10170                transaction_count_min: 3,
10171                transaction_count_max: 8,
10172                difficulty: "moderate".to_string(),
10173            },
10174            acceleration_stage: SchemeStageConfig {
10175                duration_months: 3,
10176                amount_min: 2000.0,
10177                amount_max: 10000.0,
10178                transaction_count_min: 5,
10179                transaction_count_max: 12,
10180                difficulty: "easy".to_string(),
10181            },
10182            desperation_stage: SchemeStageConfig {
10183                duration_months: 1,
10184                amount_min: 10000.0,
10185                amount_max: 50000.0,
10186                transaction_count_min: 3,
10187                transaction_count_max: 6,
10188                difficulty: "trivial".to_string(),
10189            },
10190        }
10191    }
10192}
10193
10194/// Revenue manipulation scheme configuration.
10195#[derive(Debug, Clone, Serialize, Deserialize)]
10196pub struct RevenueManipulationSchemeConfig {
10197    /// Probability of starting a revenue manipulation scheme per period.
10198    #[serde(default = "default_revenue_manipulation_probability")]
10199    pub probability: f64,
10200
10201    /// Early revenue recognition inflation target (Q4).
10202    #[serde(default = "default_early_recognition_target")]
10203    pub early_recognition_target: f64,
10204
10205    /// Expense deferral inflation target (Q1).
10206    #[serde(default = "default_expense_deferral_target")]
10207    pub expense_deferral_target: f64,
10208
10209    /// Reserve release inflation target (Q2).
10210    #[serde(default = "default_reserve_release_target")]
10211    pub reserve_release_target: f64,
10212
10213    /// Channel stuffing inflation target (Q4).
10214    #[serde(default = "default_channel_stuffing_target")]
10215    pub channel_stuffing_target: f64,
10216}
10217
/// Serde fallback for `RevenueManipulationSchemeConfig::probability`.
fn default_revenue_manipulation_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde fallback for `RevenueManipulationSchemeConfig::early_recognition_target`.
fn default_early_recognition_target() -> f64 {
    const EARLY_RECOGNITION: f64 = 0.02;
    EARLY_RECOGNITION
}

/// Serde fallback for `RevenueManipulationSchemeConfig::expense_deferral_target`.
fn default_expense_deferral_target() -> f64 {
    const EXPENSE_DEFERRAL: f64 = 0.03;
    EXPENSE_DEFERRAL
}

/// Serde fallback for `RevenueManipulationSchemeConfig::reserve_release_target`.
fn default_reserve_release_target() -> f64 {
    const RESERVE_RELEASE: f64 = 0.02;
    RESERVE_RELEASE
}

/// Serde fallback for `RevenueManipulationSchemeConfig::channel_stuffing_target`.
fn default_channel_stuffing_target() -> f64 {
    const CHANNEL_STUFFING: f64 = 0.05;
    CHANNEL_STUFFING
}
10233
10234impl Default for RevenueManipulationSchemeConfig {
10235    fn default() -> Self {
10236        Self {
10237            probability: default_revenue_manipulation_probability(),
10238            early_recognition_target: default_early_recognition_target(),
10239            expense_deferral_target: default_expense_deferral_target(),
10240            reserve_release_target: default_reserve_release_target(),
10241            channel_stuffing_target: default_channel_stuffing_target(),
10242        }
10243    }
10244}
10245
10246/// Vendor kickback scheme configuration.
10247#[derive(Debug, Clone, Serialize, Deserialize)]
10248pub struct KickbackSchemeConfig {
10249    /// Probability of starting a kickback scheme.
10250    #[serde(default = "default_kickback_probability")]
10251    pub probability: f64,
10252
10253    /// Minimum price inflation percentage.
10254    #[serde(default = "default_kickback_inflation_min")]
10255    pub inflation_min: f64,
10256
10257    /// Maximum price inflation percentage.
10258    #[serde(default = "default_kickback_inflation_max")]
10259    pub inflation_max: f64,
10260
10261    /// Kickback percentage (of inflation).
10262    #[serde(default = "default_kickback_percent")]
10263    pub kickback_percent: f64,
10264
10265    /// Setup duration in months.
10266    #[serde(default = "default_kickback_setup_months")]
10267    pub setup_months: u32,
10268
10269    /// Main operation duration in months.
10270    #[serde(default = "default_kickback_operation_months")]
10271    pub operation_months: u32,
10272}
10273
/// Serde fallback for `KickbackSchemeConfig::probability`.
fn default_kickback_probability() -> f64 {
    const PROBABILITY: f64 = 0.01;
    PROBABILITY
}

/// Serde fallback for `KickbackSchemeConfig::inflation_min`.
fn default_kickback_inflation_min() -> f64 {
    const INFLATION_MIN: f64 = 0.10;
    INFLATION_MIN
}

/// Serde fallback for `KickbackSchemeConfig::inflation_max`.
fn default_kickback_inflation_max() -> f64 {
    const INFLATION_MAX: f64 = 0.25;
    INFLATION_MAX
}

/// Serde fallback for `KickbackSchemeConfig::kickback_percent`.
fn default_kickback_percent() -> f64 {
    const KICKBACK_SHARE: f64 = 0.50;
    KICKBACK_SHARE
}

/// Serde fallback for `KickbackSchemeConfig::setup_months`.
fn default_kickback_setup_months() -> u32 {
    const SETUP_MONTHS: u32 = 3;
    SETUP_MONTHS
}

/// Serde fallback for `KickbackSchemeConfig::operation_months`.
fn default_kickback_operation_months() -> u32 {
    const OPERATION_MONTHS: u32 = 12;
    OPERATION_MONTHS
}
10292
10293impl Default for KickbackSchemeConfig {
10294    fn default() -> Self {
10295        Self {
10296            probability: default_kickback_probability(),
10297            inflation_min: default_kickback_inflation_min(),
10298            inflation_max: default_kickback_inflation_max(),
10299            kickback_percent: default_kickback_percent(),
10300            setup_months: default_kickback_setup_months(),
10301            operation_months: default_kickback_operation_months(),
10302        }
10303    }
10304}
10305
10306/// Individual scheme stage configuration.
10307#[derive(Debug, Clone, Serialize, Deserialize)]
10308pub struct SchemeStageConfig {
10309    /// Duration in months.
10310    pub duration_months: u32,
10311
10312    /// Minimum transaction amount.
10313    pub amount_min: f64,
10314
10315    /// Maximum transaction amount.
10316    pub amount_max: f64,
10317
10318    /// Minimum number of transactions.
10319    pub transaction_count_min: u32,
10320
10321    /// Maximum number of transactions.
10322    pub transaction_count_max: u32,
10323
10324    /// Detection difficulty level (trivial, easy, moderate, hard, expert).
10325    pub difficulty: String,
10326}
10327
10328impl Default for SchemeStageConfig {
10329    fn default() -> Self {
10330        Self {
10331            duration_months: 3,
10332            amount_min: 100.0,
10333            amount_max: 1000.0,
10334            transaction_count_min: 2,
10335            transaction_count_max: 10,
10336            difficulty: "moderate".to_string(),
10337        }
10338    }
10339}
10340
10341/// Correlated anomaly injection configuration.
10342#[derive(Debug, Clone, Serialize, Deserialize)]
10343pub struct CorrelatedInjectionConfig {
10344    /// Enable correlated anomaly injection.
10345    #[serde(default)]
10346    pub enabled: bool,
10347
10348    /// Enable fraud concealment co-occurrence patterns.
10349    #[serde(default = "default_true_val")]
10350    pub fraud_concealment: bool,
10351
10352    /// Enable error cascade patterns.
10353    #[serde(default = "default_true_val")]
10354    pub error_cascade: bool,
10355
10356    /// Enable temporal clustering (period-end spikes).
10357    #[serde(default = "default_true_val")]
10358    pub temporal_clustering: bool,
10359
10360    /// Temporal clustering configuration.
10361    #[serde(default)]
10362    pub temporal_clustering_config: TemporalClusteringConfig,
10363
10364    /// Co-occurrence patterns.
10365    #[serde(default)]
10366    pub co_occurrence_patterns: Vec<CoOccurrencePatternConfig>,
10367}
10368
10369impl Default for CorrelatedInjectionConfig {
10370    fn default() -> Self {
10371        Self {
10372            enabled: false,
10373            fraud_concealment: true,
10374            error_cascade: true,
10375            temporal_clustering: true,
10376            temporal_clustering_config: TemporalClusteringConfig::default(),
10377            co_occurrence_patterns: Vec::new(),
10378        }
10379    }
10380}
10381
10382/// Temporal clustering configuration.
10383#[derive(Debug, Clone, Serialize, Deserialize)]
10384pub struct TemporalClusteringConfig {
10385    /// Period-end error multiplier.
10386    #[serde(default = "default_period_end_multiplier")]
10387    pub period_end_multiplier: f64,
10388
10389    /// Number of business days before period end to apply multiplier.
10390    #[serde(default = "default_period_end_days")]
10391    pub period_end_days: u32,
10392
10393    /// Quarter-end additional multiplier.
10394    #[serde(default = "default_quarter_end_multiplier")]
10395    pub quarter_end_multiplier: f64,
10396
10397    /// Year-end additional multiplier.
10398    #[serde(default = "default_year_end_multiplier")]
10399    pub year_end_multiplier: f64,
10400}
10401
/// Serde fallback for `TemporalClusteringConfig::period_end_multiplier`.
fn default_period_end_multiplier() -> f64 {
    const PERIOD_END: f64 = 2.5;
    PERIOD_END
}

/// Serde fallback for `TemporalClusteringConfig::period_end_days`.
fn default_period_end_days() -> u32 {
    const WINDOW_DAYS: u32 = 5;
    WINDOW_DAYS
}

/// Serde fallback for `TemporalClusteringConfig::quarter_end_multiplier`.
fn default_quarter_end_multiplier() -> f64 {
    const QUARTER_END: f64 = 1.5;
    QUARTER_END
}

/// Serde fallback for `TemporalClusteringConfig::year_end_multiplier`.
fn default_year_end_multiplier() -> f64 {
    const YEAR_END: f64 = 2.0;
    YEAR_END
}
10414
10415impl Default for TemporalClusteringConfig {
10416    fn default() -> Self {
10417        Self {
10418            period_end_multiplier: default_period_end_multiplier(),
10419            period_end_days: default_period_end_days(),
10420            quarter_end_multiplier: default_quarter_end_multiplier(),
10421            year_end_multiplier: default_year_end_multiplier(),
10422        }
10423    }
10424}
10425
10426/// Co-occurrence pattern configuration.
10427#[derive(Debug, Clone, Serialize, Deserialize)]
10428pub struct CoOccurrencePatternConfig {
10429    /// Pattern name.
10430    pub name: String,
10431
10432    /// Primary anomaly type that triggers the pattern.
10433    pub primary_type: String,
10434
10435    /// Correlated anomalies.
10436    pub correlated: Vec<CorrelatedAnomalyConfig>,
10437}
10438
10439/// Correlated anomaly configuration.
10440#[derive(Debug, Clone, Serialize, Deserialize)]
10441pub struct CorrelatedAnomalyConfig {
10442    /// Anomaly type.
10443    pub anomaly_type: String,
10444
10445    /// Probability of occurrence (0.0 to 1.0).
10446    pub probability: f64,
10447
10448    /// Minimum lag in days.
10449    pub lag_days_min: i32,
10450
10451    /// Maximum lag in days.
10452    pub lag_days_max: i32,
10453}
10454
10455/// Near-miss generation configuration.
10456#[derive(Debug, Clone, Serialize, Deserialize)]
10457pub struct NearMissConfig {
10458    /// Enable near-miss generation.
10459    #[serde(default)]
10460    pub enabled: bool,
10461
10462    /// Proportion of "anomalies" that are actually near-misses (0.0 to 1.0).
10463    #[serde(default = "default_near_miss_proportion")]
10464    pub proportion: f64,
10465
10466    /// Enable near-duplicate pattern.
10467    #[serde(default = "default_true_val")]
10468    pub near_duplicate: bool,
10469
10470    /// Near-duplicate date difference range in days.
10471    #[serde(default)]
10472    pub near_duplicate_days: NearDuplicateDaysConfig,
10473
10474    /// Enable threshold proximity pattern.
10475    #[serde(default = "default_true_val")]
10476    pub threshold_proximity: bool,
10477
10478    /// Threshold proximity range (e.g., 0.90-0.99 of threshold).
10479    #[serde(default)]
10480    pub threshold_proximity_range: ThresholdProximityRangeConfig,
10481
10482    /// Enable unusual but legitimate patterns.
10483    #[serde(default = "default_true_val")]
10484    pub unusual_legitimate: bool,
10485
10486    /// Types of unusual legitimate patterns to generate.
10487    #[serde(default = "default_unusual_legitimate_types")]
10488    pub unusual_legitimate_types: Vec<String>,
10489
10490    /// Enable corrected error patterns.
10491    #[serde(default = "default_true_val")]
10492    pub corrected_errors: bool,
10493
10494    /// Corrected error correction lag range in days.
10495    #[serde(default)]
10496    pub corrected_error_lag: CorrectedErrorLagConfig,
10497}
10498
/// Serde fallback for `NearMissConfig::proportion`.
fn default_near_miss_proportion() -> f64 {
    const PROPORTION: f64 = 0.30;
    PROPORTION
}

/// Serde fallback for `NearMissConfig::unusual_legitimate_types`.
fn default_unusual_legitimate_types() -> Vec<String> {
    const PATTERNS: [&str; 4] = [
        "year_end_bonus",
        "contract_prepayment",
        "insurance_claim",
        "settlement_payment",
    ];
    PATTERNS.iter().map(|name| (*name).to_owned()).collect()
}
10511
10512impl Default for NearMissConfig {
10513    fn default() -> Self {
10514        Self {
10515            enabled: false,
10516            proportion: default_near_miss_proportion(),
10517            near_duplicate: true,
10518            near_duplicate_days: NearDuplicateDaysConfig::default(),
10519            threshold_proximity: true,
10520            threshold_proximity_range: ThresholdProximityRangeConfig::default(),
10521            unusual_legitimate: true,
10522            unusual_legitimate_types: default_unusual_legitimate_types(),
10523            corrected_errors: true,
10524            corrected_error_lag: CorrectedErrorLagConfig::default(),
10525        }
10526    }
10527}
10528
10529/// Near-duplicate days configuration.
10530#[derive(Debug, Clone, Serialize, Deserialize)]
10531pub struct NearDuplicateDaysConfig {
10532    /// Minimum days apart.
10533    #[serde(default = "default_near_duplicate_min")]
10534    pub min: u32,
10535
10536    /// Maximum days apart.
10537    #[serde(default = "default_near_duplicate_max")]
10538    pub max: u32,
10539}
10540
/// Serde fallback for `NearDuplicateDaysConfig::min`.
fn default_near_duplicate_min() -> u32 {
    const MIN_DAYS: u32 = 1;
    MIN_DAYS
}

/// Serde fallback for `NearDuplicateDaysConfig::max`.
fn default_near_duplicate_max() -> u32 {
    const MAX_DAYS: u32 = 3;
    MAX_DAYS
}
10547
10548impl Default for NearDuplicateDaysConfig {
10549    fn default() -> Self {
10550        Self {
10551            min: default_near_duplicate_min(),
10552            max: default_near_duplicate_max(),
10553        }
10554    }
10555}
10556
10557/// Threshold proximity range configuration.
10558#[derive(Debug, Clone, Serialize, Deserialize)]
10559pub struct ThresholdProximityRangeConfig {
10560    /// Minimum proximity (e.g., 0.90 = 90% of threshold).
10561    #[serde(default = "default_threshold_proximity_min")]
10562    pub min: f64,
10563
10564    /// Maximum proximity (e.g., 0.99 = 99% of threshold).
10565    #[serde(default = "default_threshold_proximity_max")]
10566    pub max: f64,
10567}
10568
/// Serde fallback for `ThresholdProximityRangeConfig::min`.
fn default_threshold_proximity_min() -> f64 {
    const MIN_FRACTION: f64 = 0.90;
    MIN_FRACTION
}

/// Serde fallback for `ThresholdProximityRangeConfig::max`.
fn default_threshold_proximity_max() -> f64 {
    const MAX_FRACTION: f64 = 0.99;
    MAX_FRACTION
}
10575
10576impl Default for ThresholdProximityRangeConfig {
10577    fn default() -> Self {
10578        Self {
10579            min: default_threshold_proximity_min(),
10580            max: default_threshold_proximity_max(),
10581        }
10582    }
10583}
10584
10585/// Corrected error lag configuration.
10586#[derive(Debug, Clone, Serialize, Deserialize)]
10587pub struct CorrectedErrorLagConfig {
10588    /// Minimum correction lag in days.
10589    #[serde(default = "default_corrected_error_lag_min")]
10590    pub min: u32,
10591
10592    /// Maximum correction lag in days.
10593    #[serde(default = "default_corrected_error_lag_max")]
10594    pub max: u32,
10595}
10596
/// Serde fallback for `CorrectedErrorLagConfig::min`.
fn default_corrected_error_lag_min() -> u32 {
    const MIN_LAG_DAYS: u32 = 1;
    MIN_LAG_DAYS
}

/// Serde fallback for `CorrectedErrorLagConfig::max`.
fn default_corrected_error_lag_max() -> u32 {
    const MAX_LAG_DAYS: u32 = 5;
    MAX_LAG_DAYS
}
10603
10604impl Default for CorrectedErrorLagConfig {
10605    fn default() -> Self {
10606        Self {
10607            min: default_corrected_error_lag_min(),
10608            max: default_corrected_error_lag_max(),
10609        }
10610    }
10611}
10612
10613/// Detection difficulty classification configuration.
10614#[derive(Debug, Clone, Serialize, Deserialize)]
10615pub struct DifficultyClassificationConfig {
10616    /// Enable detection difficulty classification.
10617    #[serde(default)]
10618    pub enabled: bool,
10619
10620    /// Target distribution of difficulty levels.
10621    #[serde(default)]
10622    pub target_distribution: DifficultyDistributionConfig,
10623}
10624
10625impl Default for DifficultyClassificationConfig {
10626    fn default() -> Self {
10627        Self {
10628            enabled: true,
10629            target_distribution: DifficultyDistributionConfig::default(),
10630        }
10631    }
10632}
10633
10634/// Target distribution of detection difficulty levels.
10635#[derive(Debug, Clone, Serialize, Deserialize)]
10636pub struct DifficultyDistributionConfig {
10637    /// Proportion of trivial anomalies (expected 99% detection).
10638    #[serde(default = "default_difficulty_trivial")]
10639    pub trivial: f64,
10640
10641    /// Proportion of easy anomalies (expected 90% detection).
10642    #[serde(default = "default_difficulty_easy")]
10643    pub easy: f64,
10644
10645    /// Proportion of moderate anomalies (expected 70% detection).
10646    #[serde(default = "default_difficulty_moderate")]
10647    pub moderate: f64,
10648
10649    /// Proportion of hard anomalies (expected 40% detection).
10650    #[serde(default = "default_difficulty_hard")]
10651    pub hard: f64,
10652
10653    /// Proportion of expert anomalies (expected 15% detection).
10654    #[serde(default = "default_difficulty_expert")]
10655    pub expert: f64,
10656}
10657
/// Serde fallback for `DifficultyDistributionConfig::trivial`.
fn default_difficulty_trivial() -> f64 {
    const TRIVIAL_SHARE: f64 = 0.15;
    TRIVIAL_SHARE
}

/// Serde fallback for `DifficultyDistributionConfig::easy`.
fn default_difficulty_easy() -> f64 {
    const EASY_SHARE: f64 = 0.25;
    EASY_SHARE
}

/// Serde fallback for `DifficultyDistributionConfig::moderate`.
fn default_difficulty_moderate() -> f64 {
    const MODERATE_SHARE: f64 = 0.30;
    MODERATE_SHARE
}

/// Serde fallback for `DifficultyDistributionConfig::hard`.
fn default_difficulty_hard() -> f64 {
    const HARD_SHARE: f64 = 0.20;
    HARD_SHARE
}

/// Serde fallback for `DifficultyDistributionConfig::expert`.
fn default_difficulty_expert() -> f64 {
    const EXPERT_SHARE: f64 = 0.10;
    EXPERT_SHARE
}
10673
10674impl Default for DifficultyDistributionConfig {
10675    fn default() -> Self {
10676        Self {
10677            trivial: default_difficulty_trivial(),
10678            easy: default_difficulty_easy(),
10679            moderate: default_difficulty_moderate(),
10680            hard: default_difficulty_hard(),
10681            expert: default_difficulty_expert(),
10682        }
10683    }
10684}
10685
10686/// Context-aware injection configuration.
10687#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10688pub struct ContextAwareConfig {
10689    /// Enable context-aware injection.
10690    #[serde(default)]
10691    pub enabled: bool,
10692
10693    /// Vendor-specific anomaly rules.
10694    #[serde(default)]
10695    pub vendor_rules: VendorAnomalyRulesConfig,
10696
10697    /// Employee-specific anomaly rules.
10698    #[serde(default)]
10699    pub employee_rules: EmployeeAnomalyRulesConfig,
10700
10701    /// Account-specific anomaly rules.
10702    #[serde(default)]
10703    pub account_rules: AccountAnomalyRulesConfig,
10704
10705    /// Behavioral baseline configuration.
10706    #[serde(default)]
10707    pub behavioral_baseline: BehavioralBaselineConfig,
10708}
10709
10710/// Vendor-specific anomaly rules configuration.
10711#[derive(Debug, Clone, Serialize, Deserialize)]
10712pub struct VendorAnomalyRulesConfig {
10713    /// Error rate multiplier for new vendors (< threshold days).
10714    #[serde(default = "default_new_vendor_multiplier")]
10715    pub new_vendor_error_multiplier: f64,
10716
10717    /// Days threshold for "new" vendor classification.
10718    #[serde(default = "default_new_vendor_threshold")]
10719    pub new_vendor_threshold_days: u32,
10720
10721    /// Error rate multiplier for international vendors.
10722    #[serde(default = "default_international_multiplier")]
10723    pub international_error_multiplier: f64,
10724
10725    /// Strategic vendor anomaly types (may differ from general vendors).
10726    #[serde(default = "default_strategic_vendor_types")]
10727    pub strategic_vendor_anomaly_types: Vec<String>,
10728}
10729
/// Serde fallback for `VendorAnomalyRulesConfig::new_vendor_error_multiplier`.
fn default_new_vendor_multiplier() -> f64 {
    const NEW_VENDOR_MULTIPLIER: f64 = 2.5;
    NEW_VENDOR_MULTIPLIER
}

/// Serde fallback for `VendorAnomalyRulesConfig::new_vendor_threshold_days`.
fn default_new_vendor_threshold() -> u32 {
    const NEW_VENDOR_DAYS: u32 = 90;
    NEW_VENDOR_DAYS
}

/// Serde fallback for `VendorAnomalyRulesConfig::international_error_multiplier`.
fn default_international_multiplier() -> f64 {
    const INTERNATIONAL_MULTIPLIER: f64 = 1.5;
    INTERNATIONAL_MULTIPLIER
}

/// Serde fallback for `VendorAnomalyRulesConfig::strategic_vendor_anomaly_types`.
fn default_strategic_vendor_types() -> Vec<String> {
    const STRATEGIC_TYPES: [&str; 2] = ["pricing_dispute", "contract_violation"];
    STRATEGIC_TYPES
        .iter()
        .map(|kind| (*kind).to_owned())
        .collect()
}
10745
10746impl Default for VendorAnomalyRulesConfig {
10747    fn default() -> Self {
10748        Self {
10749            new_vendor_error_multiplier: default_new_vendor_multiplier(),
10750            new_vendor_threshold_days: default_new_vendor_threshold(),
10751            international_error_multiplier: default_international_multiplier(),
10752            strategic_vendor_anomaly_types: default_strategic_vendor_types(),
10753        }
10754    }
10755}
10756
10757/// Employee-specific anomaly rules configuration.
10758#[derive(Debug, Clone, Serialize, Deserialize)]
10759pub struct EmployeeAnomalyRulesConfig {
10760    /// Error rate for new employees (< threshold days).
10761    #[serde(default = "default_new_employee_rate")]
10762    pub new_employee_error_rate: f64,
10763
10764    /// Days threshold for "new" employee classification.
10765    #[serde(default = "default_new_employee_threshold")]
10766    pub new_employee_threshold_days: u32,
10767
10768    /// Transaction volume threshold for fatigue errors.
10769    #[serde(default = "default_volume_fatigue_threshold")]
10770    pub volume_fatigue_threshold: u32,
10771
10772    /// Error rate multiplier when primary approver is absent.
10773    #[serde(default = "default_coverage_multiplier")]
10774    pub coverage_error_multiplier: f64,
10775}
10776
/// Serde fallback for `EmployeeAnomalyRulesConfig::new_employee_error_rate`.
fn default_new_employee_rate() -> f64 {
    const NEW_EMPLOYEE_RATE: f64 = 0.05;
    NEW_EMPLOYEE_RATE
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::new_employee_threshold_days`.
fn default_new_employee_threshold() -> u32 {
    const NEW_EMPLOYEE_DAYS: u32 = 180;
    NEW_EMPLOYEE_DAYS
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::volume_fatigue_threshold`.
fn default_volume_fatigue_threshold() -> u32 {
    const FATIGUE_VOLUME: u32 = 50;
    FATIGUE_VOLUME
}

/// Serde fallback for `EmployeeAnomalyRulesConfig::coverage_error_multiplier`.
fn default_coverage_multiplier() -> f64 {
    const COVERAGE_MULTIPLIER: f64 = 1.8;
    COVERAGE_MULTIPLIER
}
10789
10790impl Default for EmployeeAnomalyRulesConfig {
10791    fn default() -> Self {
10792        Self {
10793            new_employee_error_rate: default_new_employee_rate(),
10794            new_employee_threshold_days: default_new_employee_threshold(),
10795            volume_fatigue_threshold: default_volume_fatigue_threshold(),
10796            coverage_error_multiplier: default_coverage_multiplier(),
10797        }
10798    }
10799}
10800
10801/// Account-specific anomaly rules configuration.
10802#[derive(Debug, Clone, Serialize, Deserialize)]
10803pub struct AccountAnomalyRulesConfig {
10804    /// Error rate multiplier for high-risk accounts.
10805    #[serde(default = "default_high_risk_multiplier")]
10806    pub high_risk_account_multiplier: f64,
10807
10808    /// Account codes considered high-risk.
10809    #[serde(default = "default_high_risk_accounts")]
10810    pub high_risk_accounts: Vec<String>,
10811
10812    /// Error rate multiplier for suspense accounts.
10813    #[serde(default = "default_suspense_multiplier")]
10814    pub suspense_account_multiplier: f64,
10815
10816    /// Account codes considered suspense accounts.
10817    #[serde(default = "default_suspense_accounts")]
10818    pub suspense_accounts: Vec<String>,
10819
10820    /// Error rate multiplier for intercompany accounts.
10821    #[serde(default = "default_intercompany_multiplier")]
10822    pub intercompany_account_multiplier: f64,
10823}
10824
/// Serde fallback for `AccountAnomalyRulesConfig::high_risk_account_multiplier`.
fn default_high_risk_multiplier() -> f64 {
    const HIGH_RISK_MULTIPLIER: f64 = 2.0;
    HIGH_RISK_MULTIPLIER
}

/// Serde fallback for `AccountAnomalyRulesConfig::high_risk_accounts`.
fn default_high_risk_accounts() -> Vec<String> {
    // Labels carried over from the original inline comments:
    // 1100 = AR Control, 2000 = AP Control, 3000 = Cash.
    const HIGH_RISK_CODES: [&str; 3] = ["1100", "2000", "3000"];
    HIGH_RISK_CODES
        .iter()
        .map(|code| (*code).to_owned())
        .collect()
}

/// Serde fallback for `AccountAnomalyRulesConfig::suspense_account_multiplier`.
fn default_suspense_multiplier() -> f64 {
    const SUSPENSE_MULTIPLIER: f64 = 3.0;
    SUSPENSE_MULTIPLIER
}

/// Serde fallback for `AccountAnomalyRulesConfig::suspense_accounts`.
fn default_suspense_accounts() -> Vec<String> {
    const SUSPENSE_CODES: [&str; 2] = ["9999", "9998"];
    SUSPENSE_CODES
        .iter()
        .map(|code| (*code).to_owned())
        .collect()
}

/// Serde fallback for `AccountAnomalyRulesConfig::intercompany_account_multiplier`.
fn default_intercompany_multiplier() -> f64 {
    const INTERCOMPANY_MULTIPLIER: f64 = 1.5;
    INTERCOMPANY_MULTIPLIER
}
10844
10845impl Default for AccountAnomalyRulesConfig {
10846    fn default() -> Self {
10847        Self {
10848            high_risk_account_multiplier: default_high_risk_multiplier(),
10849            high_risk_accounts: default_high_risk_accounts(),
10850            suspense_account_multiplier: default_suspense_multiplier(),
10851            suspense_accounts: default_suspense_accounts(),
10852            intercompany_account_multiplier: default_intercompany_multiplier(),
10853        }
10854    }
10855}
10856
10857/// Behavioral baseline configuration.
10858#[derive(Debug, Clone, Serialize, Deserialize)]
10859pub struct BehavioralBaselineConfig {
10860    /// Enable behavioral baseline tracking.
10861    #[serde(default)]
10862    pub enabled: bool,
10863
10864    /// Number of days to build baseline from.
10865    #[serde(default = "default_baseline_period")]
10866    pub baseline_period_days: u32,
10867
10868    /// Standard deviation threshold for amount anomalies.
10869    #[serde(default = "default_deviation_threshold")]
10870    pub deviation_threshold_std: f64,
10871
10872    /// Standard deviation threshold for frequency anomalies.
10873    #[serde(default = "default_frequency_deviation")]
10874    pub frequency_deviation_threshold: f64,
10875}
10876
/// Fallback for `BehavioralBaselineConfig::baseline_period_days`.
fn default_baseline_period() -> u32 {
    90_u32
}

/// Fallback for `BehavioralBaselineConfig::deviation_threshold_std`.
fn default_deviation_threshold() -> f64 {
    3.0_f64
}

/// Fallback for `BehavioralBaselineConfig::frequency_deviation_threshold`.
fn default_frequency_deviation() -> f64 {
    2.0_f64
}
10886
10887impl Default for BehavioralBaselineConfig {
10888    fn default() -> Self {
10889        Self {
10890            enabled: false,
10891            baseline_period_days: default_baseline_period(),
10892            deviation_threshold_std: default_deviation_threshold(),
10893            frequency_deviation_threshold: default_frequency_deviation(),
10894        }
10895    }
10896}
10897
10898/// Enhanced labeling configuration.
10899#[derive(Debug, Clone, Serialize, Deserialize)]
10900pub struct EnhancedLabelingConfig {
10901    /// Enable severity scoring.
10902    #[serde(default = "default_true_val")]
10903    pub severity_scoring: bool,
10904
10905    /// Enable difficulty classification.
10906    #[serde(default = "default_true_val")]
10907    pub difficulty_classification: bool,
10908
10909    /// Materiality thresholds for severity classification.
10910    #[serde(default)]
10911    pub materiality_thresholds: MaterialityThresholdsConfig,
10912}
10913
10914impl Default for EnhancedLabelingConfig {
10915    fn default() -> Self {
10916        Self {
10917            severity_scoring: true,
10918            difficulty_classification: true,
10919            materiality_thresholds: MaterialityThresholdsConfig::default(),
10920        }
10921    }
10922}
10923
10924/// Materiality thresholds configuration.
10925#[derive(Debug, Clone, Serialize, Deserialize)]
10926pub struct MaterialityThresholdsConfig {
10927    /// Threshold for trivial impact (as percentage of total).
10928    #[serde(default = "default_materiality_trivial")]
10929    pub trivial: f64,
10930
10931    /// Threshold for immaterial impact.
10932    #[serde(default = "default_materiality_immaterial")]
10933    pub immaterial: f64,
10934
10935    /// Threshold for material impact.
10936    #[serde(default = "default_materiality_material")]
10937    pub material: f64,
10938
10939    /// Threshold for highly material impact.
10940    #[serde(default = "default_materiality_highly_material")]
10941    pub highly_material: f64,
10942}
10943
/// Fallback for `MaterialityThresholdsConfig::trivial`.
fn default_materiality_trivial() -> f64 {
    0.001_f64
}

/// Fallback for `MaterialityThresholdsConfig::immaterial`.
fn default_materiality_immaterial() -> f64 {
    0.01_f64
}

/// Fallback for `MaterialityThresholdsConfig::material`.
fn default_materiality_material() -> f64 {
    0.05_f64
}

/// Fallback for `MaterialityThresholdsConfig::highly_material`.
fn default_materiality_highly_material() -> f64 {
    0.10_f64
}
10956
10957impl Default for MaterialityThresholdsConfig {
10958    fn default() -> Self {
10959        Self {
10960            trivial: default_materiality_trivial(),
10961            immaterial: default_materiality_immaterial(),
10962            material: default_materiality_material(),
10963            highly_material: default_materiality_highly_material(),
10964        }
10965    }
10966}
10967
10968// =============================================================================
10969// Industry-Specific Configuration
10970// =============================================================================
10971
10972/// Industry-specific transaction and anomaly generation configuration.
10973///
10974/// This configuration enables generation of industry-authentic:
10975/// - Transaction types with appropriate terminology
10976/// - Master data (BOM, routings, clinical codes, etc.)
10977/// - Industry-specific anomaly patterns
10978/// - Regulatory framework compliance
10979#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10980pub struct IndustrySpecificConfig {
10981    /// Enable industry-specific generation.
10982    #[serde(default)]
10983    pub enabled: bool,
10984
10985    /// Manufacturing industry settings.
10986    #[serde(default)]
10987    pub manufacturing: ManufacturingConfig,
10988
10989    /// Retail industry settings.
10990    #[serde(default)]
10991    pub retail: RetailConfig,
10992
10993    /// Healthcare industry settings.
10994    #[serde(default)]
10995    pub healthcare: HealthcareConfig,
10996
10997    /// Technology industry settings.
10998    #[serde(default)]
10999    pub technology: TechnologyConfig,
11000
11001    /// Financial services industry settings.
11002    #[serde(default)]
11003    pub financial_services: FinancialServicesConfig,
11004
11005    /// Professional services industry settings.
11006    #[serde(default)]
11007    pub professional_services: ProfessionalServicesConfig,
11008}
11009
11010/// Manufacturing industry configuration.
11011#[derive(Debug, Clone, Serialize, Deserialize)]
11012pub struct ManufacturingConfig {
11013    /// Enable manufacturing-specific generation.
11014    #[serde(default)]
11015    pub enabled: bool,
11016
11017    /// Bill of Materials depth (typical: 3-7).
11018    #[serde(default = "default_bom_depth")]
11019    pub bom_depth: u32,
11020
11021    /// Whether to use just-in-time inventory.
11022    #[serde(default)]
11023    pub just_in_time: bool,
11024
11025    /// Production order types to generate.
11026    #[serde(default = "default_production_order_types")]
11027    pub production_order_types: Vec<String>,
11028
11029    /// Quality framework (ISO_9001, Six_Sigma, etc.).
11030    #[serde(default)]
11031    pub quality_framework: Option<String>,
11032
11033    /// Number of supplier tiers to model (1-3).
11034    #[serde(default = "default_supplier_tiers")]
11035    pub supplier_tiers: u32,
11036
11037    /// Standard cost update frequency.
11038    #[serde(default = "default_cost_frequency")]
11039    pub standard_cost_frequency: String,
11040
11041    /// Target yield rate (0.95-0.99 typical).
11042    #[serde(default = "default_yield_rate")]
11043    pub target_yield_rate: f64,
11044
11045    /// Scrap percentage threshold for alerts.
11046    #[serde(default = "default_scrap_threshold")]
11047    pub scrap_alert_threshold: f64,
11048
11049    /// Manufacturing anomaly injection rates.
11050    #[serde(default)]
11051    pub anomaly_rates: ManufacturingAnomalyRates,
11052
11053    /// Cost accounting configuration (WIP → FG → COGS pipeline).
11054    #[serde(default)]
11055    pub cost_accounting: ManufacturingCostAccountingConfig,
11056}
11057
11058/// Configuration for manufacturing cost accounting JE generation.
11059#[derive(Debug, Clone, Serialize, Deserialize)]
11060pub struct ManufacturingCostAccountingConfig {
11061    /// Enable multi-stage cost flow (WIP → FG → COGS) instead of flat JEs.
11062    #[serde(default = "default_true")]
11063    pub enabled: bool,
11064
11065    /// Generate standard cost variance JEs.
11066    #[serde(default = "default_true")]
11067    pub variance_accounts_enabled: bool,
11068
11069    /// Generate warranty provisions from quality inspection failures.
11070    #[serde(default = "default_true")]
11071    pub warranty_provisions_enabled: bool,
11072
11073    /// Minimum defect rate (0.0-1.0) to trigger warranty provision generation.
11074    #[serde(default = "default_warranty_defect_threshold")]
11075    pub warranty_defect_threshold: f64,
11076}
11077
/// Fallback for `ManufacturingCostAccountingConfig::warranty_defect_threshold`.
fn default_warranty_defect_threshold() -> f64 {
    0.01_f64
}
11081
11082impl Default for ManufacturingCostAccountingConfig {
11083    fn default() -> Self {
11084        Self {
11085            enabled: true,
11086            variance_accounts_enabled: true,
11087            warranty_provisions_enabled: true,
11088            warranty_defect_threshold: 0.01,
11089        }
11090    }
11091}
11092
/// Fallback for `ManufacturingConfig::bom_depth`.
fn default_bom_depth() -> u32 {
    4_u32
}

/// Fallback for `ManufacturingConfig::production_order_types`.
fn default_production_order_types() -> Vec<String> {
    const ORDER_TYPES: [&str; 3] = ["standard", "rework", "prototype"];
    ORDER_TYPES.iter().map(|kind| kind.to_string()).collect()
}

/// Fallback for `ManufacturingConfig::supplier_tiers`.
fn default_supplier_tiers() -> u32 {
    2_u32
}

/// Fallback for `ManufacturingConfig::standard_cost_frequency`.
fn default_cost_frequency() -> String {
    String::from("quarterly")
}

/// Fallback for `ManufacturingConfig::target_yield_rate`.
fn default_yield_rate() -> f64 {
    0.97_f64
}

/// Fallback for `ManufacturingConfig::scrap_alert_threshold`.
fn default_scrap_threshold() -> f64 {
    0.03_f64
}
11120
11121impl Default for ManufacturingConfig {
11122    fn default() -> Self {
11123        Self {
11124            enabled: false,
11125            bom_depth: default_bom_depth(),
11126            just_in_time: false,
11127            production_order_types: default_production_order_types(),
11128            quality_framework: Some("ISO_9001".to_string()),
11129            supplier_tiers: default_supplier_tiers(),
11130            standard_cost_frequency: default_cost_frequency(),
11131            target_yield_rate: default_yield_rate(),
11132            scrap_alert_threshold: default_scrap_threshold(),
11133            anomaly_rates: ManufacturingAnomalyRates::default(),
11134            cost_accounting: ManufacturingCostAccountingConfig::default(),
11135        }
11136    }
11137}
11138
11139/// Manufacturing anomaly injection rates.
11140#[derive(Debug, Clone, Serialize, Deserialize)]
11141pub struct ManufacturingAnomalyRates {
11142    /// Yield manipulation rate.
11143    #[serde(default = "default_mfg_yield_rate")]
11144    pub yield_manipulation: f64,
11145
11146    /// Labor misallocation rate.
11147    #[serde(default = "default_mfg_labor_rate")]
11148    pub labor_misallocation: f64,
11149
11150    /// Phantom production rate.
11151    #[serde(default = "default_mfg_phantom_rate")]
11152    pub phantom_production: f64,
11153
11154    /// Standard cost manipulation rate.
11155    #[serde(default = "default_mfg_cost_rate")]
11156    pub standard_cost_manipulation: f64,
11157
11158    /// Inventory fraud rate.
11159    #[serde(default = "default_mfg_inventory_rate")]
11160    pub inventory_fraud: f64,
11161}
11162
/// Fallback for `ManufacturingAnomalyRates::yield_manipulation`.
fn default_mfg_yield_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ManufacturingAnomalyRates::labor_misallocation`.
fn default_mfg_labor_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ManufacturingAnomalyRates::phantom_production`.
fn default_mfg_phantom_rate() -> f64 {
    0.005_f64
}

/// Fallback for `ManufacturingAnomalyRates::standard_cost_manipulation`.
fn default_mfg_cost_rate() -> f64 {
    0.01_f64
}

/// Fallback for `ManufacturingAnomalyRates::inventory_fraud`.
fn default_mfg_inventory_rate() -> f64 {
    0.008_f64
}
11182
11183impl Default for ManufacturingAnomalyRates {
11184    fn default() -> Self {
11185        Self {
11186            yield_manipulation: default_mfg_yield_rate(),
11187            labor_misallocation: default_mfg_labor_rate(),
11188            phantom_production: default_mfg_phantom_rate(),
11189            standard_cost_manipulation: default_mfg_cost_rate(),
11190            inventory_fraud: default_mfg_inventory_rate(),
11191        }
11192    }
11193}
11194
11195/// Retail industry configuration.
11196#[derive(Debug, Clone, Serialize, Deserialize)]
11197pub struct RetailConfig {
11198    /// Enable retail-specific generation.
11199    #[serde(default)]
11200    pub enabled: bool,
11201
11202    /// Store type distribution.
11203    #[serde(default)]
11204    pub store_types: RetailStoreTypeConfig,
11205
11206    /// Average daily transactions per store.
11207    #[serde(default = "default_retail_daily_txns")]
11208    pub avg_daily_transactions: u32,
11209
11210    /// Enable loss prevention tracking.
11211    #[serde(default = "default_true")]
11212    pub loss_prevention: bool,
11213
11214    /// Shrinkage rate (0.01-0.03 typical).
11215    #[serde(default = "default_shrinkage_rate")]
11216    pub shrinkage_rate: f64,
11217
11218    /// Retail anomaly injection rates.
11219    #[serde(default)]
11220    pub anomaly_rates: RetailAnomalyRates,
11221}
11222
/// Fallback for `RetailConfig::avg_daily_transactions`.
fn default_retail_daily_txns() -> u32 {
    500_u32
}

/// Fallback for `RetailConfig::shrinkage_rate`.
fn default_shrinkage_rate() -> f64 {
    0.015_f64
}
11230
11231impl Default for RetailConfig {
11232    fn default() -> Self {
11233        Self {
11234            enabled: false,
11235            store_types: RetailStoreTypeConfig::default(),
11236            avg_daily_transactions: default_retail_daily_txns(),
11237            loss_prevention: true,
11238            shrinkage_rate: default_shrinkage_rate(),
11239            anomaly_rates: RetailAnomalyRates::default(),
11240        }
11241    }
11242}
11243
11244/// Retail store type distribution.
11245#[derive(Debug, Clone, Serialize, Deserialize)]
11246pub struct RetailStoreTypeConfig {
11247    /// Percentage of flagship stores.
11248    #[serde(default = "default_flagship_pct")]
11249    pub flagship: f64,
11250
11251    /// Percentage of regional stores.
11252    #[serde(default = "default_regional_pct")]
11253    pub regional: f64,
11254
11255    /// Percentage of outlet stores.
11256    #[serde(default = "default_outlet_pct")]
11257    pub outlet: f64,
11258
11259    /// Percentage of e-commerce.
11260    #[serde(default = "default_ecommerce_pct")]
11261    pub ecommerce: f64,
11262}
11263
/// Fallback for `RetailStoreTypeConfig::flagship`.
fn default_flagship_pct() -> f64 {
    0.10_f64
}

/// Fallback for `RetailStoreTypeConfig::regional`.
fn default_regional_pct() -> f64 {
    0.50_f64
}

/// Fallback for `RetailStoreTypeConfig::outlet`.
fn default_outlet_pct() -> f64 {
    0.25_f64
}

/// Fallback for `RetailStoreTypeConfig::ecommerce`.
fn default_ecommerce_pct() -> f64 {
    0.15_f64
}
11279
11280impl Default for RetailStoreTypeConfig {
11281    fn default() -> Self {
11282        Self {
11283            flagship: default_flagship_pct(),
11284            regional: default_regional_pct(),
11285            outlet: default_outlet_pct(),
11286            ecommerce: default_ecommerce_pct(),
11287        }
11288    }
11289}
11290
11291/// Retail anomaly injection rates.
11292#[derive(Debug, Clone, Serialize, Deserialize)]
11293pub struct RetailAnomalyRates {
11294    /// Sweethearting rate.
11295    #[serde(default = "default_sweethearting_rate")]
11296    pub sweethearting: f64,
11297
11298    /// Skimming rate.
11299    #[serde(default = "default_skimming_rate")]
11300    pub skimming: f64,
11301
11302    /// Refund fraud rate.
11303    #[serde(default = "default_refund_fraud_rate")]
11304    pub refund_fraud: f64,
11305
11306    /// Void abuse rate.
11307    #[serde(default = "default_void_abuse_rate")]
11308    pub void_abuse: f64,
11309
11310    /// Gift card fraud rate.
11311    #[serde(default = "default_gift_card_rate")]
11312    pub gift_card_fraud: f64,
11313
11314    /// Vendor kickback rate.
11315    #[serde(default = "default_retail_kickback_rate")]
11316    pub vendor_kickback: f64,
11317}
11318
/// Fallback for `RetailAnomalyRates::sweethearting`.
fn default_sweethearting_rate() -> f64 {
    0.02_f64
}

/// Fallback for `RetailAnomalyRates::skimming`.
fn default_skimming_rate() -> f64 {
    0.005_f64
}

/// Fallback for `RetailAnomalyRates::refund_fraud`.
fn default_refund_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `RetailAnomalyRates::void_abuse`.
fn default_void_abuse_rate() -> f64 {
    0.01_f64
}

/// Fallback for `RetailAnomalyRates::gift_card_fraud`.
fn default_gift_card_rate() -> f64 {
    0.008_f64
}

/// Fallback for `RetailAnomalyRates::vendor_kickback`.
fn default_retail_kickback_rate() -> f64 {
    0.003_f64
}
11342
11343impl Default for RetailAnomalyRates {
11344    fn default() -> Self {
11345        Self {
11346            sweethearting: default_sweethearting_rate(),
11347            skimming: default_skimming_rate(),
11348            refund_fraud: default_refund_fraud_rate(),
11349            void_abuse: default_void_abuse_rate(),
11350            gift_card_fraud: default_gift_card_rate(),
11351            vendor_kickback: default_retail_kickback_rate(),
11352        }
11353    }
11354}
11355
11356/// Healthcare industry configuration.
11357#[derive(Debug, Clone, Serialize, Deserialize)]
11358pub struct HealthcareConfig {
11359    /// Enable healthcare-specific generation.
11360    #[serde(default)]
11361    pub enabled: bool,
11362
11363    /// Healthcare facility type.
11364    #[serde(default = "default_facility_type")]
11365    pub facility_type: String,
11366
11367    /// Payer mix distribution.
11368    #[serde(default)]
11369    pub payer_mix: HealthcarePayerMix,
11370
11371    /// Coding systems enabled.
11372    #[serde(default)]
11373    pub coding_systems: HealthcareCodingSystems,
11374
11375    /// Healthcare compliance settings.
11376    #[serde(default)]
11377    pub compliance: HealthcareComplianceConfig,
11378
11379    /// Average daily encounters.
11380    #[serde(default = "default_daily_encounters")]
11381    pub avg_daily_encounters: u32,
11382
11383    /// Average charges per encounter.
11384    #[serde(default = "default_charges_per_encounter")]
11385    pub avg_charges_per_encounter: u32,
11386
11387    /// Denial rate (0.0-1.0).
11388    #[serde(default = "default_hc_denial_rate")]
11389    pub denial_rate: f64,
11390
11391    /// Bad debt rate (0.0-1.0).
11392    #[serde(default = "default_hc_bad_debt_rate")]
11393    pub bad_debt_rate: f64,
11394
11395    /// Charity care rate (0.0-1.0).
11396    #[serde(default = "default_hc_charity_care_rate")]
11397    pub charity_care_rate: f64,
11398
11399    /// Healthcare anomaly injection rates.
11400    #[serde(default)]
11401    pub anomaly_rates: HealthcareAnomalyRates,
11402}
11403
/// Fallback for `HealthcareConfig::facility_type`.
fn default_facility_type() -> String {
    String::from("hospital")
}

/// Fallback for `HealthcareConfig::avg_daily_encounters`.
fn default_daily_encounters() -> u32 {
    150_u32
}

/// Fallback for `HealthcareConfig::avg_charges_per_encounter`.
fn default_charges_per_encounter() -> u32 {
    8_u32
}

/// Fallback for `HealthcareConfig::denial_rate`.
fn default_hc_denial_rate() -> f64 {
    0.05_f64
}

/// Fallback for `HealthcareConfig::bad_debt_rate`.
fn default_hc_bad_debt_rate() -> f64 {
    0.03_f64
}

/// Fallback for `HealthcareConfig::charity_care_rate`.
fn default_hc_charity_care_rate() -> f64 {
    0.02_f64
}
11427
11428impl Default for HealthcareConfig {
11429    fn default() -> Self {
11430        Self {
11431            enabled: false,
11432            facility_type: default_facility_type(),
11433            payer_mix: HealthcarePayerMix::default(),
11434            coding_systems: HealthcareCodingSystems::default(),
11435            compliance: HealthcareComplianceConfig::default(),
11436            avg_daily_encounters: default_daily_encounters(),
11437            avg_charges_per_encounter: default_charges_per_encounter(),
11438            denial_rate: default_hc_denial_rate(),
11439            bad_debt_rate: default_hc_bad_debt_rate(),
11440            charity_care_rate: default_hc_charity_care_rate(),
11441            anomaly_rates: HealthcareAnomalyRates::default(),
11442        }
11443    }
11444}
11445
11446/// Healthcare payer mix distribution.
11447#[derive(Debug, Clone, Serialize, Deserialize)]
11448pub struct HealthcarePayerMix {
11449    /// Medicare percentage.
11450    #[serde(default = "default_medicare_pct")]
11451    pub medicare: f64,
11452
11453    /// Medicaid percentage.
11454    #[serde(default = "default_medicaid_pct")]
11455    pub medicaid: f64,
11456
11457    /// Commercial insurance percentage.
11458    #[serde(default = "default_commercial_pct")]
11459    pub commercial: f64,
11460
11461    /// Self-pay percentage.
11462    #[serde(default = "default_self_pay_pct")]
11463    pub self_pay: f64,
11464}
11465
/// Fallback for `HealthcarePayerMix::medicare`.
fn default_medicare_pct() -> f64 {
    0.40_f64
}

/// Fallback for `HealthcarePayerMix::medicaid`.
fn default_medicaid_pct() -> f64 {
    0.20_f64
}

/// Fallback for `HealthcarePayerMix::commercial`.
fn default_commercial_pct() -> f64 {
    0.30_f64
}

/// Fallback for `HealthcarePayerMix::self_pay`.
fn default_self_pay_pct() -> f64 {
    0.10_f64
}
11481
11482impl Default for HealthcarePayerMix {
11483    fn default() -> Self {
11484        Self {
11485            medicare: default_medicare_pct(),
11486            medicaid: default_medicaid_pct(),
11487            commercial: default_commercial_pct(),
11488            self_pay: default_self_pay_pct(),
11489        }
11490    }
11491}
11492
11493/// Healthcare coding systems configuration.
11494#[derive(Debug, Clone, Serialize, Deserialize)]
11495pub struct HealthcareCodingSystems {
11496    /// Enable ICD-10 diagnosis coding.
11497    #[serde(default = "default_true")]
11498    pub icd10: bool,
11499
11500    /// Enable CPT procedure coding.
11501    #[serde(default = "default_true")]
11502    pub cpt: bool,
11503
11504    /// Enable DRG grouping.
11505    #[serde(default = "default_true")]
11506    pub drg: bool,
11507
11508    /// Enable HCPCS Level II coding.
11509    #[serde(default = "default_true")]
11510    pub hcpcs: bool,
11511
11512    /// Enable revenue codes.
11513    #[serde(default = "default_true")]
11514    pub revenue_codes: bool,
11515}
11516
11517impl Default for HealthcareCodingSystems {
11518    fn default() -> Self {
11519        Self {
11520            icd10: true,
11521            cpt: true,
11522            drg: true,
11523            hcpcs: true,
11524            revenue_codes: true,
11525        }
11526    }
11527}
11528
11529/// Healthcare compliance configuration.
11530#[derive(Debug, Clone, Serialize, Deserialize)]
11531pub struct HealthcareComplianceConfig {
11532    /// Enable HIPAA compliance.
11533    #[serde(default = "default_true")]
11534    pub hipaa: bool,
11535
11536    /// Enable Stark Law compliance.
11537    #[serde(default = "default_true")]
11538    pub stark_law: bool,
11539
11540    /// Enable Anti-Kickback Statute compliance.
11541    #[serde(default = "default_true")]
11542    pub anti_kickback: bool,
11543
11544    /// Enable False Claims Act compliance.
11545    #[serde(default = "default_true")]
11546    pub false_claims_act: bool,
11547
11548    /// Enable EMTALA compliance (for hospitals).
11549    #[serde(default = "default_true")]
11550    pub emtala: bool,
11551}
11552
11553impl Default for HealthcareComplianceConfig {
11554    fn default() -> Self {
11555        Self {
11556            hipaa: true,
11557            stark_law: true,
11558            anti_kickback: true,
11559            false_claims_act: true,
11560            emtala: true,
11561        }
11562    }
11563}
11564
11565/// Healthcare anomaly injection rates.
11566#[derive(Debug, Clone, Serialize, Deserialize)]
11567pub struct HealthcareAnomalyRates {
11568    /// Upcoding rate.
11569    #[serde(default = "default_upcoding_rate")]
11570    pub upcoding: f64,
11571
11572    /// Unbundling rate.
11573    #[serde(default = "default_unbundling_rate")]
11574    pub unbundling: f64,
11575
11576    /// Phantom billing rate.
11577    #[serde(default = "default_phantom_billing_rate")]
11578    pub phantom_billing: f64,
11579
11580    /// Kickback rate.
11581    #[serde(default = "default_healthcare_kickback_rate")]
11582    pub kickbacks: f64,
11583
11584    /// Duplicate billing rate.
11585    #[serde(default = "default_duplicate_billing_rate")]
11586    pub duplicate_billing: f64,
11587
11588    /// Medical necessity abuse rate.
11589    #[serde(default = "default_med_necessity_rate")]
11590    pub medical_necessity_abuse: f64,
11591}
11592
/// Fallback for `HealthcareAnomalyRates::upcoding`.
fn default_upcoding_rate() -> f64 {
    0.02_f64
}

/// Fallback for `HealthcareAnomalyRates::unbundling`.
fn default_unbundling_rate() -> f64 {
    0.015_f64
}

/// Fallback for `HealthcareAnomalyRates::phantom_billing`.
fn default_phantom_billing_rate() -> f64 {
    0.005_f64
}

/// Fallback for `HealthcareAnomalyRates::kickbacks`.
fn default_healthcare_kickback_rate() -> f64 {
    0.003_f64
}

/// Fallback for `HealthcareAnomalyRates::duplicate_billing`.
fn default_duplicate_billing_rate() -> f64 {
    0.008_f64
}

/// Fallback for `HealthcareAnomalyRates::medical_necessity_abuse`.
fn default_med_necessity_rate() -> f64 {
    0.01_f64
}
11616
11617impl Default for HealthcareAnomalyRates {
11618    fn default() -> Self {
11619        Self {
11620            upcoding: default_upcoding_rate(),
11621            unbundling: default_unbundling_rate(),
11622            phantom_billing: default_phantom_billing_rate(),
11623            kickbacks: default_healthcare_kickback_rate(),
11624            duplicate_billing: default_duplicate_billing_rate(),
11625            medical_necessity_abuse: default_med_necessity_rate(),
11626        }
11627    }
11628}
11629
11630/// Technology industry configuration.
11631#[derive(Debug, Clone, Serialize, Deserialize)]
11632pub struct TechnologyConfig {
11633    /// Enable technology-specific generation.
11634    #[serde(default)]
11635    pub enabled: bool,
11636
11637    /// Revenue model type.
11638    #[serde(default = "default_revenue_model")]
11639    pub revenue_model: String,
11640
11641    /// Subscription revenue percentage (for SaaS).
11642    #[serde(default = "default_subscription_pct")]
11643    pub subscription_revenue_pct: f64,
11644
11645    /// License revenue percentage.
11646    #[serde(default = "default_license_pct")]
11647    pub license_revenue_pct: f64,
11648
11649    /// Services revenue percentage.
11650    #[serde(default = "default_services_pct")]
11651    pub services_revenue_pct: f64,
11652
11653    /// R&D capitalization settings.
11654    #[serde(default)]
11655    pub rd_capitalization: RdCapitalizationConfig,
11656
11657    /// Technology anomaly injection rates.
11658    #[serde(default)]
11659    pub anomaly_rates: TechnologyAnomalyRates,
11660}
11661
/// Fallback for `TechnologyConfig::revenue_model`.
fn default_revenue_model() -> String {
    String::from("saas")
}

/// Fallback for `TechnologyConfig::subscription_revenue_pct`.
fn default_subscription_pct() -> f64 {
    0.60_f64
}

/// Fallback for `TechnologyConfig::license_revenue_pct`.
fn default_license_pct() -> f64 {
    0.25_f64
}

/// Fallback for `TechnologyConfig::services_revenue_pct`.
fn default_services_pct() -> f64 {
    0.15_f64
}
11677
11678impl Default for TechnologyConfig {
11679    fn default() -> Self {
11680        Self {
11681            enabled: false,
11682            revenue_model: default_revenue_model(),
11683            subscription_revenue_pct: default_subscription_pct(),
11684            license_revenue_pct: default_license_pct(),
11685            services_revenue_pct: default_services_pct(),
11686            rd_capitalization: RdCapitalizationConfig::default(),
11687            anomaly_rates: TechnologyAnomalyRates::default(),
11688        }
11689    }
11690}
11691
11692/// R&D capitalization configuration.
11693#[derive(Debug, Clone, Serialize, Deserialize)]
11694pub struct RdCapitalizationConfig {
11695    /// Enable R&D capitalization.
11696    #[serde(default = "default_true")]
11697    pub enabled: bool,
11698
11699    /// Capitalization rate (0.0-1.0).
11700    #[serde(default = "default_cap_rate")]
11701    pub capitalization_rate: f64,
11702
11703    /// Useful life in years.
11704    #[serde(default = "default_useful_life")]
11705    pub useful_life_years: u32,
11706}
11707
/// Fallback for `RdCapitalizationConfig::capitalization_rate`.
fn default_cap_rate() -> f64 {
    0.30_f64
}

/// Fallback for `RdCapitalizationConfig::useful_life_years`.
fn default_useful_life() -> u32 {
    3_u32
}
11715
11716impl Default for RdCapitalizationConfig {
11717    fn default() -> Self {
11718        Self {
11719            enabled: true,
11720            capitalization_rate: default_cap_rate(),
11721            useful_life_years: default_useful_life(),
11722        }
11723    }
11724}
11725
11726/// Technology anomaly injection rates.
11727#[derive(Debug, Clone, Serialize, Deserialize)]
11728pub struct TechnologyAnomalyRates {
11729    /// Premature revenue recognition rate.
11730    #[serde(default = "default_premature_rev_rate")]
11731    pub premature_revenue: f64,
11732
11733    /// Side letter abuse rate.
11734    #[serde(default = "default_side_letter_rate")]
11735    pub side_letter_abuse: f64,
11736
11737    /// Channel stuffing rate.
11738    #[serde(default = "default_channel_stuffing_rate")]
11739    pub channel_stuffing: f64,
11740
11741    /// Improper capitalization rate.
11742    #[serde(default = "default_improper_cap_rate")]
11743    pub improper_capitalization: f64,
11744}
11745
/// Fallback for `TechnologyAnomalyRates::premature_revenue`.
fn default_premature_rev_rate() -> f64 {
    0.015_f64
}

/// Fallback for `TechnologyAnomalyRates::side_letter_abuse`.
fn default_side_letter_rate() -> f64 {
    0.008_f64
}

/// Fallback for `TechnologyAnomalyRates::channel_stuffing`.
fn default_channel_stuffing_rate() -> f64 {
    0.01_f64
}

/// Fallback for `TechnologyAnomalyRates::improper_capitalization`.
fn default_improper_cap_rate() -> f64 {
    0.012_f64
}
11761
11762impl Default for TechnologyAnomalyRates {
11763    fn default() -> Self {
11764        Self {
11765            premature_revenue: default_premature_rev_rate(),
11766            side_letter_abuse: default_side_letter_rate(),
11767            channel_stuffing: default_channel_stuffing_rate(),
11768            improper_capitalization: default_improper_cap_rate(),
11769        }
11770    }
11771}
11772
11773/// Financial services industry configuration.
11774#[derive(Debug, Clone, Serialize, Deserialize)]
11775pub struct FinancialServicesConfig {
11776    /// Enable financial services-specific generation.
11777    #[serde(default)]
11778    pub enabled: bool,
11779
11780    /// Financial institution type.
11781    #[serde(default = "default_fi_type")]
11782    pub institution_type: String,
11783
11784    /// Regulatory framework.
11785    #[serde(default = "default_fi_regulatory")]
11786    pub regulatory_framework: String,
11787
11788    /// Financial services anomaly injection rates.
11789    #[serde(default)]
11790    pub anomaly_rates: FinancialServicesAnomalyRates,
11791}
11792
/// Fallback for `FinancialServicesConfig::institution_type`.
fn default_fi_type() -> String {
    String::from("commercial_bank")
}

/// Fallback for `FinancialServicesConfig::regulatory_framework`.
fn default_fi_regulatory() -> String {
    String::from("us_banking")
}
11800
11801impl Default for FinancialServicesConfig {
11802    fn default() -> Self {
11803        Self {
11804            enabled: false,
11805            institution_type: default_fi_type(),
11806            regulatory_framework: default_fi_regulatory(),
11807            anomaly_rates: FinancialServicesAnomalyRates::default(),
11808        }
11809    }
11810}
11811
11812/// Financial services anomaly injection rates.
11813#[derive(Debug, Clone, Serialize, Deserialize)]
11814pub struct FinancialServicesAnomalyRates {
11815    /// Loan fraud rate.
11816    #[serde(default = "default_loan_fraud_rate")]
11817    pub loan_fraud: f64,
11818
11819    /// Trading fraud rate.
11820    #[serde(default = "default_trading_fraud_rate")]
11821    pub trading_fraud: f64,
11822
11823    /// Insurance fraud rate.
11824    #[serde(default = "default_insurance_fraud_rate")]
11825    pub insurance_fraud: f64,
11826
11827    /// Account manipulation rate.
11828    #[serde(default = "default_account_manip_rate")]
11829    pub account_manipulation: f64,
11830}
11831
/// Fallback for `FinancialServicesAnomalyRates::loan_fraud`.
fn default_loan_fraud_rate() -> f64 {
    0.01_f64
}

/// Fallback for `FinancialServicesAnomalyRates::trading_fraud`.
fn default_trading_fraud_rate() -> f64 {
    0.008_f64
}

/// Fallback for `FinancialServicesAnomalyRates::insurance_fraud`.
fn default_insurance_fraud_rate() -> f64 {
    0.012_f64
}

/// Fallback for `FinancialServicesAnomalyRates::account_manipulation`.
fn default_account_manip_rate() -> f64 {
    0.005_f64
}
11847
11848impl Default for FinancialServicesAnomalyRates {
11849    fn default() -> Self {
11850        Self {
11851            loan_fraud: default_loan_fraud_rate(),
11852            trading_fraud: default_trading_fraud_rate(),
11853            insurance_fraud: default_insurance_fraud_rate(),
11854            account_manipulation: default_account_manip_rate(),
11855        }
11856    }
11857}
11858
/// Professional services industry configuration.
///
/// All fields are optional on input; omitted fields take the defaults noted
/// on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfessionalServicesConfig {
    /// Enable professional services-specific generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Firm type (default: `"consulting"`).
    #[serde(default = "default_firm_type")]
    pub firm_type: String,

    /// Billing model (default: `"time_and_materials"`).
    #[serde(default = "default_billing_model")]
    pub billing_model: String,

    /// Average hourly rate (default: 250.0).
    #[serde(default = "default_hourly_rate")]
    pub avg_hourly_rate: f64,

    /// Trust account settings (for law firms).
    #[serde(default)]
    pub trust_accounting: TrustAccountingConfig,

    /// Professional services anomaly injection rates.
    #[serde(default)]
    pub anomaly_rates: ProfessionalServicesAnomalyRates,
}
11886
/// Default for `ProfessionalServicesConfig::firm_type`.
fn default_firm_type() -> String {
    String::from("consulting")
}

/// Default for `ProfessionalServicesConfig::billing_model`.
fn default_billing_model() -> String {
    String::from("time_and_materials")
}

/// Default for `ProfessionalServicesConfig::avg_hourly_rate`.
fn default_hourly_rate() -> f64 {
    250.0_f64
}
11898
11899impl Default for ProfessionalServicesConfig {
11900    fn default() -> Self {
11901        Self {
11902            enabled: false,
11903            firm_type: default_firm_type(),
11904            billing_model: default_billing_model(),
11905            avg_hourly_rate: default_hourly_rate(),
11906            trust_accounting: TrustAccountingConfig::default(),
11907            anomaly_rates: ProfessionalServicesAnomalyRates::default(),
11908        }
11909    }
11910}
11911
/// Trust accounting configuration for law firms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrustAccountingConfig {
    /// Enable trust accounting (default: `false`).
    #[serde(default)]
    pub enabled: bool,

    /// Require three-way reconciliation.
    ///
    /// The `Default` impl sets this to `true`; on deserialization a missing
    /// value comes from the `default_true` helper defined elsewhere in this file.
    #[serde(default = "default_true")]
    pub require_three_way_reconciliation: bool,
}
11923
11924impl Default for TrustAccountingConfig {
11925    fn default() -> Self {
11926        Self {
11927            enabled: false,
11928            require_three_way_reconciliation: true,
11929        }
11930    }
11931}
11932
/// Professional services anomaly injection rates.
///
/// Missing fields are filled by the `default_*` helper named in each
/// `serde` attribute; the `Default` impl uses the same helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfessionalServicesAnomalyRates {
    /// Time billing fraud rate (default: 0.02).
    #[serde(default = "default_time_fraud_rate")]
    pub time_billing_fraud: f64,

    /// Expense report fraud rate (default: 0.015).
    #[serde(default = "default_expense_fraud_rate")]
    pub expense_fraud: f64,

    /// Trust misappropriation rate (default: 0.003).
    #[serde(default = "default_trust_misappropriation_rate")]
    pub trust_misappropriation: f64,
}
11948
/// Default for `ProfessionalServicesAnomalyRates::time_billing_fraud`.
fn default_time_fraud_rate() -> f64 {
    0.02_f64
}

/// Default for `ProfessionalServicesAnomalyRates::expense_fraud`.
fn default_expense_fraud_rate() -> f64 {
    0.015_f64
}

/// Default for `ProfessionalServicesAnomalyRates::trust_misappropriation`.
fn default_trust_misappropriation_rate() -> f64 {
    0.003_f64
}
11960
11961impl Default for ProfessionalServicesAnomalyRates {
11962    fn default() -> Self {
11963        Self {
11964            time_billing_fraud: default_time_fraud_rate(),
11965            expense_fraud: default_expense_fraud_rate(),
11966            trust_misappropriation: default_trust_misappropriation_rate(),
11967        }
11968    }
11969}
11970
11971/// Fingerprint privacy configuration for extraction and synthesis.
11972///
11973/// Controls the privacy parameters used when extracting fingerprints
11974/// from sensitive data. Supports predefined levels or custom (epsilon, delta) tuples.
11975///
11976/// ```yaml
11977/// fingerprint_privacy:
11978///   level: custom
11979///   epsilon: 0.5
11980///   delta: 1.0e-5
11981///   k_anonymity: 10
11982///   composition_method: renyi_dp
11983/// ```
11984#[derive(Debug, Clone, Serialize, Deserialize)]
11985pub struct FingerprintPrivacyConfig {
11986    /// Privacy level preset. Use "custom" for user-specified epsilon/delta.
11987    #[serde(default)]
11988    pub level: String,
11989    /// Custom epsilon value (only used when level = "custom").
11990    #[serde(default = "default_epsilon")]
11991    pub epsilon: f64,
11992    /// Custom delta value for (epsilon, delta)-DP (only used with RDP/zCDP).
11993    #[serde(default = "default_delta")]
11994    pub delta: f64,
11995    /// K-anonymity threshold.
11996    #[serde(default = "default_k_anonymity")]
11997    pub k_anonymity: u32,
11998    /// Composition method: "naive", "advanced", "renyi_dp", "zcdp".
11999    #[serde(default)]
12000    pub composition_method: String,
12001}
12002
/// Default for `FingerprintPrivacyConfig::epsilon`.
fn default_epsilon() -> f64 {
    1.0_f64
}

/// Default for `FingerprintPrivacyConfig::delta`.
fn default_delta() -> f64 {
    1e-5_f64
}

/// Default for `FingerprintPrivacyConfig::k_anonymity`.
fn default_k_anonymity() -> u32 {
    5_u32
}
12014
12015impl Default for FingerprintPrivacyConfig {
12016    fn default() -> Self {
12017        Self {
12018            level: "standard".to_string(),
12019            epsilon: default_epsilon(),
12020            delta: default_delta(),
12021            k_anonymity: default_k_anonymity(),
12022            composition_method: "naive".to_string(),
12023        }
12024    }
12025}
12026
/// Quality gates configuration for pass/fail thresholds on generation runs.
///
/// ```yaml
/// quality_gates:
///   enabled: true
///   profile: strict  # strict, default, lenient, custom
///   fail_on_violation: true
///   custom_gates:
///     - name: benford_compliance
///       metric: benford_mad
///       threshold: 0.015
///       comparison: lte
/// ```
///
/// All fields are optional on input; see each field for its default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityGatesSchemaConfig {
    /// Enable quality gate evaluation (default: `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Gate profile: "strict", "default", "lenient", or "custom" (default: `"default"`).
    #[serde(default = "default_gate_profile_name")]
    pub profile: String,
    /// Whether to fail the generation on gate violations (default: `false`).
    #[serde(default)]
    pub fail_on_violation: bool,
    /// Custom gate definitions (used when profile = "custom"). Default: empty.
    #[serde(default)]
    pub custom_gates: Vec<QualityGateEntry>,
}
12055
/// Default for `QualityGatesSchemaConfig::profile`.
fn default_gate_profile_name() -> String {
    String::from("default")
}
12059
12060impl Default for QualityGatesSchemaConfig {
12061    fn default() -> Self {
12062        Self {
12063            enabled: false,
12064            profile: default_gate_profile_name(),
12065            fail_on_violation: false,
12066            custom_gates: Vec::new(),
12067        }
12068    }
12069}
12070
/// A single quality gate entry in configuration.
///
/// `name`, `metric` and `threshold` carry no `serde` default, so they must be
/// present in the input; `upper_threshold` and `comparison` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityGateEntry {
    /// Gate name.
    pub name: String,
    /// Metric to check: benford_mad, balance_coherence, document_chain_integrity,
    /// correlation_preservation, temporal_consistency, privacy_mia_auc,
    /// completion_rate, duplicate_rate, referential_integrity, ic_match_rate.
    pub metric: String,
    /// Threshold value.
    pub threshold: f64,
    /// Upper threshold for "between" comparison (default: `None`).
    #[serde(default)]
    pub upper_threshold: Option<f64>,
    /// Comparison operator: "gte", "lte", "eq", "between" (default: `"gte"`).
    #[serde(default = "default_gate_comparison")]
    pub comparison: String,
}
12089
/// Default for `QualityGateEntry::comparison`.
fn default_gate_comparison() -> String {
    String::from("gte")
}
12093
/// Compliance configuration for regulatory requirements.
///
/// ```yaml
/// compliance:
///   content_marking:
///     enabled: true
///     format: embedded  # embedded, sidecar, both
///   article10_report: true
/// ```
///
/// `Default` is derived, so each field defaults to its own type's default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ComplianceSchemaConfig {
    /// Synthetic content marking configuration (EU AI Act Article 50).
    #[serde(default)]
    pub content_marking: ContentMarkingSchemaConfig,
    /// Generate Article 10 data governance report (default: `false`).
    #[serde(default)]
    pub article10_report: bool,
    /// Certificate configuration for proving DP guarantees.
    #[serde(default)]
    pub certificates: CertificateSchemaConfig,
}
12115
/// Configuration for synthetic data certificates.
///
/// `Default` is derived: generation disabled, no signing-key variable, and
/// quality metrics excluded.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CertificateSchemaConfig {
    /// Whether certificate generation is enabled (default: `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Environment variable name for the signing key (default: `None`).
    #[serde(default)]
    pub signing_key_env: Option<String>,
    /// Whether to include quality metrics in the certificate (default: `false`).
    #[serde(default)]
    pub include_quality_metrics: bool,
}
12129
/// Content marking configuration for synthetic data output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentMarkingSchemaConfig {
    /// Whether content marking is enabled.
    ///
    /// The `Default` impl sets this to `true`; on deserialization a missing
    /// value comes from the `default_true` helper defined elsewhere in this file.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Marking format: "embedded", "sidecar", or "both" (default: `"embedded"`).
    #[serde(default = "default_marking_format")]
    pub format: String,
}
12140
/// Default for `ContentMarkingSchemaConfig::format`.
fn default_marking_format() -> String {
    String::from("embedded")
}
12144
12145impl Default for ContentMarkingSchemaConfig {
12146    fn default() -> Self {
12147        Self {
12148            enabled: true,
12149            format: default_marking_format(),
12150        }
12151    }
12152}
12153
/// Webhook notification configuration.
///
/// `Default` is derived: webhooks disabled with no endpoints.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WebhookSchemaConfig {
    /// Whether webhooks are enabled (default: `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Webhook endpoint configurations (default: empty).
    #[serde(default)]
    pub endpoints: Vec<WebhookEndpointConfig>,
}
12164
/// Configuration for a single webhook endpoint.
///
/// `url` has no `serde` default and is therefore required; every other field
/// may be omitted.
///
/// NOTE(review): `Debug` and `Serialize` are derived, so `secret` is printed
/// and serialized verbatim — avoid logging this struct with `{:?}` if the
/// secret is sensitive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookEndpointConfig {
    /// Target URL for the webhook.
    pub url: String,
    /// Event types this endpoint subscribes to (default: empty).
    #[serde(default)]
    pub events: Vec<String>,
    /// Optional secret for HMAC-SHA256 signature (default: `None`).
    #[serde(default)]
    pub secret: Option<String>,
    /// Maximum retry attempts (default: 3).
    #[serde(default = "default_webhook_retries")]
    pub max_retries: u32,
    /// Timeout in seconds (default: 10).
    #[serde(default = "default_webhook_timeout")]
    pub timeout_secs: u64,
}
12183
/// Default for `WebhookEndpointConfig::max_retries`.
fn default_webhook_retries() -> u32 {
    3_u32
}

/// Default for `WebhookEndpointConfig::timeout_secs`.
fn default_webhook_timeout() -> u64 {
    10_u64
}
12190
12191// ===== Enterprise Process Chain Config Structs =====
12192
12193// ----- Source-to-Pay (S2C/S2P) -----
12194
/// Source-to-Pay configuration covering the entire sourcing lifecycle.
///
/// `Default` is derived, so the section is disabled by default and every
/// sub-section takes its own type's default. All fields may be omitted on input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SourceToPayConfig {
    /// Enable source-to-pay generation (default: `false`)
    #[serde(default)]
    pub enabled: bool,
    /// Spend analysis configuration
    #[serde(default)]
    pub spend_analysis: SpendAnalysisConfig,
    /// Sourcing project configuration
    #[serde(default)]
    pub sourcing: SourcingConfig,
    /// Supplier qualification configuration
    #[serde(default)]
    pub qualification: QualificationConfig,
    /// RFx event configuration
    #[serde(default)]
    pub rfx: RfxConfig,
    /// Contract configuration
    #[serde(default)]
    pub contracts: ContractConfig,
    /// Catalog configuration
    #[serde(default)]
    pub catalog: CatalogConfig,
    /// Scorecard configuration
    #[serde(default)]
    pub scorecards: ScorecardConfig,
    /// P2P integration settings
    #[serde(default)]
    pub p2p_integration: P2PIntegrationConfig,
}
12226
/// Spend analysis configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpendAnalysisConfig {
    /// HHI threshold for triggering sourcing project (default: 2500.0)
    #[serde(default = "default_hhi_threshold")]
    pub hhi_threshold: f64,
    /// Target spend coverage under contracts (default: 0.80)
    #[serde(default = "default_contract_coverage_target")]
    pub contract_coverage_target: f64,
}
12237
12238impl Default for SpendAnalysisConfig {
12239    fn default() -> Self {
12240        Self {
12241            hhi_threshold: default_hhi_threshold(),
12242            contract_coverage_target: default_contract_coverage_target(),
12243        }
12244    }
12245}
12246
/// Default for `SpendAnalysisConfig::hhi_threshold`.
fn default_hhi_threshold() -> f64 {
    2500.0_f64
}

/// Default for `SpendAnalysisConfig::contract_coverage_target`.
fn default_contract_coverage_target() -> f64 {
    0.80_f64
}
12253
/// Sourcing project configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourcingConfig {
    /// Number of sourcing projects per year (default: 10)
    #[serde(default = "default_sourcing_projects_per_year")]
    pub projects_per_year: u32,
    /// Months before expiry to trigger renewal project (default: 3)
    #[serde(default = "default_renewal_horizon_months")]
    pub renewal_horizon_months: u32,
    /// Average project duration in months (default: 4)
    #[serde(default = "default_project_duration_months")]
    pub project_duration_months: u32,
}
12267
12268impl Default for SourcingConfig {
12269    fn default() -> Self {
12270        Self {
12271            projects_per_year: default_sourcing_projects_per_year(),
12272            renewal_horizon_months: default_renewal_horizon_months(),
12273            project_duration_months: default_project_duration_months(),
12274        }
12275    }
12276}
12277
/// Default for `SourcingConfig::projects_per_year`.
fn default_sourcing_projects_per_year() -> u32 {
    10_u32
}

/// Default for `SourcingConfig::renewal_horizon_months`.
fn default_renewal_horizon_months() -> u32 {
    3_u32
}

/// Default for `SourcingConfig::project_duration_months`.
fn default_project_duration_months() -> u32 {
    4_u32
}
12287
/// Supplier qualification configuration.
///
/// The four default weights (0.25 + 0.30 + 0.25 + 0.20) sum to 1.0. Nothing
/// in this struct enforces that for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualificationConfig {
    /// Pass rate for qualification (default: 0.75)
    #[serde(default = "default_qualification_pass_rate")]
    pub pass_rate: f64,
    /// Qualification validity in days (default: 365)
    #[serde(default = "default_qualification_validity_days")]
    pub validity_days: u32,
    /// Financial stability weight (default: 0.25)
    #[serde(default = "default_financial_weight")]
    pub financial_weight: f64,
    /// Quality management weight (default: 0.30)
    #[serde(default = "default_quality_weight")]
    pub quality_weight: f64,
    /// Delivery performance weight (default: 0.25)
    #[serde(default = "default_delivery_weight")]
    pub delivery_weight: f64,
    /// Compliance weight (default: 0.20)
    #[serde(default = "default_compliance_weight")]
    pub compliance_weight: f64,
}
12310
12311impl Default for QualificationConfig {
12312    fn default() -> Self {
12313        Self {
12314            pass_rate: default_qualification_pass_rate(),
12315            validity_days: default_qualification_validity_days(),
12316            financial_weight: default_financial_weight(),
12317            quality_weight: default_quality_weight(),
12318            delivery_weight: default_delivery_weight(),
12319            compliance_weight: default_compliance_weight(),
12320        }
12321    }
12322}
12323
/// Default for `QualificationConfig::pass_rate`.
fn default_qualification_pass_rate() -> f64 {
    0.75_f64
}

/// Default for `QualificationConfig::validity_days`.
fn default_qualification_validity_days() -> u32 {
    365_u32
}

/// Default for `QualificationConfig::financial_weight`.
fn default_financial_weight() -> f64 {
    0.25_f64
}

/// Default for `QualificationConfig::quality_weight`.
fn default_quality_weight() -> f64 {
    0.30_f64
}

/// Default for `QualificationConfig::delivery_weight`.
fn default_delivery_weight() -> f64 {
    0.25_f64
}

/// Default for `QualificationConfig::compliance_weight`.
fn default_compliance_weight() -> f64 {
    0.20_f64
}
12342
/// RFx event configuration.
///
/// The three default evaluation weights (0.40 + 0.35 + 0.25) sum to 1.0.
/// Nothing in this struct enforces that, or `min <= max` for invited vendors,
/// on user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RfxConfig {
    /// Spend threshold above which RFI is required before RFP (default: 100_000.0)
    #[serde(default = "default_rfi_threshold")]
    pub rfi_threshold: f64,
    /// Minimum vendors invited per RFx (default: 3)
    #[serde(default = "default_min_invited_vendors")]
    pub min_invited_vendors: u32,
    /// Maximum vendors invited per RFx (default: 8)
    #[serde(default = "default_max_invited_vendors")]
    pub max_invited_vendors: u32,
    /// Response rate (% of invited vendors that submit bids); default: 0.70
    #[serde(default = "default_response_rate")]
    pub response_rate: f64,
    /// Default price weight in evaluation (default: 0.40)
    #[serde(default = "default_price_weight")]
    pub default_price_weight: f64,
    /// Default quality weight in evaluation (default: 0.35)
    #[serde(default = "default_rfx_quality_weight")]
    pub default_quality_weight: f64,
    /// Default delivery weight in evaluation (default: 0.25)
    #[serde(default = "default_rfx_delivery_weight")]
    pub default_delivery_weight: f64,
}
12368
12369impl Default for RfxConfig {
12370    fn default() -> Self {
12371        Self {
12372            rfi_threshold: default_rfi_threshold(),
12373            min_invited_vendors: default_min_invited_vendors(),
12374            max_invited_vendors: default_max_invited_vendors(),
12375            response_rate: default_response_rate(),
12376            default_price_weight: default_price_weight(),
12377            default_quality_weight: default_rfx_quality_weight(),
12378            default_delivery_weight: default_rfx_delivery_weight(),
12379        }
12380    }
12381}
12382
/// Default for `RfxConfig::rfi_threshold`.
fn default_rfi_threshold() -> f64 {
    100_000.0_f64
}

/// Default for `RfxConfig::min_invited_vendors`.
fn default_min_invited_vendors() -> u32 {
    3_u32
}

/// Default for `RfxConfig::max_invited_vendors`.
fn default_max_invited_vendors() -> u32 {
    8_u32
}

/// Default for `RfxConfig::response_rate`.
fn default_response_rate() -> f64 {
    0.70_f64
}

/// Default for `RfxConfig::default_price_weight`.
fn default_price_weight() -> f64 {
    0.40_f64
}

/// Default for `RfxConfig::default_quality_weight`.
fn default_rfx_quality_weight() -> f64 {
    0.35_f64
}

/// Default for `RfxConfig::default_delivery_weight`.
fn default_rfx_delivery_weight() -> f64 {
    0.25_f64
}
12404
/// Contract configuration.
///
/// Nothing in this struct enforces `min_duration_months <= max_duration_months`
/// on user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractConfig {
    /// Minimum contract duration in months (default: 12)
    #[serde(default = "default_min_contract_months")]
    pub min_duration_months: u32,
    /// Maximum contract duration in months (default: 36)
    #[serde(default = "default_max_contract_months")]
    pub max_duration_months: u32,
    /// Auto-renewal rate (default: 0.40)
    #[serde(default = "default_auto_renewal_rate")]
    pub auto_renewal_rate: f64,
    /// Amendment rate (% of contracts with at least one amendment); default: 0.20
    #[serde(default = "default_amendment_rate")]
    pub amendment_rate: f64,
    /// Distribution of contract types
    #[serde(default)]
    pub type_distribution: ContractTypeDistribution,
}
12424
12425impl Default for ContractConfig {
12426    fn default() -> Self {
12427        Self {
12428            min_duration_months: default_min_contract_months(),
12429            max_duration_months: default_max_contract_months(),
12430            auto_renewal_rate: default_auto_renewal_rate(),
12431            amendment_rate: default_amendment_rate(),
12432            type_distribution: ContractTypeDistribution::default(),
12433        }
12434    }
12435}
12436
/// Default for `ContractConfig::min_duration_months`.
fn default_min_contract_months() -> u32 {
    12_u32
}

/// Default for `ContractConfig::max_duration_months`.
fn default_max_contract_months() -> u32 {
    36_u32
}

/// Default for `ContractConfig::auto_renewal_rate`.
fn default_auto_renewal_rate() -> f64 {
    0.40_f64
}

/// Default for `ContractConfig::amendment_rate`.
fn default_amendment_rate() -> f64 {
    0.20_f64
}
12449
/// Distribution of contract types.
///
/// The default shares (0.40 + 0.30 + 0.15 + 0.15) sum to 1.0. Nothing in
/// this struct enforces that for user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractTypeDistribution {
    /// Fixed price percentage (default: 0.40)
    #[serde(default = "default_fixed_price_pct")]
    pub fixed_price: f64,
    /// Blanket/framework percentage (default: 0.30)
    #[serde(default = "default_blanket_pct")]
    pub blanket: f64,
    /// Time and materials percentage (default: 0.15)
    #[serde(default = "default_time_materials_pct")]
    pub time_and_materials: f64,
    /// Service agreement percentage (default: 0.15)
    #[serde(default = "default_service_agreement_pct")]
    pub service_agreement: f64,
}
12466
12467impl Default for ContractTypeDistribution {
12468    fn default() -> Self {
12469        Self {
12470            fixed_price: default_fixed_price_pct(),
12471            blanket: default_blanket_pct(),
12472            time_and_materials: default_time_materials_pct(),
12473            service_agreement: default_service_agreement_pct(),
12474        }
12475    }
12476}
12477
/// Default for `ContractTypeDistribution::fixed_price`.
fn default_fixed_price_pct() -> f64 {
    0.40_f64
}

/// Default for `ContractTypeDistribution::blanket`.
fn default_blanket_pct() -> f64 {
    0.30_f64
}

/// Default for `ContractTypeDistribution::time_and_materials`.
fn default_time_materials_pct() -> f64 {
    0.15_f64
}

/// Default for `ContractTypeDistribution::service_agreement`.
fn default_service_agreement_pct() -> f64 {
    0.15_f64
}
12490
/// Catalog configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CatalogConfig {
    /// Percentage of catalog items marked as preferred (default: 0.70)
    #[serde(default = "default_preferred_vendor_flag_rate")]
    pub preferred_vendor_flag_rate: f64,
    /// Rate of materials with multiple sources in catalog (default: 0.25)
    #[serde(default = "default_multi_source_rate")]
    pub multi_source_rate: f64,
}
12501
12502impl Default for CatalogConfig {
12503    fn default() -> Self {
12504        Self {
12505            preferred_vendor_flag_rate: default_preferred_vendor_flag_rate(),
12506            multi_source_rate: default_multi_source_rate(),
12507        }
12508    }
12509}
12510
/// Default for `CatalogConfig::preferred_vendor_flag_rate`.
fn default_preferred_vendor_flag_rate() -> f64 {
    0.70_f64
}

/// Default for `CatalogConfig::multi_source_rate`.
fn default_multi_source_rate() -> f64 {
    0.25_f64
}
12517
/// Scorecard configuration.
///
/// The four default weights (0.30 + 0.30 + 0.25 + 0.15) sum to 1.0, and the
/// default grade thresholds descend A > B > C (90 / 75 / 60). Nothing in this
/// struct enforces either property on user-supplied values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScorecardConfig {
    /// Scorecard review frequency (quarterly, monthly); default: `"quarterly"`
    #[serde(default = "default_scorecard_frequency")]
    pub frequency: String,
    /// On-time delivery weight in overall score (default: 0.30)
    #[serde(default = "default_otd_weight")]
    pub on_time_delivery_weight: f64,
    /// Quality weight in overall score (default: 0.30)
    #[serde(default = "default_quality_score_weight")]
    pub quality_weight: f64,
    /// Price competitiveness weight (default: 0.25)
    #[serde(default = "default_price_score_weight")]
    pub price_weight: f64,
    /// Responsiveness weight (default: 0.15)
    #[serde(default = "default_responsiveness_weight")]
    pub responsiveness_weight: f64,
    /// Grade A threshold (score >= this); default: 90.0
    #[serde(default = "default_grade_a_threshold")]
    pub grade_a_threshold: f64,
    /// Grade B threshold (default: 75.0)
    #[serde(default = "default_grade_b_threshold")]
    pub grade_b_threshold: f64,
    /// Grade C threshold (default: 60.0)
    #[serde(default = "default_grade_c_threshold")]
    pub grade_c_threshold: f64,
}
12546
12547impl Default for ScorecardConfig {
12548    fn default() -> Self {
12549        Self {
12550            frequency: default_scorecard_frequency(),
12551            on_time_delivery_weight: default_otd_weight(),
12552            quality_weight: default_quality_score_weight(),
12553            price_weight: default_price_score_weight(),
12554            responsiveness_weight: default_responsiveness_weight(),
12555            grade_a_threshold: default_grade_a_threshold(),
12556            grade_b_threshold: default_grade_b_threshold(),
12557            grade_c_threshold: default_grade_c_threshold(),
12558        }
12559    }
12560}
12561
/// Default for `ScorecardConfig::frequency`.
fn default_scorecard_frequency() -> String {
    String::from("quarterly")
}

/// Default for `ScorecardConfig::on_time_delivery_weight`.
fn default_otd_weight() -> f64 {
    0.30_f64
}

/// Default for `ScorecardConfig::quality_weight`.
fn default_quality_score_weight() -> f64 {
    0.30_f64
}

/// Default for `ScorecardConfig::price_weight`.
fn default_price_score_weight() -> f64 {
    0.25_f64
}

/// Default for `ScorecardConfig::responsiveness_weight`.
fn default_responsiveness_weight() -> f64 {
    0.15_f64
}

/// Default for `ScorecardConfig::grade_a_threshold`.
fn default_grade_a_threshold() -> f64 {
    90.0_f64
}

/// Default for `ScorecardConfig::grade_b_threshold`.
fn default_grade_b_threshold() -> f64 {
    75.0_f64
}

/// Default for `ScorecardConfig::grade_c_threshold`.
fn default_grade_c_threshold() -> f64 {
    60.0_f64
}
12586
/// P2P integration settings for contract enforcement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct P2PIntegrationConfig {
    /// Rate of off-contract (maverick) purchases (default: 0.15)
    #[serde(default = "default_off_contract_rate")]
    pub off_contract_rate: f64,
    /// Price tolerance for contract price validation (default: 0.02)
    #[serde(default = "default_price_tolerance")]
    pub price_tolerance: f64,
    /// Whether to enforce catalog ordering (default: `false`)
    #[serde(default)]
    pub catalog_enforcement: bool,
}
12600
12601impl Default for P2PIntegrationConfig {
12602    fn default() -> Self {
12603        Self {
12604            off_contract_rate: default_off_contract_rate(),
12605            price_tolerance: default_price_tolerance(),
12606            catalog_enforcement: false,
12607        }
12608    }
12609}
12610
/// Default for `P2PIntegrationConfig::off_contract_rate`.
fn default_off_contract_rate() -> f64 {
    0.15_f64
}

/// Default for `P2PIntegrationConfig::price_tolerance`.
fn default_price_tolerance() -> f64 {
    0.02_f64
}
12617
12618// ----- Financial Reporting -----
12619
12620/// Financial reporting configuration.
12621#[derive(Debug, Clone, Serialize, Deserialize)]
12622pub struct FinancialReportingConfig {
12623    /// Enable financial reporting generation
12624    #[serde(default)]
12625    pub enabled: bool,
12626    /// Generate balance sheet
12627    #[serde(default = "default_true")]
12628    pub generate_balance_sheet: bool,
12629    /// Generate income statement
12630    #[serde(default = "default_true")]
12631    pub generate_income_statement: bool,
12632    /// Generate cash flow statement
12633    #[serde(default = "default_true")]
12634    pub generate_cash_flow: bool,
12635    /// Generate changes in equity statement
12636    #[serde(default = "default_true")]
12637    pub generate_changes_in_equity: bool,
12638    /// Number of comparative periods
12639    #[serde(default = "default_comparative_periods")]
12640    pub comparative_periods: u32,
12641    /// Management KPIs configuration
12642    #[serde(default)]
12643    pub management_kpis: ManagementKpisConfig,
12644    /// Budget configuration
12645    #[serde(default)]
12646    pub budgets: BudgetConfig,
12647    /// External-expectation (ISA-520 substantive-analytics) configuration
12648    #[serde(default, alias = "externalExpectations")]
12649    pub external_expectations: ExternalExpectationsConfig,
12650    /// Evidence-anchor (ISA-505 external-corroboration) configuration
12651    #[serde(default, alias = "evidenceAnchors")]
12652    pub evidence_anchors: EvidenceAnchorsConfig,
12653}
12654
12655impl Default for FinancialReportingConfig {
12656    fn default() -> Self {
12657        Self {
12658            enabled: false,
12659            generate_balance_sheet: true,
12660            generate_income_statement: true,
12661            generate_cash_flow: true,
12662            generate_changes_in_equity: true,
12663            comparative_periods: default_comparative_periods(),
12664            management_kpis: ManagementKpisConfig::default(),
12665            budgets: BudgetConfig::default(),
12666            external_expectations: ExternalExpectationsConfig::default(),
12667            evidence_anchors: EvidenceAnchorsConfig::default(),
12668        }
12669    }
12670}
12671
/// Default for `FinancialReportingConfig::comparative_periods`.
fn default_comparative_periods() -> u32 {
    1_u32
}
12675
12676/// Management KPIs configuration.
12677#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12678pub struct ManagementKpisConfig {
12679    /// Enable KPI generation
12680    #[serde(default)]
12681    pub enabled: bool,
12682    /// KPI calculation frequency (monthly, quarterly)
12683    #[serde(default = "default_kpi_frequency")]
12684    pub frequency: String,
12685}
12686
12687fn default_kpi_frequency() -> String {
12688    "monthly".to_string()
12689}
12690
/// Budget configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BudgetConfig {
    /// Enable budget generation (default: `false`)
    #[serde(default)]
    pub enabled: bool,
    /// Expected revenue growth rate for budgeting (default: 0.05)
    #[serde(default = "default_revenue_growth_rate")]
    pub revenue_growth_rate: f64,
    /// Expected expense inflation rate (default: 0.03)
    #[serde(default = "default_expense_inflation_rate")]
    pub expense_inflation_rate: f64,
    /// Random noise to add to budget vs actual (default: 0.10)
    #[serde(default = "default_variance_noise")]
    pub variance_noise: f64,
}
12707
12708impl Default for BudgetConfig {
12709    fn default() -> Self {
12710        Self {
12711            enabled: false,
12712            revenue_growth_rate: default_revenue_growth_rate(),
12713            expense_inflation_rate: default_expense_inflation_rate(),
12714            variance_noise: default_variance_noise(),
12715        }
12716    }
12717}
12718
/// Default for `BudgetConfig::revenue_growth_rate`.
fn default_revenue_growth_rate() -> f64 {
    0.05_f64
}

/// Default for `BudgetConfig::expense_inflation_rate`.
fn default_expense_inflation_rate() -> f64 {
    0.03_f64
}

/// Default for `BudgetConfig::variance_noise`.
fn default_variance_noise() -> f64 {
    0.10_f64
}
12728
/// External-expectation (ISA-520 substantive-analytics) configuration.
///
/// When enabled, the engine emits, per material GL account, an expected period total derived from an
/// exogenous driver (prior-year / market / macro / budget) plus a materiality tolerance band, with the
/// realized deviation and the ground-truth fraud contribution. This is the Phase-2 substantive-
/// analytics layer — the engine-side counterpart to the perfect-crime countermeasure (see
/// `docs/phase2-ledger-evidence-assurance.md`).
///
/// Every multi-word field also accepts a camelCase alias on input. Note that `driver`'s alias is
/// `primaryDriver`, not a camelCase form of the field name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalExpectationsConfig {
    /// Enable external-expectation generation (default: `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Exogenous driver the expectation is built on (default: `ExpectationDriver::default()`).
    #[serde(default, alias = "primaryDriver")]
    pub driver: ExpectationDriver,
    /// Materiality tolerance band as a fraction of the expectation (the ISA-520 investigate threshold).
    /// Default: 0.10.
    #[serde(default = "default_expectation_tolerance_pct", alias = "tolerancePct")]
    pub tolerance_pct: f64,
    /// Forecast-error std (fraction) of the auditor's expectation around the legitimate level —
    /// models imperfect expectations, producing realistic false positives on volatile accounts.
    /// Default: 0.05.
    #[serde(default = "default_forecast_noise", alias = "forecastNoise")]
    pub forecast_noise: f64,
    /// Expected period-over-period growth used to frame the driver (e.g. prior-year × (1 + growth)).
    /// Default: 0.05.
    #[serde(default = "default_expectation_growth_rate", alias = "growthRate")]
    pub growth_rate: f64,
    /// Only accounts whose legitimate share of total activity is at least this fraction are scored —
    /// substantive analytics targets material balances. Default: 0.005.
    #[serde(
        default = "default_min_materiality_share",
        alias = "minMaterialityShare"
    )]
    pub min_materiality_share: f64,
}
12762
12763impl Default for ExternalExpectationsConfig {
12764    fn default() -> Self {
12765        Self {
12766            enabled: false,
12767            driver: ExpectationDriver::default(),
12768            tolerance_pct: default_expectation_tolerance_pct(),
12769            forecast_noise: default_forecast_noise(),
12770            growth_rate: default_expectation_growth_rate(),
12771            min_materiality_share: default_min_materiality_share(),
12772        }
12773    }
12774}
12775
/// Serde default for `ExternalExpectationsConfig::tolerance_pct` (fraction: 0.10).
fn default_expectation_tolerance_pct() -> f64 {
    0.10_f64
}

/// Serde default for `ExternalExpectationsConfig::forecast_noise` (fraction: 0.05).
fn default_forecast_noise() -> f64 {
    0.05_f64
}

/// Serde default for `ExternalExpectationsConfig::growth_rate` (fraction: 0.05).
fn default_expectation_growth_rate() -> f64 {
    0.05_f64
}

/// Serde default for the `min_materiality_share` field of both
/// `ExternalExpectationsConfig` and `EvidenceAnchorsConfig` (fraction: 0.005).
fn default_min_materiality_share() -> f64 {
    0.005_f64
}
12788
12789/// Evidence-anchor (ISA-505 external-corroboration) configuration.
12790///
12791/// When enabled, the engine emits, per material GL account, whether the account's activity is
12792/// corroborated by evidence exogenous to the ledger; a material, uncorroborated account is a
12793/// **dangling node** — the ISA-505 existence/occurrence lead. Genuine accounts are corroborated at
12794/// `corroboration_rate`; fraud-linked accounts are corroborated only at `fabrication_evade_rate`
12795/// (the adversary who forged external evidence — the expensive "perfect audit crime"). Phase-2
12796/// evidence layer (see `docs/phase2-ledger-evidence-assurance.md`).
12797#[derive(Debug, Clone, Serialize, Deserialize)]
12798pub struct EvidenceAnchorsConfig {
12799    /// Enable evidence-anchor generation.
12800    #[serde(default)]
12801    pub enabled: bool,
12802    /// Only accounts whose share of total activity is at least this fraction are scored.
12803    #[serde(
12804        default = "default_min_materiality_share",
12805        alias = "minMaterialityShare"
12806    )]
12807    pub min_materiality_share: f64,
12808    /// Rate at which a genuine account's activity is externally corroborated (1 − this = false-positive
12809    /// dangling rate on clean accounts, modelling unconfirmed-but-legitimate balances).
12810    #[serde(default = "default_corroboration_rate", alias = "corroborationRate")]
12811    pub corroboration_rate: f64,
12812    /// Rate at which a fraud-linked account is nonetheless corroborated — the adversary who forged the
12813    /// external evidence (a false negative; the expensive, fragile perfect-audit-crime, `prop:counter`).
12814    #[serde(
12815        default = "default_fabrication_evade_rate",
12816        alias = "fabricationEvadeRate"
12817    )]
12818    pub fabrication_evade_rate: f64,
12819}
12820
12821impl Default for EvidenceAnchorsConfig {
12822    fn default() -> Self {
12823        Self {
12824            enabled: false,
12825            min_materiality_share: default_min_materiality_share(),
12826            corroboration_rate: default_corroboration_rate(),
12827            fabrication_evade_rate: default_fabrication_evade_rate(),
12828        }
12829    }
12830}
12831
/// Serde default for `EvidenceAnchorsConfig::corroboration_rate` (0.92).
fn default_corroboration_rate() -> f64 {
    0.92_f64
}

/// Serde default for `EvidenceAnchorsConfig::fabrication_evade_rate` (0.10).
fn default_fabrication_evade_rate() -> f64 {
    0.10_f64
}
12838
12839// ----- HR Configuration -----
12840
12841/// HR (Hire-to-Retire) process configuration.
12842#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12843pub struct HrConfig {
12844    /// Enable HR generation
12845    #[serde(default)]
12846    pub enabled: bool,
12847    /// Payroll configuration
12848    #[serde(default)]
12849    pub payroll: PayrollConfig,
12850    /// Time and attendance configuration
12851    #[serde(default)]
12852    pub time_attendance: TimeAttendanceConfig,
12853    /// Expense management configuration
12854    #[serde(default)]
12855    pub expenses: ExpenseConfig,
12856}
12857
12858/// Payroll configuration.
12859#[derive(Debug, Clone, Serialize, Deserialize)]
12860pub struct PayrollConfig {
12861    /// Enable payroll generation
12862    #[serde(default = "default_true")]
12863    pub enabled: bool,
12864    /// Pay frequency (monthly, biweekly, weekly)
12865    #[serde(default = "default_pay_frequency")]
12866    pub pay_frequency: String,
12867    /// Salary ranges by job level
12868    #[serde(default)]
12869    pub salary_ranges: PayrollSalaryRanges,
12870    /// Effective tax rates
12871    #[serde(default)]
12872    pub tax_rates: PayrollTaxRates,
12873    /// Benefits enrollment rate
12874    #[serde(default = "default_benefits_enrollment_rate")]
12875    pub benefits_enrollment_rate: f64,
12876    /// Retirement plan participation rate
12877    #[serde(default = "default_retirement_participation_rate")]
12878    pub retirement_participation_rate: f64,
12879}
12880
12881impl Default for PayrollConfig {
12882    fn default() -> Self {
12883        Self {
12884            enabled: true,
12885            pay_frequency: default_pay_frequency(),
12886            salary_ranges: PayrollSalaryRanges::default(),
12887            tax_rates: PayrollTaxRates::default(),
12888            benefits_enrollment_rate: default_benefits_enrollment_rate(),
12889            retirement_participation_rate: default_retirement_participation_rate(),
12890        }
12891    }
12892}
12893
/// Serde default for `PayrollConfig::pay_frequency` (`"monthly"`).
fn default_pay_frequency() -> String {
    String::from("monthly")
}

/// Serde default for `PayrollConfig::benefits_enrollment_rate` (0.60).
fn default_benefits_enrollment_rate() -> f64 {
    0.60_f64
}

/// Serde default for `PayrollConfig::retirement_participation_rate` (0.45).
fn default_retirement_participation_rate() -> f64 {
    0.45_f64
}
12903
12904/// Salary ranges by job level.
12905#[derive(Debug, Clone, Serialize, Deserialize)]
12906pub struct PayrollSalaryRanges {
12907    /// Staff level min/max
12908    #[serde(default = "default_staff_min")]
12909    pub staff_min: f64,
12910    #[serde(default = "default_staff_max")]
12911    pub staff_max: f64,
12912    /// Manager level min/max
12913    #[serde(default = "default_manager_min")]
12914    pub manager_min: f64,
12915    #[serde(default = "default_manager_max")]
12916    pub manager_max: f64,
12917    /// Director level min/max
12918    #[serde(default = "default_director_min")]
12919    pub director_min: f64,
12920    #[serde(default = "default_director_max")]
12921    pub director_max: f64,
12922    /// Executive level min/max
12923    #[serde(default = "default_executive_min")]
12924    pub executive_min: f64,
12925    #[serde(default = "default_executive_max")]
12926    pub executive_max: f64,
12927}
12928
12929impl Default for PayrollSalaryRanges {
12930    fn default() -> Self {
12931        Self {
12932            staff_min: default_staff_min(),
12933            staff_max: default_staff_max(),
12934            manager_min: default_manager_min(),
12935            manager_max: default_manager_max(),
12936            director_min: default_director_min(),
12937            director_max: default_director_max(),
12938            executive_min: default_executive_min(),
12939            executive_max: default_executive_max(),
12940        }
12941    }
12942}
12943
/// Serde default for `PayrollSalaryRanges::staff_min`.
fn default_staff_min() -> f64 {
    50_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::staff_max`.
fn default_staff_max() -> f64 {
    70_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::manager_min`.
fn default_manager_min() -> f64 {
    80_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::manager_max`.
fn default_manager_max() -> f64 {
    120_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::director_min`.
fn default_director_min() -> f64 {
    120_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::director_max`.
fn default_director_max() -> f64 {
    180_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::executive_min`.
fn default_executive_min() -> f64 {
    180_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::executive_max`.
fn default_executive_max() -> f64 {
    350_000.0_f64
}
12968
12969/// Effective tax rates for payroll.
12970#[derive(Debug, Clone, Serialize, Deserialize)]
12971pub struct PayrollTaxRates {
12972    /// Federal effective tax rate
12973    #[serde(default = "default_federal_rate")]
12974    pub federal_effective: f64,
12975    /// State effective tax rate
12976    #[serde(default = "default_state_rate")]
12977    pub state_effective: f64,
12978    /// FICA/social security rate
12979    #[serde(default = "default_fica_rate")]
12980    pub fica: f64,
12981}
12982
12983impl Default for PayrollTaxRates {
12984    fn default() -> Self {
12985        Self {
12986            federal_effective: default_federal_rate(),
12987            state_effective: default_state_rate(),
12988            fica: default_fica_rate(),
12989        }
12990    }
12991}
12992
/// Serde default for `PayrollTaxRates::federal_effective` (0.22).
fn default_federal_rate() -> f64 {
    0.22_f64
}

/// Serde default for `PayrollTaxRates::state_effective` (0.05).
fn default_state_rate() -> f64 {
    0.05_f64
}

/// Serde default for `PayrollTaxRates::fica` (0.0765).
fn default_fica_rate() -> f64 {
    0.0765_f64
}
13002
13003/// Time and attendance configuration.
13004#[derive(Debug, Clone, Serialize, Deserialize)]
13005pub struct TimeAttendanceConfig {
13006    /// Enable time tracking
13007    #[serde(default = "default_true")]
13008    pub enabled: bool,
13009    /// Overtime rate (% of employees with overtime in a period)
13010    #[serde(default = "default_overtime_rate")]
13011    pub overtime_rate: f64,
13012}
13013
13014impl Default for TimeAttendanceConfig {
13015    fn default() -> Self {
13016        Self {
13017            enabled: true,
13018            overtime_rate: default_overtime_rate(),
13019        }
13020    }
13021}
13022
/// Serde default for `TimeAttendanceConfig::overtime_rate` (0.10).
fn default_overtime_rate() -> f64 {
    0.10_f64
}
13026
13027/// Expense management configuration.
13028#[derive(Debug, Clone, Serialize, Deserialize)]
13029pub struct ExpenseConfig {
13030    /// Enable expense report generation
13031    #[serde(default = "default_true")]
13032    pub enabled: bool,
13033    /// Rate of employees submitting expenses per month
13034    #[serde(default = "default_expense_submission_rate")]
13035    pub submission_rate: f64,
13036    /// Rate of policy violations
13037    #[serde(default = "default_policy_violation_rate")]
13038    pub policy_violation_rate: f64,
13039}
13040
13041impl Default for ExpenseConfig {
13042    fn default() -> Self {
13043        Self {
13044            enabled: true,
13045            submission_rate: default_expense_submission_rate(),
13046            policy_violation_rate: default_policy_violation_rate(),
13047        }
13048    }
13049}
13050
/// Serde default for `ExpenseConfig::submission_rate` (0.30).
fn default_expense_submission_rate() -> f64 {
    0.30_f64
}

/// Serde default for `ExpenseConfig::policy_violation_rate` (0.08).
fn default_policy_violation_rate() -> f64 {
    0.08_f64
}
13057
13058// ----- Manufacturing Configuration -----
13059
13060/// Manufacturing process configuration (production orders, WIP, routing).
13061#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13062pub struct ManufacturingProcessConfig {
13063    /// Enable manufacturing generation
13064    #[serde(default)]
13065    pub enabled: bool,
13066    /// Production order configuration
13067    #[serde(default)]
13068    pub production_orders: ProductionOrderConfig,
13069    /// Costing configuration
13070    #[serde(default)]
13071    pub costing: ManufacturingCostingConfig,
13072    /// Routing configuration
13073    #[serde(default)]
13074    pub routing: RoutingConfig,
13075}
13076
13077/// Production order configuration.
13078#[derive(Debug, Clone, Serialize, Deserialize)]
13079pub struct ProductionOrderConfig {
13080    /// Orders per month
13081    #[serde(default = "default_prod_orders_per_month")]
13082    pub orders_per_month: u32,
13083    /// Average batch size
13084    #[serde(default = "default_prod_avg_batch_size")]
13085    pub avg_batch_size: u32,
13086    /// Yield rate
13087    #[serde(default = "default_prod_yield_rate")]
13088    pub yield_rate: f64,
13089    /// Make-to-order rate (vs make-to-stock)
13090    #[serde(default = "default_prod_make_to_order_rate")]
13091    pub make_to_order_rate: f64,
13092    /// Rework rate
13093    #[serde(default = "default_prod_rework_rate")]
13094    pub rework_rate: f64,
13095}
13096
13097impl Default for ProductionOrderConfig {
13098    fn default() -> Self {
13099        Self {
13100            orders_per_month: default_prod_orders_per_month(),
13101            avg_batch_size: default_prod_avg_batch_size(),
13102            yield_rate: default_prod_yield_rate(),
13103            make_to_order_rate: default_prod_make_to_order_rate(),
13104            rework_rate: default_prod_rework_rate(),
13105        }
13106    }
13107}
13108
/// Serde default for `ProductionOrderConfig::orders_per_month` (50).
fn default_prod_orders_per_month() -> u32 {
    50_u32
}

/// Serde default for `ProductionOrderConfig::avg_batch_size` (100).
fn default_prod_avg_batch_size() -> u32 {
    100_u32
}

/// Serde default for `ProductionOrderConfig::yield_rate` (0.97).
fn default_prod_yield_rate() -> f64 {
    0.97_f64
}

/// Serde default for `ProductionOrderConfig::make_to_order_rate` (0.20).
fn default_prod_make_to_order_rate() -> f64 {
    0.20_f64
}

/// Serde default for `ProductionOrderConfig::rework_rate` (0.03).
fn default_prod_rework_rate() -> f64 {
    0.03_f64
}
13124
13125/// Manufacturing costing configuration.
13126#[derive(Debug, Clone, Serialize, Deserialize)]
13127pub struct ManufacturingCostingConfig {
13128    /// Labor rate per hour
13129    #[serde(default = "default_labor_rate")]
13130    pub labor_rate_per_hour: f64,
13131    /// Overhead application rate (multiplier on direct labor)
13132    #[serde(default = "default_overhead_rate")]
13133    pub overhead_rate: f64,
13134    /// Standard cost update frequency
13135    #[serde(default = "default_cost_update_frequency")]
13136    pub standard_cost_update_frequency: String,
13137}
13138
13139impl Default for ManufacturingCostingConfig {
13140    fn default() -> Self {
13141        Self {
13142            labor_rate_per_hour: default_labor_rate(),
13143            overhead_rate: default_overhead_rate(),
13144            standard_cost_update_frequency: default_cost_update_frequency(),
13145        }
13146    }
13147}
13148
/// Serde default for `ManufacturingCostingConfig::labor_rate_per_hour` (35.0).
fn default_labor_rate() -> f64 {
    35.0_f64
}

/// Serde default for `ManufacturingCostingConfig::overhead_rate` (1.50).
fn default_overhead_rate() -> f64 {
    1.50_f64
}

/// Serde default for `ManufacturingCostingConfig::standard_cost_update_frequency`
/// (`"quarterly"`).
fn default_cost_update_frequency() -> String {
    String::from("quarterly")
}
13158
13159/// Routing configuration for production operations.
13160#[derive(Debug, Clone, Serialize, Deserialize)]
13161pub struct RoutingConfig {
13162    /// Average number of operations per routing
13163    #[serde(default = "default_avg_operations")]
13164    pub avg_operations: u32,
13165    /// Average setup time in hours
13166    #[serde(default = "default_setup_time")]
13167    pub setup_time_hours: f64,
13168    /// Run time variation coefficient
13169    #[serde(default = "default_run_time_variation")]
13170    pub run_time_variation: f64,
13171}
13172
13173impl Default for RoutingConfig {
13174    fn default() -> Self {
13175        Self {
13176            avg_operations: default_avg_operations(),
13177            setup_time_hours: default_setup_time(),
13178            run_time_variation: default_run_time_variation(),
13179        }
13180    }
13181}
13182
/// Serde default for `RoutingConfig::avg_operations` (4).
fn default_avg_operations() -> u32 {
    4_u32
}

/// Serde default for `RoutingConfig::setup_time_hours` (1.5).
fn default_setup_time() -> f64 {
    1.5_f64
}

/// Serde default for `RoutingConfig::run_time_variation` (0.15).
fn default_run_time_variation() -> f64 {
    0.15_f64
}
13192
13193// ----- Sales Quote Configuration -----
13194
13195/// Sales quote (quote-to-order) pipeline configuration.
13196#[derive(Debug, Clone, Serialize, Deserialize)]
13197pub struct SalesQuoteConfig {
13198    /// Enable sales quote generation
13199    #[serde(default)]
13200    pub enabled: bool,
13201    /// Quotes per month
13202    #[serde(default = "default_quotes_per_month")]
13203    pub quotes_per_month: u32,
13204    /// Win rate (fraction of quotes that convert to orders)
13205    #[serde(default = "default_quote_win_rate")]
13206    pub win_rate: f64,
13207    /// Average quote validity in days
13208    #[serde(default = "default_quote_validity_days")]
13209    pub validity_days: u32,
13210}
13211
13212impl Default for SalesQuoteConfig {
13213    fn default() -> Self {
13214        Self {
13215            enabled: false,
13216            quotes_per_month: default_quotes_per_month(),
13217            win_rate: default_quote_win_rate(),
13218            validity_days: default_quote_validity_days(),
13219        }
13220    }
13221}
13222
/// Serde default for `SalesQuoteConfig::quotes_per_month` (30).
fn default_quotes_per_month() -> u32 {
    30_u32
}

/// Serde default for `SalesQuoteConfig::win_rate` (0.35).
fn default_quote_win_rate() -> f64 {
    0.35_f64
}

/// Serde default for `SalesQuoteConfig::validity_days` (30).
fn default_quote_validity_days() -> u32 {
    30_u32
}
13232
13233// =============================================================================
13234// Tax Accounting Configuration
13235// =============================================================================
13236
13237/// Tax accounting configuration.
13238///
13239/// Controls generation of tax-related data including VAT/GST, sales tax,
13240/// withholding tax, tax provisions, and payroll tax across multiple jurisdictions.
13241#[derive(Debug, Clone, Serialize, Deserialize)]
13242pub struct TaxConfig {
13243    /// Whether tax generation is enabled.
13244    #[serde(default)]
13245    pub enabled: bool,
13246    /// Tax jurisdiction configuration.
13247    #[serde(default)]
13248    pub jurisdictions: TaxJurisdictionConfig,
13249    /// VAT/GST configuration.
13250    #[serde(default)]
13251    pub vat_gst: VatGstConfig,
13252    /// Sales tax configuration.
13253    #[serde(default)]
13254    pub sales_tax: SalesTaxConfig,
13255    /// Withholding tax configuration.
13256    #[serde(default)]
13257    pub withholding: WithholdingTaxSchemaConfig,
13258    /// Tax provision configuration.
13259    #[serde(default)]
13260    pub provisions: TaxProvisionSchemaConfig,
13261    /// Payroll tax configuration.
13262    #[serde(default)]
13263    pub payroll_tax: PayrollTaxSchemaConfig,
13264    /// Anomaly injection rate for tax data (0.0 to 1.0).
13265    #[serde(default = "default_tax_anomaly_rate")]
13266    pub anomaly_rate: f64,
13267}
13268
/// Serde default for `TaxConfig::anomaly_rate` (0.03).
fn default_tax_anomaly_rate() -> f64 {
    0.03_f64
}
13272
13273impl Default for TaxConfig {
13274    fn default() -> Self {
13275        Self {
13276            enabled: false,
13277            jurisdictions: TaxJurisdictionConfig::default(),
13278            vat_gst: VatGstConfig::default(),
13279            sales_tax: SalesTaxConfig::default(),
13280            withholding: WithholdingTaxSchemaConfig::default(),
13281            provisions: TaxProvisionSchemaConfig::default(),
13282            payroll_tax: PayrollTaxSchemaConfig::default(),
13283            anomaly_rate: default_tax_anomaly_rate(),
13284        }
13285    }
13286}
13287
13288/// Tax jurisdiction configuration.
13289///
13290/// Specifies which countries and subnational jurisdictions to include
13291/// when generating tax data.
13292#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13293pub struct TaxJurisdictionConfig {
13294    /// List of country codes to include (e.g., ["US", "DE", "GB"]).
13295    #[serde(default)]
13296    pub countries: Vec<String>,
13297    /// Whether to include subnational jurisdictions (e.g., US states, Canadian provinces).
13298    #[serde(default)]
13299    pub include_subnational: bool,
13300}
13301
13302/// VAT/GST configuration.
13303///
13304/// Controls generation of Value Added Tax / Goods and Services Tax data,
13305/// including standard and reduced rates, exempt categories, and reverse charge.
13306#[derive(Debug, Clone, Serialize, Deserialize)]
13307pub struct VatGstConfig {
13308    /// Whether VAT/GST generation is enabled.
13309    #[serde(default)]
13310    pub enabled: bool,
13311    /// Standard VAT/GST rates by country code (e.g., {"DE": 0.19, "GB": 0.20}).
13312    #[serde(default)]
13313    pub standard_rates: std::collections::HashMap<String, f64>,
13314    /// Reduced VAT/GST rates by country code (e.g., {"DE": 0.07, "GB": 0.05}).
13315    #[serde(default)]
13316    pub reduced_rates: std::collections::HashMap<String, f64>,
13317    /// Categories exempt from VAT/GST (e.g., ["financial_services", "healthcare"]).
13318    #[serde(default)]
13319    pub exempt_categories: Vec<String>,
13320    /// Whether to apply reverse charge mechanism for cross-border B2B transactions.
13321    #[serde(default = "default_true")]
13322    pub reverse_charge: bool,
13323}
13324
13325impl Default for VatGstConfig {
13326    fn default() -> Self {
13327        Self {
13328            enabled: false,
13329            standard_rates: std::collections::HashMap::new(),
13330            reduced_rates: std::collections::HashMap::new(),
13331            exempt_categories: Vec::new(),
13332            reverse_charge: true,
13333        }
13334    }
13335}
13336
13337/// Sales tax configuration.
13338///
13339/// Controls generation of US-style sales tax data including nexus determination.
13340#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13341pub struct SalesTaxConfig {
13342    /// Whether sales tax generation is enabled.
13343    #[serde(default)]
13344    pub enabled: bool,
13345    /// US states where the company has nexus (e.g., ["CA", "NY", "TX"]).
13346    #[serde(default)]
13347    pub nexus_states: Vec<String>,
13348}
13349
13350/// Withholding tax configuration.
13351///
13352/// Controls generation of withholding tax data for cross-border payments,
13353/// including treaty network and rate overrides.
13354#[derive(Debug, Clone, Serialize, Deserialize)]
13355pub struct WithholdingTaxSchemaConfig {
13356    /// Whether withholding tax generation is enabled.
13357    #[serde(default)]
13358    pub enabled: bool,
13359    /// Whether to simulate a treaty network with reduced rates.
13360    #[serde(default = "default_true")]
13361    pub treaty_network: bool,
13362    /// Default withholding tax rate for non-treaty countries (0.0 to 1.0).
13363    #[serde(default = "default_withholding_rate")]
13364    pub default_rate: f64,
13365    /// Reduced withholding tax rate for treaty countries (0.0 to 1.0).
13366    #[serde(default = "default_treaty_reduced_rate")]
13367    pub treaty_reduced_rate: f64,
13368}
13369
/// Serde default for `WithholdingTaxSchemaConfig::default_rate` (0.30).
fn default_withholding_rate() -> f64 {
    0.30_f64
}

/// Serde default for `WithholdingTaxSchemaConfig::treaty_reduced_rate` (0.15).
fn default_treaty_reduced_rate() -> f64 {
    0.15_f64
}
13377
13378impl Default for WithholdingTaxSchemaConfig {
13379    fn default() -> Self {
13380        Self {
13381            enabled: false,
13382            treaty_network: true,
13383            default_rate: default_withholding_rate(),
13384            treaty_reduced_rate: default_treaty_reduced_rate(),
13385        }
13386    }
13387}
13388
13389/// Tax provision configuration.
13390///
13391/// Controls generation of tax provision data including statutory rates
13392/// and uncertain tax positions (ASC 740 / IAS 12).
13393#[derive(Debug, Clone, Serialize, Deserialize)]
13394pub struct TaxProvisionSchemaConfig {
13395    /// Whether tax provision generation is enabled.
13396    /// Defaults to true when tax is enabled, as provisions are typically required.
13397    #[serde(default = "default_true")]
13398    pub enabled: bool,
13399    /// Statutory corporate tax rate (0.0 to 1.0).
13400    #[serde(default = "default_statutory_rate")]
13401    pub statutory_rate: f64,
13402    /// Whether to generate uncertain tax positions (FIN 48 / IFRIC 23).
13403    #[serde(default = "default_true")]
13404    pub uncertain_positions: bool,
13405}
13406
/// Serde default for `TaxProvisionSchemaConfig::statutory_rate` (0.21).
fn default_statutory_rate() -> f64 {
    0.21_f64
}
13410
13411impl Default for TaxProvisionSchemaConfig {
13412    fn default() -> Self {
13413        Self {
13414            enabled: true,
13415            statutory_rate: default_statutory_rate(),
13416            uncertain_positions: true,
13417        }
13418    }
13419}
13420
13421/// Payroll tax configuration.
13422///
13423/// Controls generation of payroll tax data (employer/employee contributions,
13424/// social security, Medicare, etc.).
13425#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13426pub struct PayrollTaxSchemaConfig {
13427    /// Whether payroll tax generation is enabled.
13428    #[serde(default)]
13429    pub enabled: bool,
13430}
13431
13432// ---------------------------------------------------------------------------
13433// Treasury & Cash Management Configuration
13434// ---------------------------------------------------------------------------
13435
13436/// Treasury and cash management configuration.
13437///
13438/// Controls generation of cash positions, forecasts, pooling, hedging
13439/// instruments (ASC 815 / IFRS 9), debt instruments with covenants,
13440/// bank guarantees, and intercompany netting runs.
13441#[derive(Debug, Clone, Serialize, Deserialize)]
13442pub struct TreasuryConfig {
13443    /// Whether treasury generation is enabled.
13444    #[serde(default)]
13445    pub enabled: bool,
13446    /// Cash positioning configuration.
13447    #[serde(default)]
13448    pub cash_positioning: CashPositioningConfig,
13449    /// Cash forecasting configuration.
13450    #[serde(default)]
13451    pub cash_forecasting: CashForecastingConfig,
13452    /// Cash pooling configuration.
13453    #[serde(default)]
13454    pub cash_pooling: CashPoolingConfig,
13455    /// Hedging configuration (FX forwards, IR swaps, etc.).
13456    #[serde(default)]
13457    pub hedging: HedgingSchemaConfig,
13458    /// Debt instrument and covenant configuration.
13459    #[serde(default)]
13460    pub debt: DebtSchemaConfig,
13461    /// Intercompany netting configuration.
13462    #[serde(default)]
13463    pub netting: NettingSchemaConfig,
13464    /// Bank guarantee / letter of credit configuration.
13465    #[serde(default)]
13466    pub bank_guarantees: BankGuaranteeSchemaConfig,
13467    /// Anomaly injection rate for treasury data (0.0 to 1.0).
13468    #[serde(default = "default_treasury_anomaly_rate")]
13469    pub anomaly_rate: f64,
13470}
13471
/// Serde default for `TreasuryConfig::anomaly_rate` (0.02).
fn default_treasury_anomaly_rate() -> f64 {
    0.02_f64
}
13475
13476impl Default for TreasuryConfig {
13477    fn default() -> Self {
13478        Self {
13479            enabled: false,
13480            cash_positioning: CashPositioningConfig::default(),
13481            cash_forecasting: CashForecastingConfig::default(),
13482            cash_pooling: CashPoolingConfig::default(),
13483            hedging: HedgingSchemaConfig::default(),
13484            debt: DebtSchemaConfig::default(),
13485            netting: NettingSchemaConfig::default(),
13486            bank_guarantees: BankGuaranteeSchemaConfig::default(),
13487            anomaly_rate: default_treasury_anomaly_rate(),
13488        }
13489    }
13490}
13491
13492/// Cash positioning configuration.
13493///
13494/// Controls daily cash position generation per entity/bank account.
13495#[derive(Debug, Clone, Serialize, Deserialize)]
13496pub struct CashPositioningConfig {
13497    /// Whether cash positioning is enabled.
13498    #[serde(default = "default_true")]
13499    pub enabled: bool,
13500    /// Position generation frequency.
13501    #[serde(default = "default_cash_frequency")]
13502    pub frequency: String,
13503    /// Minimum cash balance policy threshold.
13504    #[serde(default = "default_minimum_balance_policy")]
13505    pub minimum_balance_policy: f64,
13506}
13507
/// Serde default for `CashPositioningConfig::frequency` (`"daily"`).
fn default_cash_frequency() -> String {
    String::from("daily")
}

/// Serde default for `CashPositioningConfig::minimum_balance_policy` (100,000).
fn default_minimum_balance_policy() -> f64 {
    100_000.0_f64
}
13515
13516impl Default for CashPositioningConfig {
13517    fn default() -> Self {
13518        Self {
13519            enabled: true,
13520            frequency: default_cash_frequency(),
13521            minimum_balance_policy: default_minimum_balance_policy(),
13522        }
13523    }
13524}
13525
13526/// Cash forecasting configuration.
13527///
13528/// Controls forward-looking cash forecast generation with probability-weighted items.
13529#[derive(Debug, Clone, Serialize, Deserialize)]
13530pub struct CashForecastingConfig {
13531    /// Whether cash forecasting is enabled.
13532    #[serde(default = "default_true")]
13533    pub enabled: bool,
13534    /// Number of days to forecast into the future.
13535    #[serde(default = "default_horizon_days")]
13536    pub horizon_days: u32,
13537    /// AR collection probability curve type ("aging" or "flat").
13538    #[serde(default = "default_ar_probability_curve")]
13539    pub ar_collection_probability_curve: String,
13540    /// Confidence interval for the forecast (0.0 to 1.0).
13541    #[serde(default = "default_confidence_interval")]
13542    pub confidence_interval: f64,
13543}
13544
/// Serde default for `CashForecastingConfig::horizon_days` (90).
fn default_horizon_days() -> u32 {
    90_u32
}

/// Serde default for `CashForecastingConfig::ar_collection_probability_curve`
/// (`"aging"`).
fn default_ar_probability_curve() -> String {
    String::from("aging")
}

/// Serde default for `CashForecastingConfig::confidence_interval` (0.90).
fn default_confidence_interval() -> f64 {
    0.90_f64
}
13556
13557impl Default for CashForecastingConfig {
13558    fn default() -> Self {
13559        Self {
13560            enabled: true,
13561            horizon_days: default_horizon_days(),
13562            ar_collection_probability_curve: default_ar_probability_curve(),
13563            confidence_interval: default_confidence_interval(),
13564        }
13565    }
13566}
13567
13568/// Cash pooling configuration.
13569///
13570/// Controls cash pool structure generation (physical, notional, zero-balancing).
13571#[derive(Debug, Clone, Serialize, Deserialize)]
13572pub struct CashPoolingConfig {
13573    /// Whether cash pooling is enabled.
13574    #[serde(default)]
13575    pub enabled: bool,
13576    /// Pool type: "physical_pooling", "notional_pooling", or "zero_balancing".
13577    #[serde(default = "default_pool_type")]
13578    pub pool_type: String,
13579    /// Time of day when sweeps occur (HH:MM format).
13580    #[serde(default = "default_sweep_time")]
13581    pub sweep_time: String,
13582}
13583
/// Serde default for `CashPoolingConfig::pool_type` (`"zero_balancing"`).
fn default_pool_type() -> String {
    String::from("zero_balancing")
}

/// Serde default for `CashPoolingConfig::sweep_time` (`"16:00"`).
fn default_sweep_time() -> String {
    String::from("16:00")
}
13591
13592impl Default for CashPoolingConfig {
13593    fn default() -> Self {
13594        Self {
13595            enabled: false,
13596            pool_type: default_pool_type(),
13597            sweep_time: default_sweep_time(),
13598        }
13599    }
13600}
13601
13602/// Hedging configuration.
13603///
13604/// Controls generation of hedging instruments and hedge relationship designations
13605/// under ASC 815 / IFRS 9.
13606#[derive(Debug, Clone, Serialize, Deserialize)]
13607pub struct HedgingSchemaConfig {
13608    /// Whether hedging generation is enabled.
13609    #[serde(default)]
13610    pub enabled: bool,
13611    /// Target hedge ratio (0.0 to 1.0). Proportion of FX exposure to hedge.
13612    #[serde(default = "default_hedge_ratio")]
13613    pub hedge_ratio: f64,
13614    /// Types of instruments to generate (e.g., ["fx_forward", "interest_rate_swap"]).
13615    #[serde(default = "default_hedge_instruments")]
13616    pub instruments: Vec<String>,
13617    /// Whether to designate formal hedge accounting relationships.
13618    #[serde(default = "default_true")]
13619    pub hedge_accounting: bool,
13620    /// Effectiveness testing method: "dollar_offset", "regression", or "critical_terms".
13621    #[serde(default = "default_effectiveness_method")]
13622    pub effectiveness_method: String,
13623}
13624
/// Serde default for `HedgingSchemaConfig::hedge_ratio` (0.75).
fn default_hedge_ratio() -> f64 {
    0.75_f64
}

/// Serde default for `HedgingSchemaConfig::instruments`:
/// `"fx_forward"` followed by `"interest_rate_swap"`.
fn default_hedge_instruments() -> Vec<String> {
    ["fx_forward", "interest_rate_swap"]
        .iter()
        .map(|name| name.to_string())
        .collect()
}

/// Serde default for `HedgingSchemaConfig::effectiveness_method` (`"regression"`).
fn default_effectiveness_method() -> String {
    String::from("regression")
}
13636
13637impl Default for HedgingSchemaConfig {
13638    fn default() -> Self {
13639        Self {
13640            enabled: false,
13641            hedge_ratio: default_hedge_ratio(),
13642            instruments: default_hedge_instruments(),
13643            hedge_accounting: true,
13644            effectiveness_method: default_effectiveness_method(),
13645        }
13646    }
13647}
13648
13649/// Debt instrument configuration.
13650///
13651/// Controls generation of debt instruments (term loans, revolving credit, bonds)
13652/// with amortization schedules and financial covenants.
13653#[derive(Debug, Clone, Default, Serialize, Deserialize)]
13654pub struct DebtSchemaConfig {
13655    /// Whether debt instrument generation is enabled.
13656    #[serde(default)]
13657    pub enabled: bool,
13658    /// Debt instrument definitions.
13659    #[serde(default)]
13660    pub instruments: Vec<DebtInstrumentDef>,
13661    /// Covenant definitions.
13662    #[serde(default)]
13663    pub covenants: Vec<CovenantDef>,
13664}
13665
13666/// Definition of a debt instrument in configuration.
13667#[derive(Debug, Clone, Serialize, Deserialize)]
13668pub struct DebtInstrumentDef {
13669    /// Instrument type: "term_loan", "revolving_credit", "bond", "commercial_paper", "bridge_loan".
13670    #[serde(rename = "type")]
13671    pub instrument_type: String,
13672    /// Principal amount (for term loans, bonds).
13673    #[serde(default)]
13674    pub principal: Option<f64>,
13675    /// Interest rate (annual, as decimal fraction).
13676    #[serde(default)]
13677    pub rate: Option<f64>,
13678    /// Maturity in months.
13679    #[serde(default)]
13680    pub maturity_months: Option<u32>,
13681    /// Facility limit (for revolving credit).
13682    #[serde(default)]
13683    pub facility: Option<f64>,
13684}
13685
/// Definition of a debt covenant in configuration.
///
/// Neither field has a serde default, so both must be present on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CovenantDef {
    /// Covenant type: "debt_to_equity", "interest_coverage", "current_ratio",
    /// "net_worth", "debt_to_ebitda", "fixed_charge_coverage".
    ///
    /// Appears under the key `type` in the serialized form.
    #[serde(rename = "type")]
    pub covenant_type: String,
    /// Covenant threshold value.
    pub threshold: f64,
}
13696
13697/// Intercompany netting configuration.
13698///
13699/// Controls generation of multilateral netting runs.
13700#[derive(Debug, Clone, Serialize, Deserialize)]
13701pub struct NettingSchemaConfig {
13702    /// Whether netting generation is enabled.
13703    #[serde(default)]
13704    pub enabled: bool,
13705    /// Netting cycle: "daily", "weekly", or "monthly".
13706    #[serde(default = "default_netting_cycle")]
13707    pub cycle: String,
13708}
13709
13710fn default_netting_cycle() -> String {
13711    "monthly".to_string()
13712}
13713
13714impl Default for NettingSchemaConfig {
13715    fn default() -> Self {
13716        Self {
13717            enabled: false,
13718            cycle: default_netting_cycle(),
13719        }
13720    }
13721}
13722
13723/// Bank guarantee and letter of credit configuration.
13724///
13725/// Controls generation of bank guarantees, standby LCs, and performance bonds.
13726#[derive(Debug, Clone, Serialize, Deserialize)]
13727pub struct BankGuaranteeSchemaConfig {
13728    /// Whether bank guarantee generation is enabled.
13729    #[serde(default)]
13730    pub enabled: bool,
13731    /// Number of guarantees to generate.
13732    #[serde(default = "default_guarantee_count")]
13733    pub count: u32,
13734}
13735
13736fn default_guarantee_count() -> u32 {
13737    5
13738}
13739
13740impl Default for BankGuaranteeSchemaConfig {
13741    fn default() -> Self {
13742        Self {
13743            enabled: false,
13744            count: default_guarantee_count(),
13745        }
13746    }
13747}
13748
13749// ===========================================================================
13750// Project Accounting Configuration
13751// ===========================================================================
13752
13753/// Project accounting configuration.
13754///
13755/// Controls generation of project cost lines, revenue recognition,
13756/// milestones, change orders, retainage, and earned value metrics.
13757#[derive(Debug, Clone, Serialize, Deserialize)]
13758pub struct ProjectAccountingConfig {
13759    /// Whether project accounting is enabled.
13760    #[serde(default)]
13761    pub enabled: bool,
13762    /// Number of projects to generate.
13763    #[serde(default = "default_project_count")]
13764    pub project_count: u32,
13765    /// Distribution of project types (capital, internal, customer, r_and_d, maintenance, technology).
13766    #[serde(default)]
13767    pub project_types: ProjectTypeDistribution,
13768    /// WBS structure configuration.
13769    #[serde(default)]
13770    pub wbs: WbsSchemaConfig,
13771    /// Cost allocation rates (what % of source documents get project-tagged).
13772    #[serde(default)]
13773    pub cost_allocation: CostAllocationConfig,
13774    /// Revenue recognition configuration for project accounting.
13775    #[serde(default)]
13776    pub revenue_recognition: ProjectRevenueRecognitionConfig,
13777    /// Milestone configuration.
13778    #[serde(default)]
13779    pub milestones: MilestoneSchemaConfig,
13780    /// Change order configuration.
13781    #[serde(default)]
13782    pub change_orders: ChangeOrderSchemaConfig,
13783    /// Retainage configuration.
13784    #[serde(default)]
13785    pub retainage: RetainageSchemaConfig,
13786    /// Earned value management configuration.
13787    #[serde(default)]
13788    pub earned_value: EarnedValueSchemaConfig,
13789    /// Anomaly injection rate for project accounting data (0.0 to 1.0).
13790    #[serde(default = "default_project_anomaly_rate")]
13791    pub anomaly_rate: f64,
13792}
13793
13794fn default_project_count() -> u32 {
13795    10
13796}
13797
13798fn default_project_anomaly_rate() -> f64 {
13799    0.03
13800}
13801
13802impl Default for ProjectAccountingConfig {
13803    fn default() -> Self {
13804        Self {
13805            enabled: false,
13806            project_count: default_project_count(),
13807            project_types: ProjectTypeDistribution::default(),
13808            wbs: WbsSchemaConfig::default(),
13809            cost_allocation: CostAllocationConfig::default(),
13810            revenue_recognition: ProjectRevenueRecognitionConfig::default(),
13811            milestones: MilestoneSchemaConfig::default(),
13812            change_orders: ChangeOrderSchemaConfig::default(),
13813            retainage: RetainageSchemaConfig::default(),
13814            earned_value: EarnedValueSchemaConfig::default(),
13815            anomaly_rate: default_project_anomaly_rate(),
13816        }
13817    }
13818}
13819
13820/// Distribution of project types by weight.
13821#[derive(Debug, Clone, Serialize, Deserialize)]
13822pub struct ProjectTypeDistribution {
13823    /// Weight for capital projects (default 0.25).
13824    #[serde(default = "default_capital_weight")]
13825    pub capital: f64,
13826    /// Weight for internal projects (default 0.20).
13827    #[serde(default = "default_internal_weight")]
13828    pub internal: f64,
13829    /// Weight for customer projects (default 0.30).
13830    #[serde(default = "default_customer_weight")]
13831    pub customer: f64,
13832    /// Weight for R&D projects (default 0.10).
13833    #[serde(default = "default_rnd_weight")]
13834    pub r_and_d: f64,
13835    /// Weight for maintenance projects (default 0.10).
13836    #[serde(default = "default_maintenance_weight")]
13837    pub maintenance: f64,
13838    /// Weight for technology projects (default 0.05).
13839    #[serde(default = "default_technology_weight")]
13840    pub technology: f64,
13841}
13842
13843fn default_capital_weight() -> f64 {
13844    0.25
13845}
13846fn default_internal_weight() -> f64 {
13847    0.20
13848}
13849fn default_customer_weight() -> f64 {
13850    0.30
13851}
13852fn default_rnd_weight() -> f64 {
13853    0.10
13854}
13855fn default_maintenance_weight() -> f64 {
13856    0.10
13857}
13858fn default_technology_weight() -> f64 {
13859    0.05
13860}
13861
13862impl Default for ProjectTypeDistribution {
13863    fn default() -> Self {
13864        Self {
13865            capital: default_capital_weight(),
13866            internal: default_internal_weight(),
13867            customer: default_customer_weight(),
13868            r_and_d: default_rnd_weight(),
13869            maintenance: default_maintenance_weight(),
13870            technology: default_technology_weight(),
13871        }
13872    }
13873}
13874
13875/// WBS structure configuration.
13876#[derive(Debug, Clone, Serialize, Deserialize)]
13877pub struct WbsSchemaConfig {
13878    /// Maximum depth of WBS hierarchy (default 3).
13879    #[serde(default = "default_wbs_max_depth")]
13880    pub max_depth: u32,
13881    /// Minimum elements per level-1 WBS (default 2).
13882    #[serde(default = "default_wbs_min_elements")]
13883    pub min_elements_per_level: u32,
13884    /// Maximum elements per level-1 WBS (default 6).
13885    #[serde(default = "default_wbs_max_elements")]
13886    pub max_elements_per_level: u32,
13887}
13888
13889fn default_wbs_max_depth() -> u32 {
13890    3
13891}
13892fn default_wbs_min_elements() -> u32 {
13893    2
13894}
13895fn default_wbs_max_elements() -> u32 {
13896    6
13897}
13898
13899impl Default for WbsSchemaConfig {
13900    fn default() -> Self {
13901        Self {
13902            max_depth: default_wbs_max_depth(),
13903            min_elements_per_level: default_wbs_min_elements(),
13904            max_elements_per_level: default_wbs_max_elements(),
13905        }
13906    }
13907}
13908
13909/// Cost allocation rates — what fraction of each document type gets linked to a project.
13910#[derive(Debug, Clone, Serialize, Deserialize)]
13911pub struct CostAllocationConfig {
13912    /// Fraction of time entries assigned to projects (0.0 to 1.0).
13913    #[serde(default = "default_time_entry_rate")]
13914    pub time_entry_project_rate: f64,
13915    /// Fraction of expense reports assigned to projects (0.0 to 1.0).
13916    #[serde(default = "default_expense_rate")]
13917    pub expense_project_rate: f64,
13918    /// Fraction of purchase orders assigned to projects (0.0 to 1.0).
13919    #[serde(default = "default_po_rate")]
13920    pub purchase_order_project_rate: f64,
13921    /// Fraction of vendor invoices assigned to projects (0.0 to 1.0).
13922    #[serde(default = "default_vi_rate")]
13923    pub vendor_invoice_project_rate: f64,
13924}
13925
13926fn default_time_entry_rate() -> f64 {
13927    0.60
13928}
13929fn default_expense_rate() -> f64 {
13930    0.30
13931}
13932fn default_po_rate() -> f64 {
13933    0.40
13934}
13935fn default_vi_rate() -> f64 {
13936    0.35
13937}
13938
13939impl Default for CostAllocationConfig {
13940    fn default() -> Self {
13941        Self {
13942            time_entry_project_rate: default_time_entry_rate(),
13943            expense_project_rate: default_expense_rate(),
13944            purchase_order_project_rate: default_po_rate(),
13945            vendor_invoice_project_rate: default_vi_rate(),
13946        }
13947    }
13948}
13949
13950/// Revenue recognition configuration for project accounting.
13951#[derive(Debug, Clone, Serialize, Deserialize)]
13952pub struct ProjectRevenueRecognitionConfig {
13953    /// Whether revenue recognition is enabled for customer projects.
13954    #[serde(default = "default_true")]
13955    pub enabled: bool,
13956    /// Default method: "percentage_of_completion", "completed_contract", "milestone_based".
13957    #[serde(default = "default_revenue_method")]
13958    pub method: String,
13959    /// Default completion measure: "cost_to_cost", "labor_hours", "physical_completion".
13960    #[serde(default = "default_completion_measure")]
13961    pub completion_measure: String,
13962    /// Average contract value for customer projects.
13963    #[serde(default = "default_avg_contract_value")]
13964    pub avg_contract_value: f64,
13965}
13966
13967fn default_revenue_method() -> String {
13968    "percentage_of_completion".to_string()
13969}
13970fn default_completion_measure() -> String {
13971    "cost_to_cost".to_string()
13972}
13973fn default_avg_contract_value() -> f64 {
13974    500_000.0
13975}
13976
13977impl Default for ProjectRevenueRecognitionConfig {
13978    fn default() -> Self {
13979        Self {
13980            enabled: true,
13981            method: default_revenue_method(),
13982            completion_measure: default_completion_measure(),
13983            avg_contract_value: default_avg_contract_value(),
13984        }
13985    }
13986}
13987
13988/// Milestone configuration.
13989#[derive(Debug, Clone, Serialize, Deserialize)]
13990pub struct MilestoneSchemaConfig {
13991    /// Whether milestone generation is enabled.
13992    #[serde(default = "default_true")]
13993    pub enabled: bool,
13994    /// Average number of milestones per project.
13995    #[serde(default = "default_milestones_per_project")]
13996    pub avg_per_project: u32,
13997    /// Fraction of milestones that are payment milestones (0.0 to 1.0).
13998    #[serde(default = "default_payment_milestone_rate")]
13999    pub payment_milestone_rate: f64,
14000}
14001
14002fn default_milestones_per_project() -> u32 {
14003    4
14004}
14005fn default_payment_milestone_rate() -> f64 {
14006    0.50
14007}
14008
14009impl Default for MilestoneSchemaConfig {
14010    fn default() -> Self {
14011        Self {
14012            enabled: true,
14013            avg_per_project: default_milestones_per_project(),
14014            payment_milestone_rate: default_payment_milestone_rate(),
14015        }
14016    }
14017}
14018
14019/// Change order configuration.
14020#[derive(Debug, Clone, Serialize, Deserialize)]
14021pub struct ChangeOrderSchemaConfig {
14022    /// Whether change order generation is enabled.
14023    #[serde(default = "default_true")]
14024    pub enabled: bool,
14025    /// Probability that a project will have at least one change order (0.0 to 1.0).
14026    #[serde(default = "default_change_order_probability")]
14027    pub probability: f64,
14028    /// Maximum change orders per project.
14029    #[serde(default = "default_max_change_orders")]
14030    pub max_per_project: u32,
14031    /// Approval rate for change orders (0.0 to 1.0).
14032    #[serde(default = "default_change_order_approval_rate")]
14033    pub approval_rate: f64,
14034}
14035
14036fn default_change_order_probability() -> f64 {
14037    0.40
14038}
14039fn default_max_change_orders() -> u32 {
14040    3
14041}
14042fn default_change_order_approval_rate() -> f64 {
14043    0.75
14044}
14045
14046impl Default for ChangeOrderSchemaConfig {
14047    fn default() -> Self {
14048        Self {
14049            enabled: true,
14050            probability: default_change_order_probability(),
14051            max_per_project: default_max_change_orders(),
14052            approval_rate: default_change_order_approval_rate(),
14053        }
14054    }
14055}
14056
14057/// Retainage configuration.
14058#[derive(Debug, Clone, Serialize, Deserialize)]
14059pub struct RetainageSchemaConfig {
14060    /// Whether retainage is enabled.
14061    #[serde(default)]
14062    pub enabled: bool,
14063    /// Default retainage percentage (0.0 to 1.0, e.g., 0.10 for 10%).
14064    #[serde(default = "default_retainage_pct")]
14065    pub default_percentage: f64,
14066}
14067
14068fn default_retainage_pct() -> f64 {
14069    0.10
14070}
14071
14072impl Default for RetainageSchemaConfig {
14073    fn default() -> Self {
14074        Self {
14075            enabled: false,
14076            default_percentage: default_retainage_pct(),
14077        }
14078    }
14079}
14080
14081/// Earned value management (EVM) configuration.
14082#[derive(Debug, Clone, Serialize, Deserialize)]
14083pub struct EarnedValueSchemaConfig {
14084    /// Whether EVM metrics are generated.
14085    #[serde(default = "default_true")]
14086    pub enabled: bool,
14087    /// Measurement frequency: "weekly", "biweekly", "monthly".
14088    #[serde(default = "default_evm_frequency")]
14089    pub frequency: String,
14090}
14091
14092fn default_evm_frequency() -> String {
14093    "monthly".to_string()
14094}
14095
14096impl Default for EarnedValueSchemaConfig {
14097    fn default() -> Self {
14098        Self {
14099            enabled: true,
14100            frequency: default_evm_frequency(),
14101        }
14102    }
14103}
14104
14105// =============================================================================
14106// ESG / Sustainability Configuration
14107// =============================================================================
14108
14109/// Top-level ESG / sustainability reporting configuration.
14110#[derive(Debug, Clone, Serialize, Deserialize)]
14111pub struct EsgConfig {
14112    /// Whether ESG generation is enabled.
14113    #[serde(default)]
14114    pub enabled: bool,
14115    /// Environmental metrics (emissions, energy, water, waste).
14116    #[serde(default)]
14117    pub environmental: EnvironmentalConfig,
14118    /// Social metrics (diversity, pay equity, safety).
14119    #[serde(default)]
14120    pub social: SocialConfig,
14121    /// Governance metrics (board composition, ethics, compliance).
14122    #[serde(default)]
14123    pub governance: GovernanceSchemaConfig,
14124    /// Supply-chain ESG assessment settings.
14125    #[serde(default)]
14126    pub supply_chain_esg: SupplyChainEsgConfig,
14127    /// ESG reporting / disclosure framework settings.
14128    #[serde(default)]
14129    pub reporting: EsgReportingConfig,
14130    /// Climate scenario analysis settings.
14131    #[serde(default)]
14132    pub climate_scenarios: ClimateScenarioConfig,
14133    /// Anomaly injection rate for ESG data (0.0 to 1.0).
14134    #[serde(default = "default_esg_anomaly_rate")]
14135    pub anomaly_rate: f64,
14136}
14137
14138fn default_esg_anomaly_rate() -> f64 {
14139    0.02
14140}
14141
14142impl Default for EsgConfig {
14143    fn default() -> Self {
14144        Self {
14145            enabled: false,
14146            environmental: EnvironmentalConfig::default(),
14147            social: SocialConfig::default(),
14148            governance: GovernanceSchemaConfig::default(),
14149            supply_chain_esg: SupplyChainEsgConfig::default(),
14150            reporting: EsgReportingConfig::default(),
14151            climate_scenarios: ClimateScenarioConfig::default(),
14152            anomaly_rate: default_esg_anomaly_rate(),
14153        }
14154    }
14155}
14156
14157/// Country pack configuration.
14158///
14159/// Controls where to load additional country packs and per-country overrides.
14160/// When omitted, only the built-in packs (_default, US, DE, GB) are used.
14161#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14162pub struct CountryPacksSchemaConfig {
14163    /// Optional directory containing additional `*.json` country packs.
14164    #[serde(default)]
14165    pub external_dir: Option<PathBuf>,
14166    /// Per-country overrides applied after loading.
14167    /// Keys are ISO 3166-1 alpha-2 codes; values are partial JSON objects
14168    /// that are deep-merged on top of the loaded pack.
14169    #[serde(default)]
14170    pub overrides: std::collections::HashMap<String, serde_json::Value>,
14171}
14172
14173/// Environmental metrics configuration.
14174#[derive(Debug, Clone, Serialize, Deserialize)]
14175pub struct EnvironmentalConfig {
14176    /// Whether environmental metrics are generated.
14177    #[serde(default = "default_true")]
14178    pub enabled: bool,
14179    /// Scope 1 (direct) emission generation settings.
14180    #[serde(default)]
14181    pub scope1: EmissionScopeConfig,
14182    /// Scope 2 (purchased energy) emission generation settings.
14183    #[serde(default)]
14184    pub scope2: EmissionScopeConfig,
14185    /// Scope 3 (value chain) emission generation settings.
14186    #[serde(default)]
14187    pub scope3: Scope3Config,
14188    /// Energy consumption tracking settings.
14189    #[serde(default)]
14190    pub energy: EnergySchemaConfig,
14191    /// Water usage tracking settings.
14192    #[serde(default)]
14193    pub water: WaterSchemaConfig,
14194    /// Waste management tracking settings.
14195    #[serde(default)]
14196    pub waste: WasteSchemaConfig,
14197}
14198
14199impl Default for EnvironmentalConfig {
14200    fn default() -> Self {
14201        Self {
14202            enabled: true,
14203            scope1: EmissionScopeConfig::default(),
14204            scope2: EmissionScopeConfig::default(),
14205            scope3: Scope3Config::default(),
14206            energy: EnergySchemaConfig::default(),
14207            water: WaterSchemaConfig::default(),
14208            waste: WasteSchemaConfig::default(),
14209        }
14210    }
14211}
14212
14213/// Configuration for a single emission scope (Scope 1 or 2).
14214#[derive(Debug, Clone, Serialize, Deserialize)]
14215pub struct EmissionScopeConfig {
14216    /// Whether this scope is enabled.
14217    #[serde(default = "default_true")]
14218    pub enabled: bool,
14219    /// Emission factor region (e.g., "US", "EU", "global").
14220    #[serde(default = "default_emission_region")]
14221    pub factor_region: String,
14222}
14223
14224fn default_emission_region() -> String {
14225    "US".to_string()
14226}
14227
14228impl Default for EmissionScopeConfig {
14229    fn default() -> Self {
14230        Self {
14231            enabled: true,
14232            factor_region: default_emission_region(),
14233        }
14234    }
14235}
14236
14237/// Scope 3 (value chain) emission configuration.
14238#[derive(Debug, Clone, Serialize, Deserialize)]
14239pub struct Scope3Config {
14240    /// Whether Scope 3 emissions are generated.
14241    #[serde(default = "default_true")]
14242    pub enabled: bool,
14243    /// Categories to include (e.g., "purchased_goods", "business_travel", "commuting").
14244    #[serde(default = "default_scope3_categories")]
14245    pub categories: Vec<String>,
14246    /// Spend-based emission intensity (kg CO2e per USD).
14247    #[serde(default = "default_spend_intensity")]
14248    pub default_spend_intensity_kg_per_usd: f64,
14249}
14250
14251fn default_scope3_categories() -> Vec<String> {
14252    vec![
14253        "purchased_goods".to_string(),
14254        "business_travel".to_string(),
14255        "employee_commuting".to_string(),
14256    ]
14257}
14258
14259fn default_spend_intensity() -> f64 {
14260    0.5
14261}
14262
14263impl Default for Scope3Config {
14264    fn default() -> Self {
14265        Self {
14266            enabled: true,
14267            categories: default_scope3_categories(),
14268            default_spend_intensity_kg_per_usd: default_spend_intensity(),
14269        }
14270    }
14271}
14272
14273/// Energy consumption configuration.
14274#[derive(Debug, Clone, Serialize, Deserialize)]
14275pub struct EnergySchemaConfig {
14276    /// Whether energy consumption tracking is enabled.
14277    #[serde(default = "default_true")]
14278    pub enabled: bool,
14279    /// Number of facilities to generate.
14280    #[serde(default = "default_facility_count")]
14281    pub facility_count: u32,
14282    /// Target percentage of energy from renewable sources (0.0 to 1.0).
14283    #[serde(default = "default_renewable_target")]
14284    pub renewable_target: f64,
14285}
14286
14287fn default_facility_count() -> u32 {
14288    5
14289}
14290
14291fn default_renewable_target() -> f64 {
14292    0.30
14293}
14294
14295impl Default for EnergySchemaConfig {
14296    fn default() -> Self {
14297        Self {
14298            enabled: true,
14299            facility_count: default_facility_count(),
14300            renewable_target: default_renewable_target(),
14301        }
14302    }
14303}
14304
14305/// Water usage configuration.
14306#[derive(Debug, Clone, Serialize, Deserialize)]
14307pub struct WaterSchemaConfig {
14308    /// Whether water usage tracking is enabled.
14309    #[serde(default = "default_true")]
14310    pub enabled: bool,
14311    /// Number of facilities with water tracking.
14312    #[serde(default = "default_water_facility_count")]
14313    pub facility_count: u32,
14314}
14315
14316fn default_water_facility_count() -> u32 {
14317    3
14318}
14319
14320impl Default for WaterSchemaConfig {
14321    fn default() -> Self {
14322        Self {
14323            enabled: true,
14324            facility_count: default_water_facility_count(),
14325        }
14326    }
14327}
14328
14329/// Waste management configuration.
14330#[derive(Debug, Clone, Serialize, Deserialize)]
14331pub struct WasteSchemaConfig {
14332    /// Whether waste tracking is enabled.
14333    #[serde(default = "default_true")]
14334    pub enabled: bool,
14335    /// Target diversion rate (0.0 to 1.0).
14336    #[serde(default = "default_diversion_target")]
14337    pub diversion_target: f64,
14338}
14339
14340fn default_diversion_target() -> f64 {
14341    0.50
14342}
14343
14344impl Default for WasteSchemaConfig {
14345    fn default() -> Self {
14346        Self {
14347            enabled: true,
14348            diversion_target: default_diversion_target(),
14349        }
14350    }
14351}
14352
14353/// Social metrics configuration.
14354#[derive(Debug, Clone, Serialize, Deserialize)]
14355pub struct SocialConfig {
14356    /// Whether social metrics are generated.
14357    #[serde(default = "default_true")]
14358    pub enabled: bool,
14359    /// Workforce diversity tracking settings.
14360    #[serde(default)]
14361    pub diversity: DiversitySchemaConfig,
14362    /// Pay equity analysis settings.
14363    #[serde(default)]
14364    pub pay_equity: PayEquitySchemaConfig,
14365    /// Safety incident and metrics settings.
14366    #[serde(default)]
14367    pub safety: SafetySchemaConfig,
14368}
14369
14370impl Default for SocialConfig {
14371    fn default() -> Self {
14372        Self {
14373            enabled: true,
14374            diversity: DiversitySchemaConfig::default(),
14375            pay_equity: PayEquitySchemaConfig::default(),
14376            safety: SafetySchemaConfig::default(),
14377        }
14378    }
14379}
14380
14381/// Workforce diversity configuration.
14382#[derive(Debug, Clone, Serialize, Deserialize)]
14383pub struct DiversitySchemaConfig {
14384    /// Whether diversity metrics are generated.
14385    #[serde(default = "default_true")]
14386    pub enabled: bool,
14387    /// Dimensions to track (e.g., "gender", "ethnicity", "age_group").
14388    #[serde(default = "default_diversity_dimensions")]
14389    pub dimensions: Vec<String>,
14390}
14391
14392fn default_diversity_dimensions() -> Vec<String> {
14393    vec![
14394        "gender".to_string(),
14395        "ethnicity".to_string(),
14396        "age_group".to_string(),
14397    ]
14398}
14399
14400impl Default for DiversitySchemaConfig {
14401    fn default() -> Self {
14402        Self {
14403            enabled: true,
14404            dimensions: default_diversity_dimensions(),
14405        }
14406    }
14407}
14408
14409/// Pay equity analysis configuration.
14410#[derive(Debug, Clone, Serialize, Deserialize)]
14411pub struct PayEquitySchemaConfig {
14412    /// Whether pay equity analysis is generated.
14413    #[serde(default = "default_true")]
14414    pub enabled: bool,
14415    /// Target pay gap threshold for flagging (e.g., 0.05 = 5% gap).
14416    #[serde(default = "default_pay_gap_threshold")]
14417    pub gap_threshold: f64,
14418}
14419
14420fn default_pay_gap_threshold() -> f64 {
14421    0.05
14422}
14423
14424impl Default for PayEquitySchemaConfig {
14425    fn default() -> Self {
14426        Self {
14427            enabled: true,
14428            gap_threshold: default_pay_gap_threshold(),
14429        }
14430    }
14431}
14432
14433/// Safety metrics configuration.
14434#[derive(Debug, Clone, Serialize, Deserialize)]
14435pub struct SafetySchemaConfig {
14436    /// Whether safety metrics are generated.
14437    #[serde(default = "default_true")]
14438    pub enabled: bool,
14439    /// Average annual recordable incidents per 200,000 hours.
14440    #[serde(default = "default_trir_target")]
14441    pub target_trir: f64,
14442    /// Number of safety incidents to generate.
14443    #[serde(default = "default_incident_count")]
14444    pub incident_count: u32,
14445}
14446
14447fn default_trir_target() -> f64 {
14448    2.5
14449}
14450
14451fn default_incident_count() -> u32 {
14452    20
14453}
14454
14455impl Default for SafetySchemaConfig {
14456    fn default() -> Self {
14457        Self {
14458            enabled: true,
14459            target_trir: default_trir_target(),
14460            incident_count: default_incident_count(),
14461        }
14462    }
14463}
14464
14465/// Governance metrics configuration.
14466#[derive(Debug, Clone, Serialize, Deserialize)]
14467pub struct GovernanceSchemaConfig {
14468    /// Whether governance metrics are generated.
14469    #[serde(default = "default_true")]
14470    pub enabled: bool,
14471    /// Number of board members.
14472    #[serde(default = "default_board_size")]
14473    pub board_size: u32,
14474    /// Target independent director ratio (0.0 to 1.0).
14475    #[serde(default = "default_independence_target")]
14476    pub independence_target: f64,
14477}
14478
14479fn default_board_size() -> u32 {
14480    11
14481}
14482
14483fn default_independence_target() -> f64 {
14484    0.67
14485}
14486
14487impl Default for GovernanceSchemaConfig {
14488    fn default() -> Self {
14489        Self {
14490            enabled: true,
14491            board_size: default_board_size(),
14492            independence_target: default_independence_target(),
14493        }
14494    }
14495}
14496
14497/// Supply-chain ESG assessment configuration.
14498#[derive(Debug, Clone, Serialize, Deserialize)]
14499pub struct SupplyChainEsgConfig {
14500    /// Whether supply chain ESG assessments are generated.
14501    #[serde(default = "default_true")]
14502    pub enabled: bool,
14503    /// Proportion of vendors to assess (0.0 to 1.0).
14504    #[serde(default = "default_assessment_coverage")]
14505    pub assessment_coverage: f64,
14506    /// High-risk country codes for automatic flagging.
14507    #[serde(default = "default_high_risk_countries")]
14508    pub high_risk_countries: Vec<String>,
14509}
14510
14511fn default_assessment_coverage() -> f64 {
14512    0.80
14513}
14514
14515fn default_high_risk_countries() -> Vec<String> {
14516    vec!["CN".to_string(), "BD".to_string(), "MM".to_string()]
14517}
14518
14519impl Default for SupplyChainEsgConfig {
14520    fn default() -> Self {
14521        Self {
14522            enabled: true,
14523            assessment_coverage: default_assessment_coverage(),
14524            high_risk_countries: default_high_risk_countries(),
14525        }
14526    }
14527}
14528
14529/// ESG reporting / disclosure framework configuration.
14530#[derive(Debug, Clone, Serialize, Deserialize)]
14531pub struct EsgReportingConfig {
14532    /// Whether ESG disclosures are generated.
14533    #[serde(default = "default_true")]
14534    pub enabled: bool,
14535    /// Frameworks to generate disclosures for.
14536    #[serde(default = "default_esg_frameworks")]
14537    pub frameworks: Vec<String>,
14538    /// Whether materiality assessment is performed.
14539    #[serde(default = "default_true")]
14540    pub materiality_assessment: bool,
14541    /// Materiality threshold for impact dimension (0.0 to 1.0).
14542    #[serde(default = "default_materiality_threshold")]
14543    pub impact_threshold: f64,
14544    /// Materiality threshold for financial dimension (0.0 to 1.0).
14545    #[serde(default = "default_materiality_threshold")]
14546    pub financial_threshold: f64,
14547}
14548
14549fn default_esg_frameworks() -> Vec<String> {
14550    vec!["GRI".to_string(), "ESRS".to_string()]
14551}
14552
14553fn default_materiality_threshold() -> f64 {
14554    0.6
14555}
14556
14557impl Default for EsgReportingConfig {
14558    fn default() -> Self {
14559        Self {
14560            enabled: true,
14561            frameworks: default_esg_frameworks(),
14562            materiality_assessment: true,
14563            impact_threshold: default_materiality_threshold(),
14564            financial_threshold: default_materiality_threshold(),
14565        }
14566    }
14567}
14568
14569/// Climate scenario analysis configuration.
14570#[derive(Debug, Clone, Serialize, Deserialize)]
14571pub struct ClimateScenarioConfig {
14572    /// Whether climate scenario analysis is generated.
14573    #[serde(default)]
14574    pub enabled: bool,
14575    /// Scenarios to model (e.g., "net_zero_2050", "stated_policies", "current_trajectory").
14576    #[serde(default = "default_climate_scenarios")]
14577    pub scenarios: Vec<String>,
14578    /// Time horizons in years to project.
14579    #[serde(default = "default_time_horizons")]
14580    pub time_horizons: Vec<u32>,
14581}
14582
/// Serde default for the climate `scenarios` list, in modelling order.
fn default_climate_scenarios() -> Vec<String> {
    const SCENARIOS: [&str; 3] = ["net_zero_2050", "stated_policies", "current_trajectory"];
    SCENARIOS.iter().map(|name| String::from(*name)).collect()
}
14590
/// Serde default for `time_horizons`: 5, 10 and 30 years.
fn default_time_horizons() -> Vec<u32> {
    const HORIZONS_YEARS: [u32; 3] = [5, 10, 30];
    HORIZONS_YEARS.to_vec()
}
14594
14595impl Default for ClimateScenarioConfig {
14596    fn default() -> Self {
14597        Self {
14598            enabled: false,
14599            scenarios: default_climate_scenarios(),
14600            time_horizons: default_time_horizons(),
14601        }
14602    }
14603}
14604
14605// ===== Counterfactual Simulation Scenarios =====
14606
14607/// Configuration for counterfactual simulation scenarios.
14608#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14609pub struct ScenariosConfig {
14610    /// Whether scenario generation is enabled.
14611    #[serde(default)]
14612    pub enabled: bool,
14613    /// List of scenario definitions.
14614    #[serde(default)]
14615    pub scenarios: Vec<ScenarioSchemaConfig>,
14616    /// Causal model configuration.
14617    #[serde(default)]
14618    pub causal_model: CausalModelSchemaConfig,
14619    /// Default settings applied to all scenarios.
14620    #[serde(default)]
14621    pub defaults: ScenarioDefaultsConfig,
14622    /// Generate counterfactual (original, mutated) JE pairs for ML training.
14623    /// When true, the orchestrator produces paired clean/anomalous journal entries.
14624    #[serde(default)]
14625    pub generate_counterfactuals: bool,
14626}
14627
14628/// A single scenario definition in the config.
14629#[derive(Debug, Clone, Serialize, Deserialize)]
14630pub struct ScenarioSchemaConfig {
14631    /// Scenario name (must be unique).
14632    pub name: String,
14633    /// Human-readable description.
14634    #[serde(default)]
14635    pub description: String,
14636    /// Tags for categorization.
14637    #[serde(default)]
14638    pub tags: Vec<String>,
14639    /// Base scenario name (None = default config).
14640    pub base: Option<String>,
14641    /// IFRS 9-style probability weight.
14642    pub probability_weight: Option<f64>,
14643    /// List of interventions to apply.
14644    #[serde(default)]
14645    pub interventions: Vec<InterventionSchemaConfig>,
14646    /// Constraint overrides for this scenario.
14647    #[serde(default)]
14648    pub constraints: ScenarioConstraintsSchemaConfig,
14649    /// Output configuration for this scenario.
14650    #[serde(default)]
14651    pub output: ScenarioOutputSchemaConfig,
14652    /// Arbitrary metadata.
14653    #[serde(default)]
14654    pub metadata: std::collections::HashMap<String, String>,
14655}
14656
/// An intervention definition in the config.
///
/// The intervention's own type and parameters are not modelled as typed
/// fields here: they are kept as a raw JSON value flattened into this
/// struct, next to the explicitly named `timing`, `label` and `priority`
/// entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterventionSchemaConfig {
    /// Intervention type and parameters (flattened tagged enum).
    /// Stored untyped as `serde_json::Value`; interpretation happens elsewhere.
    #[serde(flatten)]
    pub intervention_type: serde_json::Value,
    /// Timing configuration. Uses the timing defaults when omitted.
    #[serde(default)]
    pub timing: InterventionTimingSchemaConfig,
    /// Human-readable label.
    pub label: Option<String>,
    /// Priority for conflict resolution (higher wins). Defaults to 0 when omitted.
    #[serde(default)]
    pub priority: u32,
}
14672
14673/// Timing configuration for an intervention.
14674#[derive(Debug, Clone, Serialize, Deserialize)]
14675pub struct InterventionTimingSchemaConfig {
14676    /// Month offset from start (1-indexed).
14677    #[serde(default = "default_start_month")]
14678    pub start_month: u32,
14679    /// Duration in months.
14680    pub duration_months: Option<u32>,
14681    /// Onset type: "sudden", "gradual", "oscillating", "custom".
14682    #[serde(default = "default_onset")]
14683    pub onset: String,
14684    /// Ramp period in months.
14685    pub ramp_months: Option<u32>,
14686}
14687
/// Serde default for `start_month`: the first month (months are 1-indexed).
fn default_start_month() -> u32 {
    const FIRST_MONTH: u32 = 1;
    FIRST_MONTH
}
14691
/// Serde default for the intervention `onset` type.
fn default_onset() -> String {
    String::from("sudden")
}
14695
14696impl Default for InterventionTimingSchemaConfig {
14697    fn default() -> Self {
14698        Self {
14699            start_month: 1,
14700            duration_months: None,
14701            onset: "sudden".to_string(),
14702            ramp_months: None,
14703        }
14704    }
14705}
14706
14707/// Scenario constraint overrides.
14708#[derive(Debug, Clone, Serialize, Deserialize)]
14709pub struct ScenarioConstraintsSchemaConfig {
14710    #[serde(default = "default_true")]
14711    pub preserve_accounting_identity: bool,
14712    #[serde(default = "default_true")]
14713    pub preserve_document_chains: bool,
14714    #[serde(default = "default_true")]
14715    pub preserve_period_close: bool,
14716    #[serde(default = "default_true")]
14717    pub preserve_balance_coherence: bool,
14718    #[serde(default)]
14719    pub custom: Vec<CustomConstraintSchemaConfig>,
14720}
14721
14722impl Default for ScenarioConstraintsSchemaConfig {
14723    fn default() -> Self {
14724        Self {
14725            preserve_accounting_identity: true,
14726            preserve_document_chains: true,
14727            preserve_period_close: true,
14728            preserve_balance_coherence: true,
14729            custom: Vec::new(),
14730        }
14731    }
14732}
14733
14734/// Custom constraint in config.
14735#[derive(Debug, Clone, Serialize, Deserialize)]
14736pub struct CustomConstraintSchemaConfig {
14737    pub config_path: String,
14738    pub min: Option<f64>,
14739    pub max: Option<f64>,
14740    #[serde(default)]
14741    pub description: String,
14742}
14743
14744/// Output configuration for a scenario.
14745#[derive(Debug, Clone, Serialize, Deserialize)]
14746pub struct ScenarioOutputSchemaConfig {
14747    #[serde(default = "default_true")]
14748    pub paired: bool,
14749    #[serde(default = "default_diff_formats_schema")]
14750    pub diff_formats: Vec<String>,
14751    #[serde(default)]
14752    pub diff_scope: Vec<String>,
14753}
14754
/// Serde default for `diff_formats`: `"summary"` followed by `"aggregate"`.
fn default_diff_formats_schema() -> Vec<String> {
    const FORMATS: [&str; 2] = ["summary", "aggregate"];
    FORMATS.iter().map(|name| String::from(*name)).collect()
}
14758
14759impl Default for ScenarioOutputSchemaConfig {
14760    fn default() -> Self {
14761        Self {
14762            paired: true,
14763            diff_formats: default_diff_formats_schema(),
14764            diff_scope: Vec::new(),
14765        }
14766    }
14767}
14768
/// Causal model configuration.
///
/// Every field is optional on input: `preset` falls back to
/// `default_causal_preset()`, and the node and edge lists fall back to empty.
/// Nodes and edges are kept as untyped JSON values in this schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CausalModelSchemaConfig {
    /// Preset name: "default", "minimal", or "custom".
    #[serde(default = "default_causal_preset")]
    pub preset: String,
    /// Custom nodes (merged with preset).
    #[serde(default)]
    pub nodes: Vec<serde_json::Value>,
    /// Custom edges (merged with preset).
    #[serde(default)]
    pub edges: Vec<serde_json::Value>,
}
14782
/// Serde default for the causal model `preset` name.
fn default_causal_preset() -> String {
    String::from("default")
}
14786
14787impl Default for CausalModelSchemaConfig {
14788    fn default() -> Self {
14789        Self {
14790            preset: "default".to_string(),
14791            nodes: Vec::new(),
14792            edges: Vec::new(),
14793        }
14794    }
14795}
14796
/// Default settings applied to all scenarios.
///
/// Both sections are optional on input and fall back to the `Default`
/// implementation of their respective type.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ScenarioDefaultsConfig {
    /// Constraint settings.
    #[serde(default)]
    pub constraints: ScenarioConstraintsSchemaConfig,
    /// Output settings.
    #[serde(default)]
    pub output: ScenarioOutputSchemaConfig,
}
14805
14806// =====================================================================
14807// Compliance Regulations Framework Configuration
14808// =====================================================================
14809
14810/// Top-level configuration for the compliance regulations framework.
14811///
14812/// Controls standards registry, jurisdiction profiles, temporal versioning,
14813/// audit procedure templates, compliance graph integration, and output settings.
14814///
14815/// # Example
14816///
14817/// ```yaml
14818/// compliance_regulations:
14819///   enabled: true
14820///   jurisdictions: [US, DE, GB]
14821///   reference_date: "2025-06-30"
14822///   standards_selection:
14823///     categories: [accounting, auditing, regulatory]
14824///     include: ["IFRS-16", "ASC-606"]
14825///   audit_procedures:
14826///     enabled: true
14827///     procedures_per_standard: 3
14828///   findings:
14829///     enabled: true
14830///     finding_rate: 0.05
14831///   filings:
14832///     enabled: true
14833///   graph:
14834///     enabled: true
14835///     include_compliance_nodes: true
14836///     include_compliance_edges: true
14837/// ```
14838#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14839pub struct ComplianceRegulationsConfig {
14840    /// Master switch for the compliance regulations framework.
14841    #[serde(default)]
14842    pub enabled: bool,
14843    /// Jurisdictions to generate compliance data for (ISO 3166-1 alpha-2 codes).
14844    /// If empty, inferred from company countries in the config.
14845    #[serde(default)]
14846    pub jurisdictions: Vec<String>,
14847    /// Reference date for temporal standard resolution (YYYY-MM-DD).
14848    /// Defaults to the global start_date if not set.
14849    #[serde(default)]
14850    pub reference_date: Option<String>,
14851    /// Standards selection filters.
14852    #[serde(default)]
14853    pub standards_selection: StandardsSelectionConfig,
14854    /// Audit procedure generation settings.
14855    #[serde(default)]
14856    pub audit_procedures: AuditProcedureGenConfig,
14857    /// Compliance finding generation settings.
14858    #[serde(default)]
14859    pub findings: ComplianceFindingGenConfig,
14860    /// Regulatory filing generation settings.
14861    #[serde(default)]
14862    pub filings: ComplianceFilingGenConfig,
14863    /// Compliance graph integration settings.
14864    #[serde(default)]
14865    pub graph: ComplianceGraphConfig,
14866    /// Output settings for compliance-specific files.
14867    #[serde(default)]
14868    pub output: ComplianceOutputConfig,
14869    /// v3.3.0: legal-document generation (engagement letters,
14870    /// management reps, legal opinions, regulatory filings, board
14871    /// resolutions). Requires `compliance_regulations.enabled = true`
14872    /// AND `legal_documents.enabled = true` to take effect.
14873    #[serde(default)]
14874    pub legal_documents: LegalDocumentsConfig,
14875}
14876
14877/// Legal-document generation settings (v3.3.0+).
14878///
14879/// Wires `LegalDocumentGenerator` into the orchestrator. Generates one
14880/// batch per audit engagement when enabled.
14881#[derive(Debug, Clone, Serialize, Deserialize)]
14882pub struct LegalDocumentsConfig {
14883    /// Master switch.
14884    #[serde(default)]
14885    pub enabled: bool,
14886    /// Probability of including a legal-opinion document in an engagement.
14887    #[serde(default = "default_legal_opinion_probability")]
14888    pub legal_opinion_probability: f64,
14889}
14890
/// Serde default for `legal_opinion_probability`.
fn default_legal_opinion_probability() -> f64 {
    const LEGAL_OPINION_PROBABILITY: f64 = 0.40;
    LEGAL_OPINION_PROBABILITY
}
14894
14895impl Default for LegalDocumentsConfig {
14896    fn default() -> Self {
14897        Self {
14898            enabled: false,
14899            legal_opinion_probability: default_legal_opinion_probability(),
14900        }
14901    }
14902}
14903
14904/// Filters which standards are included in the generation.
14905#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14906pub struct StandardsSelectionConfig {
14907    /// Standard categories to include (accounting, auditing, regulatory, tax, esg).
14908    /// Empty = all categories.
14909    #[serde(default)]
14910    pub categories: Vec<String>,
14911    /// Explicit standard IDs to include (e.g., ["IFRS-16", "ASC-606"]).
14912    /// When non-empty, only these standards (plus mandatory ones for selected jurisdictions) are used.
14913    #[serde(default)]
14914    pub include: Vec<String>,
14915    /// Standard IDs to exclude.
14916    #[serde(default)]
14917    pub exclude: Vec<String>,
14918    /// Include superseded standards in the output (for historical analysis).
14919    #[serde(default)]
14920    pub include_superseded: bool,
14921}
14922
14923/// Configuration for audit procedure template generation.
14924#[derive(Debug, Clone, Serialize, Deserialize)]
14925pub struct AuditProcedureGenConfig {
14926    /// Whether audit procedure generation is enabled.
14927    #[serde(default)]
14928    pub enabled: bool,
14929    /// Number of procedures to generate per applicable standard.
14930    #[serde(default = "default_procedures_per_standard")]
14931    pub procedures_per_standard: usize,
14932    /// Sampling methodology: "statistical", "non_statistical", "mixed".
14933    #[serde(default = "default_sampling_method")]
14934    pub sampling_method: String,
14935    /// Confidence level for statistical sampling (0.0-1.0).
14936    #[serde(default = "default_confidence_level")]
14937    pub confidence_level: f64,
14938    /// Tolerable misstatement rate for sampling (0.0-1.0).
14939    #[serde(default = "default_tolerable_misstatement")]
14940    pub tolerable_misstatement: f64,
14941}
14942
/// Serde default for `procedures_per_standard`.
fn default_procedures_per_standard() -> usize {
    const PROCEDURES_PER_STANDARD: usize = 3;
    PROCEDURES_PER_STANDARD
}
14946
/// Serde default for `sampling_method`.
fn default_sampling_method() -> String {
    String::from("statistical")
}
14950
/// Serde default for the statistical-sampling `confidence_level`.
fn default_confidence_level() -> f64 {
    const CONFIDENCE_LEVEL: f64 = 0.95;
    CONFIDENCE_LEVEL
}
14954
/// Serde default for the sampling `tolerable_misstatement` rate.
fn default_tolerable_misstatement() -> f64 {
    const TOLERABLE_MISSTATEMENT: f64 = 0.05;
    TOLERABLE_MISSTATEMENT
}
14958
14959impl Default for AuditProcedureGenConfig {
14960    fn default() -> Self {
14961        Self {
14962            enabled: false,
14963            procedures_per_standard: default_procedures_per_standard(),
14964            sampling_method: default_sampling_method(),
14965            confidence_level: default_confidence_level(),
14966            tolerable_misstatement: default_tolerable_misstatement(),
14967        }
14968    }
14969}
14970
14971/// Configuration for compliance finding generation.
14972#[derive(Debug, Clone, Serialize, Deserialize)]
14973pub struct ComplianceFindingGenConfig {
14974    /// Whether finding generation is enabled.
14975    #[serde(default)]
14976    pub enabled: bool,
14977    /// Rate of findings per audit procedure (0.0-1.0).
14978    #[serde(default = "default_finding_rate")]
14979    pub finding_rate: f64,
14980    /// Rate of material weakness findings among all findings (0.0-1.0).
14981    #[serde(default = "default_cr_material_weakness_rate")]
14982    pub material_weakness_rate: f64,
14983    /// Rate of significant deficiency findings among all findings (0.0-1.0).
14984    #[serde(default = "default_cr_significant_deficiency_rate")]
14985    pub significant_deficiency_rate: f64,
14986    /// Whether to generate remediation plans for findings.
14987    #[serde(default = "default_true")]
14988    pub generate_remediation: bool,
14989}
14990
/// Serde default for `finding_rate`.
fn default_finding_rate() -> f64 {
    const FINDING_RATE: f64 = 0.05;
    FINDING_RATE
}
14994
/// Serde default for the compliance-regulations `material_weakness_rate`.
fn default_cr_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}
14998
/// Serde default for the compliance-regulations `significant_deficiency_rate`.
fn default_cr_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
15002
15003impl Default for ComplianceFindingGenConfig {
15004    fn default() -> Self {
15005        Self {
15006            enabled: false,
15007            finding_rate: default_finding_rate(),
15008            material_weakness_rate: default_cr_material_weakness_rate(),
15009            significant_deficiency_rate: default_cr_significant_deficiency_rate(),
15010            generate_remediation: true,
15011        }
15012    }
15013}
15014
15015/// Configuration for regulatory filing generation.
15016#[derive(Debug, Clone, Serialize, Deserialize)]
15017pub struct ComplianceFilingGenConfig {
15018    /// Whether filing generation is enabled.
15019    #[serde(default)]
15020    pub enabled: bool,
15021    /// Filing types to include (e.g., ["10-K", "10-Q", "Jahresabschluss"]).
15022    /// Empty = all applicable filings for the selected jurisdictions.
15023    #[serde(default)]
15024    pub filing_types: Vec<String>,
15025    /// Generate filing status progression (draft → filed → accepted).
15026    #[serde(default = "default_true")]
15027    pub generate_status_progression: bool,
15028}
15029
15030impl Default for ComplianceFilingGenConfig {
15031    fn default() -> Self {
15032        Self {
15033            enabled: false,
15034            filing_types: Vec::new(),
15035            generate_status_progression: true,
15036        }
15037    }
15038}
15039
15040/// Configuration for compliance graph integration.
15041#[derive(Debug, Clone, Serialize, Deserialize)]
15042pub struct ComplianceGraphConfig {
15043    /// Whether compliance graph integration is enabled.
15044    #[serde(default)]
15045    pub enabled: bool,
15046    /// Include compliance nodes (Standard, Regulation, Jurisdiction, etc.).
15047    #[serde(default = "default_true")]
15048    pub include_compliance_nodes: bool,
15049    /// Include compliance edges (MapsToStandard, TestsControl, etc.).
15050    #[serde(default = "default_true")]
15051    pub include_compliance_edges: bool,
15052    /// Include cross-reference edges between standards.
15053    #[serde(default = "default_true")]
15054    pub include_cross_references: bool,
15055    /// Include temporal supersession edges.
15056    #[serde(default)]
15057    pub include_supersession_edges: bool,
15058    /// Include edges linking standards to the GL account types they govern.
15059    #[serde(default = "default_true")]
15060    pub include_account_links: bool,
15061    /// Include edges linking standards to the internal controls that implement them.
15062    #[serde(default = "default_true")]
15063    pub include_control_links: bool,
15064    /// Include edges linking filings and jurisdictions to the originating company.
15065    #[serde(default = "default_true")]
15066    pub include_company_links: bool,
15067}
15068
15069impl Default for ComplianceGraphConfig {
15070    fn default() -> Self {
15071        Self {
15072            enabled: false,
15073            include_compliance_nodes: true,
15074            include_compliance_edges: true,
15075            include_cross_references: true,
15076            include_supersession_edges: false,
15077            include_account_links: true,
15078            include_control_links: true,
15079            include_company_links: true,
15080        }
15081    }
15082}
15083
15084/// Output settings for compliance-specific data files.
15085#[derive(Debug, Clone, Serialize, Deserialize)]
15086pub struct ComplianceOutputConfig {
15087    /// Export the standards registry catalog.
15088    #[serde(default = "default_true")]
15089    pub export_registry: bool,
15090    /// Export jurisdiction profiles.
15091    #[serde(default = "default_true")]
15092    pub export_jurisdictions: bool,
15093    /// Export cross-reference map.
15094    #[serde(default = "default_true")]
15095    pub export_cross_references: bool,
15096    /// Export temporal version history.
15097    #[serde(default)]
15098    pub export_version_history: bool,
15099}
15100
15101impl Default for ComplianceOutputConfig {
15102    fn default() -> Self {
15103        Self {
15104            export_registry: true,
15105            export_jurisdictions: true,
15106            export_cross_references: true,
15107            export_version_history: false,
15108        }
15109    }
15110}
15111
15112#[cfg(test)]
15113mod tests {
15114    use super::*;
15115    use crate::presets::demo_preset;
15116
15117    // ==========================================================================
15118    // External-expectation config (ISA-520 substantive-analytics layer, Phase 2)
15119    // ==========================================================================
15120
15121    #[test]
15122    fn external_expectations_config_yaml_roundtrip() {
15123        // snake_case and camelCase aliases both parse; the driver enum parses from snake_case.
15124        let yaml = "enabled: true\n\
15125                    driver: market_index\n\
15126                    tolerance_pct: 0.08\n\
15127                    forecastNoise: 0.03\n\
15128                    growth_rate: 0.04\n\
15129                    minMaterialityShare: 0.01\n";
15130        let c: ExternalExpectationsConfig = serde_yaml::from_str(yaml).unwrap();
15131        assert!(c.enabled);
15132        assert_eq!(c.driver, ExpectationDriver::MarketIndex);
15133        assert_eq!(c.tolerance_pct, 0.08);
15134        assert_eq!(c.forecast_noise, 0.03);
15135        assert_eq!(c.growth_rate, 0.04);
15136        assert_eq!(c.min_materiality_share, 0.01);
15137    }
15138
15139    #[test]
15140    fn evidence_anchors_config_yaml_roundtrip_and_defaults() {
15141        let yaml = "enabled: true\n\
15142                    minMaterialityShare: 0.01\n\
15143                    corroboration_rate: 0.95\n\
15144                    fabricationEvadeRate: 0.05\n";
15145        let c: EvidenceAnchorsConfig = serde_yaml::from_str(yaml).unwrap();
15146        assert!(c.enabled);
15147        assert_eq!(c.min_materiality_share, 0.01);
15148        assert_eq!(c.corroboration_rate, 0.95);
15149        assert_eq!(c.fabrication_evade_rate, 0.05);
15150
15151        let d = EvidenceAnchorsConfig::default();
15152        assert!(!d.enabled, "default off → byte-identical output");
15153        assert_eq!(d.corroboration_rate, 0.92);
15154        assert_eq!(d.fabrication_evade_rate, 0.10);
15155
15156        let fr: FinancialReportingConfig =
15157            serde_yaml::from_str("enabled: true\nevidence_anchors:\n  enabled: true\n").unwrap();
15158        assert!(fr.evidence_anchors.enabled);
15159        let fr_default: FinancialReportingConfig = serde_yaml::from_str("enabled: true\n").unwrap();
15160        assert!(!fr_default.evidence_anchors.enabled);
15161    }
15162
15163    #[test]
15164    fn external_expectations_defaults_and_nesting() {
15165        let d = ExternalExpectationsConfig::default();
15166        assert!(!d.enabled, "default off → byte-identical output");
15167        assert_eq!(d.driver, ExpectationDriver::PriorYear);
15168        assert_eq!(d.tolerance_pct, 0.10);
15169        assert_eq!(d.min_materiality_share, 0.005);
15170
15171        // nests under financial_reporting and defaults when omitted
15172        let fr: FinancialReportingConfig = serde_yaml::from_str(
15173            "enabled: true\nexternal_expectations:\n  enabled: true\n  driver: budget\n",
15174        )
15175        .unwrap();
15176        assert!(fr.external_expectations.enabled);
15177        assert_eq!(fr.external_expectations.driver, ExpectationDriver::Budget);
15178        let fr_default: FinancialReportingConfig = serde_yaml::from_str("enabled: true\n").unwrap();
15179        assert!(!fr_default.external_expectations.enabled);
15180    }
15181
15182    // ==========================================================================
15183    // Fraud behavioral-bias config (exposed in v5.x — was hardcoded)
15184    // ==========================================================================
15185
15186    #[test]
15187    fn fraud_bias_defaults_match_core_engine() {
15188        // The YAML-facing defaults must equal the engine's historical hardcoded values, so
15189        // configs that don't set `fraud.bias` produce byte-identical output.
15190        let c = FraudBiasConfig::default();
15191        assert!(c.enabled);
15192        assert_eq!(c.weekend_bias, 0.30);
15193        assert_eq!(c.round_dollar_bias, 0.40);
15194        assert_eq!(c.off_hours_bias, 0.35);
15195        assert_eq!(c.post_close_bias, 0.25);
15196        let core = c.to_core();
15197        assert!(core.enabled);
15198        assert_eq!(core.weekend_bias, 0.30);
15199        assert_eq!(core.round_dollar_bias, 0.40);
15200        assert_eq!(core.off_hours_bias, 0.35);
15201        assert_eq!(core.post_close_bias, 0.25);
15202        // A FraudConfig with no bias key carries the defaults.
15203        assert_eq!(FraudConfig::default().bias.weekend_bias, 0.30);
15204    }
15205
15206    #[test]
15207    fn fraud_bias_deserializes_overrides_and_maps_to_core() {
15208        // The adversary's subtlety lever: lower the detectable signatures via YAML.
15209        let yaml = r#"
15210enabled: true
15211fraud_rate: 0.04
15212bias:
15213  weekend_bias: 0.0
15214  round_dollar_bias: 0.05
15215  off_hours_bias: 0.0
15216  post_close_bias: 0.10
15217"#;
15218        let fc: FraudConfig = serde_yaml::from_str(yaml).expect("fraud config with bias overrides");
15219        assert_eq!(fc.bias.weekend_bias, 0.0);
15220        assert_eq!(fc.bias.round_dollar_bias, 0.05);
15221        assert_eq!(fc.bias.off_hours_bias, 0.0);
15222        assert_eq!(fc.bias.post_close_bias, 0.10);
15223        assert!(fc.bias.enabled, "enabled defaults to true when omitted");
15224        // camelCase aliases also parse.
15225        let fc2: FraudConfig =
15226            serde_yaml::from_str("bias:\n  weekendBias: 0.5\n  roundDollarBias: 0.5\n").unwrap();
15227        assert_eq!(fc2.bias.weekend_bias, 0.5);
15228        assert_eq!(fc2.bias.round_dollar_bias, 0.5);
15229        // unset fields fall back to defaults
15230        assert_eq!(fc2.bias.off_hours_bias, 0.35);
15231        // mapping to the core engine config is field-for-field
15232        let core = fc.bias.to_core();
15233        assert_eq!(core.weekend_bias, 0.0);
15234        assert_eq!(core.post_close_bias, 0.10);
15235    }
15236
15237    #[test]
15238    fn fraud_config_without_bias_key_uses_defaults() {
15239        // Back-compat: a pre-existing fraud config (no `bias:` key) deserializes with default bias.
15240        let fc: FraudConfig = serde_yaml::from_str("enabled: true\nfraud_rate: 0.02\n").unwrap();
15241        assert_eq!(fc.bias.weekend_bias, 0.30);
15242        assert!(fc.bias.enabled);
15243    }
15244
15245    #[test]
15246    fn fraud_campaigns_default_off_and_back_compat() {
15247        // Default is disabled / inactive → byte-identical behaviour.
15248        let c = FraudCampaignConfig::default();
15249        assert!(!c.enabled && !c.is_active());
15250        assert_eq!(c.count, 1);
15251        assert_eq!(c.booking_leg_pool, 6);
15252        // A pre-existing fraud config (no `campaigns:` key) deserializes with campaigns off.
15253        let fc: FraudConfig = serde_yaml::from_str("enabled: true\nfraud_rate: 0.02\n").unwrap();
15254        assert!(!fc.campaigns.is_active());
15255    }
15256
15257    #[test]
15258    fn fraud_campaigns_deserialize_camelcase_and_is_active() {
15259        let fc: FraudConfig = serde_yaml::from_str(
15260            "enabled: true\ncampaigns:\n  enabled: true\n  count: 2\n  perPeriodCount: 3\n  bookingLegPool: 8\n  rotateEveryPeriods: 2\n  periodDays: 30\n",
15261        )
15262        .unwrap();
15263        assert!(fc.campaigns.is_active());
15264        assert_eq!(fc.campaigns.count, 2);
15265        assert_eq!(fc.campaigns.per_period_count, 3);
15266        assert_eq!(fc.campaigns.booking_leg_pool, 8);
15267        assert_eq!(fc.campaigns.rotate_every_periods, 2);
15268        // A degenerate spec (period_days 0) is not active.
15269        let degenerate = FraudCampaignConfig {
15270            enabled: true,
15271            period_days: 0,
15272            ..FraudCampaignConfig::default()
15273        };
15274        assert!(!degenerate.is_active());
15275    }
15276
15277    #[test]
15278    fn fraud_difficulty_resolves_to_bias_presets() {
15279        // Standard (default) returns the explicit bias unchanged → byte-identical.
15280        let mut fc = FraudConfig::default();
15281        assert_eq!(fc.difficulty, FraudDifficulty::Standard);
15282        assert_eq!(fc.effective_bias().weekend_bias, fc.bias.weekend_bias);
15283        assert!(fc.effective_bias().enabled);
15284        // Adversarial disables bias entirely (residual-faint, §44).
15285        fc.difficulty = FraudDifficulty::Adversarial;
15286        assert!(!fc.effective_bias().enabled);
15287        // Forensic is louder than Subtle on every signature; both stay enabled.
15288        fc.difficulty = FraudDifficulty::Forensic;
15289        let forensic = fc.effective_bias();
15290        fc.difficulty = FraudDifficulty::Subtle;
15291        let subtle = fc.effective_bias();
15292        assert!(forensic.enabled && subtle.enabled);
15293        assert!(forensic.weekend_bias > subtle.weekend_bias);
15294        assert!(forensic.round_dollar_bias > subtle.round_dollar_bias);
15295        assert!(forensic.off_hours_bias > subtle.off_hours_bias);
15296        assert!(forensic.post_close_bias > subtle.post_close_bias);
15297        // snake_case YAML.
15298        let parsed: FraudConfig =
15299            serde_yaml::from_str("enabled: true\ndifficulty: adversarial\n").unwrap();
15300        assert_eq!(parsed.difficulty, FraudDifficulty::Adversarial);
15301    }
15302
15303    // ==========================================================================
15304    // Serialization/Deserialization Tests
15305    // ==========================================================================
15306
15307    #[test]
15308    fn test_config_yaml_roundtrip() {
15309        let config = demo_preset();
15310        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
15311        let deserialized: GeneratorConfig =
15312            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
15313
15314        assert_eq!(
15315            config.global.period_months,
15316            deserialized.global.period_months
15317        );
15318        assert_eq!(config.global.industry, deserialized.global.industry);
15319        assert_eq!(config.companies.len(), deserialized.companies.len());
15320        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
15321    }
15322
15323    #[test]
15324    fn test_config_json_roundtrip() {
15325        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
15326        let mut config = demo_preset();
15327        // Replace infinity with a large but finite value for JSON compatibility
15328        config.master_data.employees.approval_limits.executive = 1e12;
15329
15330        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
15331        let deserialized: GeneratorConfig =
15332            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
15333
15334        assert_eq!(
15335            config.global.period_months,
15336            deserialized.global.period_months
15337        );
15338        assert_eq!(config.global.industry, deserialized.global.industry);
15339        assert_eq!(config.companies.len(), deserialized.companies.len());
15340    }
15341
15342    #[test]
15343    fn test_transaction_volume_serialization() {
15344        // Test various transaction volumes serialize correctly
15345        let volumes = vec![
15346            (TransactionVolume::TenK, "ten_k"),
15347            (TransactionVolume::HundredK, "hundred_k"),
15348            (TransactionVolume::OneM, "one_m"),
15349            (TransactionVolume::TenM, "ten_m"),
15350            (TransactionVolume::HundredM, "hundred_m"),
15351        ];
15352
15353        for (volume, expected_key) in volumes {
15354            let json = serde_json::to_string(&volume).expect("Failed to serialize");
15355            assert!(
15356                json.contains(expected_key),
15357                "Expected {} in JSON: {}",
15358                expected_key,
15359                json
15360            );
15361        }
15362    }
15363
15364    #[test]
15365    fn test_transaction_volume_custom_serialization() {
15366        let volume = TransactionVolume::Custom(12345);
15367        let json = serde_json::to_string(&volume).expect("Failed to serialize");
15368        let deserialized: TransactionVolume =
15369            serde_json::from_str(&json).expect("Failed to deserialize");
15370        assert_eq!(deserialized.count(), 12345);
15371    }
15372
15373    #[test]
15374    fn test_output_mode_serialization() {
15375        let modes = vec![
15376            OutputMode::Streaming,
15377            OutputMode::FlatFile,
15378            OutputMode::Both,
15379        ];
15380
15381        for mode in modes {
15382            let json = serde_json::to_string(&mode).expect("Failed to serialize");
15383            let deserialized: OutputMode =
15384                serde_json::from_str(&json).expect("Failed to deserialize");
15385            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
15386        }
15387    }
15388
15389    #[test]
15390    fn test_file_format_serialization() {
15391        let formats = vec![
15392            FileFormat::Csv,
15393            FileFormat::Parquet,
15394            FileFormat::Json,
15395            FileFormat::JsonLines,
15396        ];
15397
15398        for format in formats {
15399            let json = serde_json::to_string(&format).expect("Failed to serialize");
15400            let deserialized: FileFormat =
15401                serde_json::from_str(&json).expect("Failed to deserialize");
15402            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
15403        }
15404    }
15405
15406    #[test]
15407    fn test_compression_algorithm_serialization() {
15408        let algos = vec![
15409            CompressionAlgorithm::Gzip,
15410            CompressionAlgorithm::Zstd,
15411            CompressionAlgorithm::Lz4,
15412            CompressionAlgorithm::Snappy,
15413        ];
15414
15415        for algo in algos {
15416            let json = serde_json::to_string(&algo).expect("Failed to serialize");
15417            let deserialized: CompressionAlgorithm =
15418                serde_json::from_str(&json).expect("Failed to deserialize");
15419            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
15420        }
15421    }
15422
15423    #[test]
15424    fn test_transfer_pricing_method_serialization() {
15425        let methods = vec![
15426            TransferPricingMethod::CostPlus,
15427            TransferPricingMethod::ComparableUncontrolled,
15428            TransferPricingMethod::ResalePrice,
15429            TransferPricingMethod::TransactionalNetMargin,
15430            TransferPricingMethod::ProfitSplit,
15431        ];
15432
15433        for method in methods {
15434            let json = serde_json::to_string(&method).expect("Failed to serialize");
15435            let deserialized: TransferPricingMethod =
15436                serde_json::from_str(&json).expect("Failed to deserialize");
15437            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
15438        }
15439    }
15440
15441    #[test]
15442    fn test_benford_exemption_serialization() {
15443        let exemptions = vec![
15444            BenfordExemption::Recurring,
15445            BenfordExemption::Payroll,
15446            BenfordExemption::FixedFees,
15447            BenfordExemption::RoundAmounts,
15448        ];
15449
15450        for exemption in exemptions {
15451            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
15452            let deserialized: BenfordExemption =
15453                serde_json::from_str(&json).expect("Failed to deserialize");
15454            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
15455        }
15456    }
15457
15458    // ==========================================================================
15459    // Default Value Tests
15460    // ==========================================================================
15461
15462    #[test]
15463    fn test_global_config_defaults() {
15464        let yaml = r#"
15465            industry: manufacturing
15466            start_date: "2024-01-01"
15467            period_months: 6
15468        "#;
15469        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15470        assert_eq!(config.group_currency, "USD");
15471        assert!(config.parallel);
15472        assert_eq!(config.worker_threads, 0);
15473        assert_eq!(config.memory_limit_mb, 0);
15474    }
15475
15476    #[test]
15477    fn test_fraud_config_defaults() {
15478        let config = FraudConfig::default();
15479        assert!(!config.enabled);
15480        assert_eq!(config.fraud_rate, 0.005);
15481        assert!(!config.clustering_enabled);
15482    }
15483
15484    #[test]
15485    fn test_internal_controls_config_defaults() {
15486        let config = InternalControlsConfig::default();
15487        assert!(!config.enabled);
15488        assert_eq!(config.exception_rate, 0.02);
15489        assert_eq!(config.sod_violation_rate, 0.01);
15490        assert!(config.export_control_master_data);
15491        assert_eq!(config.sox_materiality_threshold, 10000.0);
15492        // COSO fields
15493        assert!(config.coso_enabled);
15494        assert!(!config.include_entity_level_controls);
15495        assert_eq!(config.target_maturity_level, "mixed");
15496    }
15497
15498    #[test]
15499    fn test_output_config_defaults() {
15500        let config = OutputConfig::default();
15501        assert!(matches!(config.mode, OutputMode::FlatFile));
15502        assert_eq!(config.formats, vec![FileFormat::Parquet]);
15503        assert!(config.compression.enabled);
15504        assert!(matches!(
15505            config.compression.algorithm,
15506            CompressionAlgorithm::Zstd
15507        ));
15508        assert!(config.include_acdoca);
15509        assert!(!config.include_bseg);
15510        assert!(config.partition_by_period);
15511        assert!(!config.partition_by_company);
15512    }
15513
15514    #[test]
15515    fn test_approval_config_defaults() {
15516        let config = ApprovalConfig::default();
15517        assert!(!config.enabled);
15518        assert_eq!(config.auto_approve_threshold, 1000.0);
15519        assert_eq!(config.rejection_rate, 0.02);
15520        assert_eq!(config.revision_rate, 0.05);
15521        assert_eq!(config.average_approval_delay_hours, 4.0);
15522        assert_eq!(config.thresholds.len(), 4);
15523    }
15524
15525    #[test]
15526    fn test_p2p_flow_config_defaults() {
15527        let config = P2PFlowConfig::default();
15528        assert!(config.enabled);
15529        assert_eq!(config.three_way_match_rate, 0.95);
15530        assert_eq!(config.partial_delivery_rate, 0.15);
15531        assert_eq!(config.average_po_to_gr_days, 14);
15532    }
15533
15534    #[test]
15535    fn test_o2c_flow_config_defaults() {
15536        let config = O2CFlowConfig::default();
15537        assert!(config.enabled);
15538        assert_eq!(config.credit_check_failure_rate, 0.02);
15539        assert_eq!(config.return_rate, 0.03);
15540        assert_eq!(config.bad_debt_rate, 0.01);
15541    }
15542
15543    #[test]
15544    fn test_balance_config_defaults() {
15545        let config = BalanceConfig::default();
15546        assert!(!config.generate_opening_balances);
15547        assert!(config.generate_trial_balances);
15548        assert_eq!(config.target_gross_margin, 0.35);
15549        assert!(config.validate_balance_equation);
15550        assert!(config.reconcile_subledgers);
15551    }
15552
15553    // ==========================================================================
15554    // Partial Config Deserialization Tests
15555    // ==========================================================================
15556
15557    #[test]
15558    fn test_partial_config_with_defaults() {
15559        // Minimal config that should use all defaults
15560        let yaml = r#"
15561            global:
15562              industry: manufacturing
15563              start_date: "2024-01-01"
15564              period_months: 3
15565            companies:
15566              - code: "TEST"
15567                name: "Test Company"
15568                currency: "USD"
15569                country: "US"
15570                annual_transaction_volume: ten_k
15571            chart_of_accounts:
15572              complexity: small
15573            output:
15574              output_directory: "./output"
15575        "#;
15576
15577        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15578        assert_eq!(config.global.period_months, 3);
15579        assert_eq!(config.companies.len(), 1);
15580        assert!(!config.fraud.enabled); // Default
15581        assert!(!config.internal_controls.enabled); // Default
15582    }
15583
15584    #[test]
15585    fn test_config_with_fraud_enabled() {
15586        let yaml = r#"
15587            global:
15588              industry: retail
15589              start_date: "2024-01-01"
15590              period_months: 12
15591            companies:
15592              - code: "RETAIL"
15593                name: "Retail Co"
15594                currency: "USD"
15595                country: "US"
15596                annual_transaction_volume: hundred_k
15597            chart_of_accounts:
15598              complexity: medium
15599            output:
15600              output_directory: "./output"
15601            fraud:
15602              enabled: true
15603              fraud_rate: 0.05
15604              clustering_enabled: true
15605        "#;
15606
15607        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15608        assert!(config.fraud.enabled);
15609        assert_eq!(config.fraud.fraud_rate, 0.05);
15610        assert!(config.fraud.clustering_enabled);
15611    }
15612
15613    #[test]
15614    fn test_config_with_multiple_companies() {
15615        let yaml = r#"
15616            global:
15617              industry: manufacturing
15618              start_date: "2024-01-01"
15619              period_months: 6
15620            companies:
15621              - code: "HQ"
15622                name: "Headquarters"
15623                currency: "USD"
15624                country: "US"
15625                annual_transaction_volume: hundred_k
15626                volume_weight: 1.0
15627              - code: "EU"
15628                name: "European Subsidiary"
15629                currency: "EUR"
15630                country: "DE"
15631                annual_transaction_volume: hundred_k
15632                volume_weight: 0.5
15633              - code: "APAC"
15634                name: "Asia Pacific"
15635                currency: "JPY"
15636                country: "JP"
15637                annual_transaction_volume: ten_k
15638                volume_weight: 0.3
15639            chart_of_accounts:
15640              complexity: large
15641            output:
15642              output_directory: "./output"
15643        "#;
15644
15645        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15646        assert_eq!(config.companies.len(), 3);
15647        assert_eq!(config.companies[0].code, "HQ");
15648        assert_eq!(config.companies[1].currency, "EUR");
15649        assert_eq!(config.companies[2].volume_weight, 0.3);
15650    }
15651
15652    #[test]
15653    fn test_intercompany_config() {
15654        let yaml = r#"
15655            enabled: true
15656            ic_transaction_rate: 0.20
15657            transfer_pricing_method: cost_plus
15658            markup_percent: 0.08
15659            generate_matched_pairs: true
15660            generate_eliminations: true
15661        "#;
15662
15663        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15664        assert!(config.enabled);
15665        assert_eq!(config.ic_transaction_rate, 0.20);
15666        assert!(matches!(
15667            config.transfer_pricing_method,
15668            TransferPricingMethod::CostPlus
15669        ));
15670        assert_eq!(config.markup_percent, 0.08);
15671        assert!(config.generate_eliminations);
15672    }
15673
15674    // ==========================================================================
15675    // Company Config Tests
15676    // ==========================================================================
15677
15678    #[test]
15679    fn test_company_config_defaults() {
15680        let yaml = r#"
15681            code: "TEST"
15682            name: "Test Company"
15683            currency: "USD"
15684            country: "US"
15685            annual_transaction_volume: ten_k
15686        "#;
15687
15688        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15689        assert_eq!(config.fiscal_year_variant, "K4"); // Default
15690        assert_eq!(config.volume_weight, 1.0); // Default
15691    }
15692
15693    // ==========================================================================
15694    // Chart of Accounts Config Tests
15695    // ==========================================================================
15696
15697    #[test]
15698    fn test_coa_config_defaults() {
15699        let yaml = r#"
15700            complexity: medium
15701        "#;
15702
15703        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15704        assert!(config.industry_specific); // Default true
15705        assert!(config.custom_accounts.is_none());
15706        assert_eq!(config.min_hierarchy_depth, 2); // Default
15707        assert_eq!(config.max_hierarchy_depth, 5); // Default
15708    }
15709
15710    // ==========================================================================
15711    // Accounting Standards Config Tests
15712    // ==========================================================================
15713
15714    #[test]
15715    fn test_accounting_standards_config_defaults() {
15716        let config = AccountingStandardsConfig::default();
15717        assert!(!config.enabled);
15718        assert!(config.framework.is_none());
15719        assert!(!config.revenue_recognition.enabled);
15720        assert!(!config.leases.enabled);
15721        assert!(!config.fair_value.enabled);
15722        assert!(!config.impairment.enabled);
15723        assert!(!config.generate_differences);
15724    }
15725
15726    #[test]
15727    fn test_accounting_standards_config_yaml() {
15728        let yaml = r#"
15729            enabled: true
15730            framework: ifrs
15731            revenue_recognition:
15732              enabled: true
15733              generate_contracts: true
15734              avg_obligations_per_contract: 2.5
15735              variable_consideration_rate: 0.20
15736              over_time_recognition_rate: 0.35
15737              contract_count: 150
15738            leases:
15739              enabled: true
15740              lease_count: 75
15741              finance_lease_percent: 0.25
15742              avg_lease_term_months: 48
15743            generate_differences: true
15744        "#;
15745
15746        let config: AccountingStandardsConfig =
15747            serde_yaml::from_str(yaml).expect("Failed to parse");
15748        assert!(config.enabled);
15749        assert!(matches!(
15750            config.framework,
15751            Some(AccountingFrameworkConfig::Ifrs)
15752        ));
15753        assert!(config.revenue_recognition.enabled);
15754        assert_eq!(config.revenue_recognition.contract_count, 150);
15755        assert_eq!(config.revenue_recognition.avg_obligations_per_contract, 2.5);
15756        assert!(config.leases.enabled);
15757        assert_eq!(config.leases.lease_count, 75);
15758        assert_eq!(config.leases.finance_lease_percent, 0.25);
15759        assert!(config.generate_differences);
15760    }
15761
15762    #[test]
15763    fn test_accounting_framework_serialization() {
15764        let frameworks = [
15765            AccountingFrameworkConfig::UsGaap,
15766            AccountingFrameworkConfig::Ifrs,
15767            AccountingFrameworkConfig::DualReporting,
15768            AccountingFrameworkConfig::FrenchGaap,
15769            AccountingFrameworkConfig::GermanGaap,
15770        ];
15771
15772        for framework in frameworks {
15773            let json = serde_json::to_string(&framework).expect("Failed to serialize");
15774            let deserialized: AccountingFrameworkConfig =
15775                serde_json::from_str(&json).expect("Failed to deserialize");
15776            assert!(format!("{:?}", framework) == format!("{:?}", deserialized));
15777        }
15778    }
15779
15780    #[test]
15781    fn test_revenue_recognition_config_defaults() {
15782        let config = RevenueRecognitionConfig::default();
15783        assert!(!config.enabled);
15784        assert!(config.generate_contracts);
15785        assert_eq!(config.avg_obligations_per_contract, 2.0);
15786        assert_eq!(config.variable_consideration_rate, 0.15);
15787        assert_eq!(config.over_time_recognition_rate, 0.30);
15788        assert_eq!(config.contract_count, 100);
15789    }
15790
15791    #[test]
15792    fn test_lease_accounting_config_defaults() {
15793        let config = LeaseAccountingConfig::default();
15794        assert!(!config.enabled);
15795        assert_eq!(config.lease_count, 50);
15796        assert_eq!(config.finance_lease_percent, 0.30);
15797        assert_eq!(config.avg_lease_term_months, 60);
15798        assert!(config.generate_amortization);
15799        assert_eq!(config.real_estate_percent, 0.40);
15800    }
15801
15802    #[test]
15803    fn test_fair_value_config_defaults() {
15804        let config = FairValueConfig::default();
15805        assert!(!config.enabled);
15806        assert_eq!(config.measurement_count, 25);
15807        assert_eq!(config.level1_percent, 0.40);
15808        assert_eq!(config.level2_percent, 0.35);
15809        assert_eq!(config.level3_percent, 0.25);
15810        assert!(!config.include_sensitivity_analysis);
15811    }
15812
15813    #[test]
15814    fn test_impairment_config_defaults() {
15815        let config = ImpairmentConfig::default();
15816        assert!(!config.enabled);
15817        assert_eq!(config.test_count, 15);
15818        assert_eq!(config.impairment_rate, 0.10);
15819        assert!(config.generate_projections);
15820        assert!(!config.include_goodwill);
15821    }
15822
15823    // ==========================================================================
15824    // Audit Standards Config Tests
15825    // ==========================================================================
15826
15827    #[test]
15828    fn test_audit_standards_config_defaults() {
15829        let config = AuditStandardsConfig::default();
15830        assert!(!config.enabled);
15831        assert!(!config.isa_compliance.enabled);
15832        assert!(!config.analytical_procedures.enabled);
15833        assert!(!config.confirmations.enabled);
15834        assert!(!config.opinion.enabled);
15835        assert!(!config.generate_audit_trail);
15836        assert!(!config.sox.enabled);
15837        assert!(!config.pcaob.enabled);
15838    }
15839
15840    #[test]
15841    fn test_audit_standards_config_yaml() {
15842        let yaml = r#"
15843            enabled: true
15844            isa_compliance:
15845              enabled: true
15846              compliance_level: comprehensive
15847              generate_isa_mappings: true
15848              include_pcaob: true
15849              framework: dual
15850            analytical_procedures:
15851              enabled: true
15852              procedures_per_account: 5
15853              variance_probability: 0.25
15854            confirmations:
15855              enabled: true
15856              confirmation_count: 75
15857              positive_response_rate: 0.90
15858              exception_rate: 0.08
15859            opinion:
15860              enabled: true
15861              generate_kam: true
15862              average_kam_count: 4
15863            sox:
15864              enabled: true
15865              generate_302_certifications: true
15866              generate_404_assessments: true
15867              material_weakness_rate: 0.03
15868            pcaob:
15869              enabled: true
15870              is_pcaob_audit: true
15871              include_icfr_opinion: true
15872            generate_audit_trail: true
15873        "#;
15874
15875        let config: AuditStandardsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15876        assert!(config.enabled);
15877        assert!(config.isa_compliance.enabled);
15878        assert_eq!(config.isa_compliance.compliance_level, "comprehensive");
15879        assert!(config.isa_compliance.include_pcaob);
15880        assert_eq!(config.isa_compliance.framework, "dual");
15881        assert!(config.analytical_procedures.enabled);
15882        assert_eq!(config.analytical_procedures.procedures_per_account, 5);
15883        assert!(config.confirmations.enabled);
15884        assert_eq!(config.confirmations.confirmation_count, 75);
15885        assert!(config.opinion.enabled);
15886        assert_eq!(config.opinion.average_kam_count, 4);
15887        assert!(config.sox.enabled);
15888        assert!(config.sox.generate_302_certifications);
15889        assert_eq!(config.sox.material_weakness_rate, 0.03);
15890        assert!(config.pcaob.enabled);
15891        assert!(config.pcaob.is_pcaob_audit);
15892        assert!(config.pcaob.include_icfr_opinion);
15893        assert!(config.generate_audit_trail);
15894    }
15895
15896    #[test]
15897    fn test_isa_compliance_config_defaults() {
15898        let config = IsaComplianceConfig::default();
15899        assert!(!config.enabled);
15900        assert_eq!(config.compliance_level, "standard");
15901        assert!(config.generate_isa_mappings);
15902        assert!(config.generate_coverage_summary);
15903        assert!(!config.include_pcaob);
15904        assert_eq!(config.framework, "isa");
15905    }
15906
15907    #[test]
15908    fn test_sox_compliance_config_defaults() {
15909        let config = SoxComplianceConfig::default();
15910        assert!(!config.enabled);
15911        assert!(config.generate_302_certifications);
15912        assert!(config.generate_404_assessments);
15913        assert_eq!(config.materiality_threshold, 10000.0);
15914        assert_eq!(config.material_weakness_rate, 0.02);
15915        assert_eq!(config.significant_deficiency_rate, 0.08);
15916    }
15917
15918    #[test]
15919    fn test_pcaob_config_defaults() {
15920        let config = PcaobConfig::default();
15921        assert!(!config.enabled);
15922        assert!(!config.is_pcaob_audit);
15923        assert!(config.generate_cam);
15924        assert!(!config.include_icfr_opinion);
15925        assert!(!config.generate_standard_mappings);
15926    }
15927
15928    #[test]
15929    fn test_config_with_standards_enabled() {
15930        let yaml = r#"
15931            global:
15932              industry: financial_services
15933              start_date: "2024-01-01"
15934              period_months: 12
15935            companies:
15936              - code: "BANK"
15937                name: "Test Bank"
15938                currency: "USD"
15939                country: "US"
15940                annual_transaction_volume: hundred_k
15941            chart_of_accounts:
15942              complexity: large
15943            output:
15944              output_directory: "./output"
15945            accounting_standards:
15946              enabled: true
15947              framework: us_gaap
15948              revenue_recognition:
15949                enabled: true
15950              leases:
15951                enabled: true
15952            audit_standards:
15953              enabled: true
15954              isa_compliance:
15955                enabled: true
15956              sox:
15957                enabled: true
15958        "#;
15959
15960        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15961        assert!(config.accounting_standards.enabled);
15962        assert!(matches!(
15963            config.accounting_standards.framework,
15964            Some(AccountingFrameworkConfig::UsGaap)
15965        ));
15966        assert!(config.accounting_standards.revenue_recognition.enabled);
15967        assert!(config.accounting_standards.leases.enabled);
15968        assert!(config.audit_standards.enabled);
15969        assert!(config.audit_standards.isa_compliance.enabled);
15970        assert!(config.audit_standards.sox.enabled);
15971    }
15972
15973    // ==========================================================================
15974    // Industry-Specific Config Tests
15975    // ==========================================================================
15976
15977    #[test]
15978    fn test_industry_specific_config_defaults() {
15979        let config = IndustrySpecificConfig::default();
15980        assert!(!config.enabled);
15981        assert!(!config.manufacturing.enabled);
15982        assert!(!config.retail.enabled);
15983        assert!(!config.healthcare.enabled);
15984        assert!(!config.technology.enabled);
15985        assert!(!config.financial_services.enabled);
15986        assert!(!config.professional_services.enabled);
15987    }
15988
15989    #[test]
15990    fn test_manufacturing_config_defaults() {
15991        let config = ManufacturingConfig::default();
15992        assert!(!config.enabled);
15993        assert_eq!(config.bom_depth, 4);
15994        assert!(!config.just_in_time);
15995        assert_eq!(config.supplier_tiers, 2);
15996        assert_eq!(config.target_yield_rate, 0.97);
15997        assert_eq!(config.scrap_alert_threshold, 0.03);
15998    }
15999
16000    #[test]
16001    fn test_retail_config_defaults() {
16002        let config = RetailConfig::default();
16003        assert!(!config.enabled);
16004        assert_eq!(config.avg_daily_transactions, 500);
16005        assert!(config.loss_prevention);
16006        assert_eq!(config.shrinkage_rate, 0.015);
16007    }
16008
16009    #[test]
16010    fn test_healthcare_config_defaults() {
16011        let config = HealthcareConfig::default();
16012        assert!(!config.enabled);
16013        assert_eq!(config.facility_type, "hospital");
16014        assert_eq!(config.avg_daily_encounters, 150);
16015        assert!(config.compliance.hipaa);
16016        assert!(config.compliance.stark_law);
16017        assert!(config.coding_systems.icd10);
16018        assert!(config.coding_systems.cpt);
16019    }
16020
16021    #[test]
16022    fn test_technology_config_defaults() {
16023        let config = TechnologyConfig::default();
16024        assert!(!config.enabled);
16025        assert_eq!(config.revenue_model, "saas");
16026        assert_eq!(config.subscription_revenue_pct, 0.60);
16027        assert!(config.rd_capitalization.enabled);
16028    }
16029
16030    #[test]
16031    fn test_config_with_industry_specific() {
16032        let yaml = r#"
16033            global:
16034              industry: healthcare
16035              start_date: "2024-01-01"
16036              period_months: 12
16037            companies:
16038              - code: "HOSP"
16039                name: "Test Hospital"
16040                currency: "USD"
16041                country: "US"
16042                annual_transaction_volume: hundred_k
16043            chart_of_accounts:
16044              complexity: medium
16045            output:
16046              output_directory: "./output"
16047            industry_specific:
16048              enabled: true
16049              healthcare:
16050                enabled: true
16051                facility_type: hospital
16052                payer_mix:
16053                  medicare: 0.45
16054                  medicaid: 0.15
16055                  commercial: 0.35
16056                  self_pay: 0.05
16057                coding_systems:
16058                  icd10: true
16059                  cpt: true
16060                  drg: true
16061                compliance:
16062                  hipaa: true
16063                  stark_law: true
16064                anomaly_rates:
16065                  upcoding: 0.03
16066                  unbundling: 0.02
16067        "#;
16068
16069        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16070        assert!(config.industry_specific.enabled);
16071        assert!(config.industry_specific.healthcare.enabled);
16072        assert_eq!(
16073            config.industry_specific.healthcare.facility_type,
16074            "hospital"
16075        );
16076        assert_eq!(config.industry_specific.healthcare.payer_mix.medicare, 0.45);
16077        assert_eq!(config.industry_specific.healthcare.payer_mix.self_pay, 0.05);
16078        assert!(config.industry_specific.healthcare.coding_systems.icd10);
16079        assert!(config.industry_specific.healthcare.compliance.hipaa);
16080        assert_eq!(
16081            config.industry_specific.healthcare.anomaly_rates.upcoding,
16082            0.03
16083        );
16084    }
16085
16086    #[test]
16087    fn test_config_with_manufacturing_specific() {
16088        let yaml = r#"
16089            global:
16090              industry: manufacturing
16091              start_date: "2024-01-01"
16092              period_months: 12
16093            companies:
16094              - code: "MFG"
16095                name: "Test Manufacturing"
16096                currency: "USD"
16097                country: "US"
16098                annual_transaction_volume: hundred_k
16099            chart_of_accounts:
16100              complexity: medium
16101            output:
16102              output_directory: "./output"
16103            industry_specific:
16104              enabled: true
16105              manufacturing:
16106                enabled: true
16107                bom_depth: 5
16108                just_in_time: true
16109                supplier_tiers: 3
16110                target_yield_rate: 0.98
16111                anomaly_rates:
16112                  yield_manipulation: 0.02
16113                  phantom_production: 0.01
16114        "#;
16115
16116        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16117        assert!(config.industry_specific.enabled);
16118        assert!(config.industry_specific.manufacturing.enabled);
16119        assert_eq!(config.industry_specific.manufacturing.bom_depth, 5);
16120        assert!(config.industry_specific.manufacturing.just_in_time);
16121        assert_eq!(config.industry_specific.manufacturing.supplier_tiers, 3);
16122        assert_eq!(
16123            config.industry_specific.manufacturing.target_yield_rate,
16124            0.98
16125        );
16126        assert_eq!(
16127            config
16128                .industry_specific
16129                .manufacturing
16130                .anomaly_rates
16131                .yield_manipulation,
16132            0.02
16133        );
16134    }
16135
16136    // ==========================================================================
16137    // Tax Configuration Tests
16138    // ==========================================================================
16139
16140    #[test]
16141    fn test_tax_config_defaults() {
16142        let tax = TaxConfig::default();
16143        assert!(!tax.enabled);
16144        assert!(tax.jurisdictions.countries.is_empty());
16145        assert!(!tax.jurisdictions.include_subnational);
16146        assert!(!tax.vat_gst.enabled);
16147        assert!(tax.vat_gst.standard_rates.is_empty());
16148        assert!(tax.vat_gst.reduced_rates.is_empty());
16149        assert!(tax.vat_gst.exempt_categories.is_empty());
16150        assert!(tax.vat_gst.reverse_charge);
16151        assert!(!tax.sales_tax.enabled);
16152        assert!(tax.sales_tax.nexus_states.is_empty());
16153        assert!(!tax.withholding.enabled);
16154        assert!(tax.withholding.treaty_network);
16155        assert_eq!(tax.withholding.default_rate, 0.30);
16156        assert_eq!(tax.withholding.treaty_reduced_rate, 0.15);
16157        assert!(tax.provisions.enabled);
16158        assert_eq!(tax.provisions.statutory_rate, 0.21);
16159        assert!(tax.provisions.uncertain_positions);
16160        assert!(!tax.payroll_tax.enabled);
16161        assert_eq!(tax.anomaly_rate, 0.03);
16162    }
16163
16164    #[test]
16165    fn test_tax_config_from_yaml() {
16166        let yaml = r#"
16167            global:
16168              seed: 42
16169              start_date: "2024-01-01"
16170              period_months: 12
16171              industry: retail
16172            companies:
16173              - code: C001
16174                name: Test Corp
16175                currency: USD
16176                country: US
16177                annual_transaction_volume: ten_k
16178            chart_of_accounts:
16179              complexity: small
16180            output:
16181              output_directory: ./output
16182            tax:
16183              enabled: true
16184              anomaly_rate: 0.05
16185              jurisdictions:
16186                countries: ["US", "DE", "GB"]
16187                include_subnational: true
16188              vat_gst:
16189                enabled: true
16190                standard_rates:
16191                  DE: 0.19
16192                  GB: 0.20
16193                reduced_rates:
16194                  DE: 0.07
16195                  GB: 0.05
16196                exempt_categories:
16197                  - financial_services
16198                  - healthcare
16199                reverse_charge: false
16200              sales_tax:
16201                enabled: true
16202                nexus_states: ["CA", "NY", "TX"]
16203              withholding:
16204                enabled: true
16205                treaty_network: false
16206                default_rate: 0.25
16207                treaty_reduced_rate: 0.10
16208              provisions:
16209                enabled: false
16210                statutory_rate: 0.28
16211                uncertain_positions: false
16212              payroll_tax:
16213                enabled: true
16214        "#;
16215
16216        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
16217        assert!(config.tax.enabled);
16218        assert_eq!(config.tax.anomaly_rate, 0.05);
16219
16220        // Jurisdictions
16221        assert_eq!(config.tax.jurisdictions.countries.len(), 3);
16222        assert!(config
16223            .tax
16224            .jurisdictions
16225            .countries
16226            .contains(&"DE".to_string()));
16227        assert!(config.tax.jurisdictions.include_subnational);
16228
16229        // VAT/GST
16230        assert!(config.tax.vat_gst.enabled);
16231        assert_eq!(config.tax.vat_gst.standard_rates.get("DE"), Some(&0.19));
16232        assert_eq!(config.tax.vat_gst.standard_rates.get("GB"), Some(&0.20));
16233        assert_eq!(config.tax.vat_gst.reduced_rates.get("DE"), Some(&0.07));
16234        assert_eq!(config.tax.vat_gst.exempt_categories.len(), 2);
16235        assert!(!config.tax.vat_gst.reverse_charge);
16236
16237        // Sales tax
16238        assert!(config.tax.sales_tax.enabled);
16239        assert_eq!(config.tax.sales_tax.nexus_states.len(), 3);
16240        assert!(config
16241            .tax
16242            .sales_tax
16243            .nexus_states
16244            .contains(&"CA".to_string()));
16245
16246        // Withholding
16247        assert!(config.tax.withholding.enabled);
16248        assert!(!config.tax.withholding.treaty_network);
16249        assert_eq!(config.tax.withholding.default_rate, 0.25);
16250        assert_eq!(config.tax.withholding.treaty_reduced_rate, 0.10);
16251
16252        // Provisions
16253        assert!(!config.tax.provisions.enabled);
16254        assert_eq!(config.tax.provisions.statutory_rate, 0.28);
16255        assert!(!config.tax.provisions.uncertain_positions);
16256
16257        // Payroll tax
16258        assert!(config.tax.payroll_tax.enabled);
16259    }
16260
16261    #[test]
16262    fn test_generator_config_with_tax_default() {
16263        let yaml = r#"
16264            global:
16265              seed: 42
16266              start_date: "2024-01-01"
16267              period_months: 12
16268              industry: retail
16269            companies:
16270              - code: C001
16271                name: Test Corp
16272                currency: USD
16273                country: US
16274                annual_transaction_volume: ten_k
16275            chart_of_accounts:
16276              complexity: small
16277            output:
16278              output_directory: ./output
16279        "#;
16280
16281        let config: GeneratorConfig =
16282            serde_yaml::from_str(yaml).expect("Failed to parse config without tax section");
16283        // Tax should be present with defaults when not specified in YAML
16284        assert!(!config.tax.enabled);
16285        assert!(config.tax.jurisdictions.countries.is_empty());
16286        assert_eq!(config.tax.anomaly_rate, 0.03);
16287        assert!(config.tax.provisions.enabled); // provisions default to enabled=true
16288        assert_eq!(config.tax.provisions.statutory_rate, 0.21);
16289    }
16290
16291    // ==========================================================================
16292    // SessionSchemaConfig Tests
16293    // ==========================================================================
16294
16295    #[test]
16296    fn test_session_config_default_disabled() {
16297        let yaml = "{}";
16298        let config: SessionSchemaConfig =
16299            serde_yaml::from_str(yaml).expect("Failed to parse empty session config");
16300        assert!(!config.enabled);
16301        assert!(config.checkpoint_path.is_none());
16302        assert!(config.per_period_output);
16303        assert!(config.consolidated_output);
16304    }
16305
16306    #[test]
16307    fn test_config_backward_compatible_without_session() {
16308        let yaml = r#"
16309            global:
16310              seed: 42
16311              start_date: "2024-01-01"
16312              period_months: 12
16313              industry: retail
16314            companies:
16315              - code: C001
16316                name: Test Corp
16317                currency: USD
16318                country: US
16319                annual_transaction_volume: ten_k
16320            chart_of_accounts:
16321              complexity: small
16322            output:
16323              output_directory: ./output
16324        "#;
16325
16326        let config: GeneratorConfig =
16327            serde_yaml::from_str(yaml).expect("Failed to parse config without session");
16328        // Session should default to disabled
16329        assert!(!config.session.enabled);
16330        assert!(config.session.per_period_output);
16331        assert!(config.session.consolidated_output);
16332        // fiscal_year_months should be None
16333        assert!(config.global.fiscal_year_months.is_none());
16334    }
16335
16336    #[test]
16337    fn test_fiscal_year_months_parsed() {
16338        let yaml = r#"
16339            global:
16340              seed: 42
16341              start_date: "2024-01-01"
16342              period_months: 24
16343              industry: retail
16344              fiscal_year_months: 12
16345            companies:
16346              - code: C001
16347                name: Test Corp
16348                currency: USD
16349                country: US
16350                annual_transaction_volume: ten_k
16351            chart_of_accounts:
16352              complexity: small
16353            output:
16354              output_directory: ./output
16355            session:
16356              enabled: true
16357              checkpoint_path: /tmp/checkpoints
16358              per_period_output: true
16359              consolidated_output: false
16360        "#;
16361
16362        let config: GeneratorConfig =
16363            serde_yaml::from_str(yaml).expect("Failed to parse config with fiscal_year_months");
16364        assert_eq!(config.global.fiscal_year_months, Some(12));
16365        assert!(config.session.enabled);
16366        assert_eq!(
16367            config.session.checkpoint_path,
16368            Some("/tmp/checkpoints".to_string())
16369        );
16370        assert!(config.session.per_period_output);
16371        assert!(!config.session.consolidated_output);
16372    }
16373
16374    // -----------------------------------------------------------------------
16375    // SP3 — IndustryProfileField / IndustryPriorsConfig tests
16376    // -----------------------------------------------------------------------
16377
16378    #[test]
16379    fn industry_profile_legacy_string_form_parses() {
16380        // Legacy YAML: bare enum variant name.  Must round-trip without changes
16381        // to existing config files.
16382        let yaml = r#"
16383enabled: true
16384industry_profile: retail
16385"#;
16386        let cfg: AdvancedDistributionConfig =
16387            serde_yaml::from_str(yaml).expect("parse legacy industry_profile string");
16388        let profile = cfg.industry_profile.expect("Some");
16389        assert_eq!(profile.profile_type(), IndustryProfileType::Retail);
16390        assert!(profile.priors().is_none());
16391    }
16392
16393    #[test]
16394    fn industry_profile_full_form_with_priors_parses() {
16395        let yaml = r#"
16396enabled: true
16397industry_profile:
16398  name: healthcare
16399  priors:
16400    enabled: true
16401    source: bundled
16402"#;
16403        let cfg: AdvancedDistributionConfig =
16404            serde_yaml::from_str(yaml).expect("parse full industry_profile struct");
16405        let profile = cfg.industry_profile.expect("Some");
16406        assert_eq!(profile.profile_type(), IndustryProfileType::Healthcare);
16407        let priors = profile.priors().expect("priors present");
16408        assert!(priors.enabled);
16409        assert_eq!(priors.source, PriorsSource::Bundled);
16410        assert!(priors.path.is_none());
16411    }
16412
16413    #[test]
16414    fn industry_profile_full_form_without_priors_parses() {
16415        // Struct form with only `name` and no priors block.
16416        let yaml = r#"
16417enabled: true
16418industry_profile:
16419  name: manufacturing
16420"#;
16421        let cfg: AdvancedDistributionConfig =
16422            serde_yaml::from_str(yaml).expect("parse struct without priors");
16423        let profile = cfg.industry_profile.expect("Some");
16424        assert_eq!(profile.profile_type(), IndustryProfileType::Manufacturing);
16425        assert!(profile.priors().is_none());
16426    }
16427
16428    #[test]
16429    fn industry_profile_priors_file_without_path_fails_validation() {
16430        use crate::validation::validate_config;
16431
16432        // Minimal valid config plumbing.
16433        let yaml = r#"
16434global:
16435  seed: 42
16436  start_date: "2024-01-01"
16437  period_months: 1
16438  industry: retail
16439companies:
16440  - code: C001
16441    name: Test Corp
16442    currency: USD
16443    country: US
16444    annual_transaction_volume: ten_k
16445chart_of_accounts:
16446  complexity: small
16447output:
16448  output_directory: ./output
16449distributions:
16450  enabled: true
16451  industry_profile:
16452    name: retail
16453    priors:
16454      enabled: true
16455      source: file
16456"#;
16457        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
16458        let err = validate_config(&cfg).expect_err("path required when source=file");
16459        let msg = err.to_string();
16460        assert!(
16461            msg.contains("path") || msg.contains("required"),
16462            "unexpected error message: {msg}"
16463        );
16464    }
16465
16466    #[test]
16467    fn industry_profile_priors_file_with_path_passes_validation() {
16468        use crate::validation::validate_config;
16469
16470        let yaml = r#"
16471global:
16472  seed: 42
16473  start_date: "2024-01-01"
16474  period_months: 1
16475  industry: retail
16476companies:
16477  - code: C001
16478    name: Test Corp
16479    currency: USD
16480    country: US
16481    annual_transaction_volume: ten_k
16482chart_of_accounts:
16483  complexity: small
16484output:
16485  output_directory: ./output
16486distributions:
16487  enabled: true
16488  industry_profile:
16489    name: retail
16490    priors:
16491      enabled: true
16492      source: file
16493      path: /tmp/priors.json
16494"#;
16495        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
16496        validate_config(&cfg).expect("validation should pass with path supplied");
16497    }
16498}