Skip to main content

datasynth_config/
schema.rs

//! Configuration schema for synthetic data generation.
2
3use datasynth_core::distributions::{
4    AmountDistributionConfig, DebitCreditDistributionConfig, EvenOddDistributionConfig,
5    LineItemDistributionConfig, SeasonalityConfig,
6};
7use datasynth_core::models::{CoAComplexity, IndustrySector};
8use serde::{Deserialize, Serialize};
9use std::path::PathBuf;
10
11/// Root configuration for the synthetic data generator.
12///
13/// # camelCase alias policy
14///
15/// Every multi-word field carries `#[serde(alias = "camelCaseName")]`
16/// so SDK clients that follow JSON conventions can submit configs
17/// without round-tripping through a snake_case transformer.
18///
19/// Before v4.4.1 several fields — `documentFlows`, `accountingStandards`,
20/// `complianceRegulations`, `analyticsMetadata` — had no alias, so SDK
21/// submissions silently fell through to defaults. The symptom was
22/// "enabling the 6 feature subsections together collapses the archive
23/// from 99 files to 19". Root cause: those four fields never parsed;
24/// the orchestrator produced far less data than requested, and
25/// `output.exportFormat` similarly fell through so journal_entries
26/// landed as the default Parquet/CSV rather than JSON.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GeneratorConfig {
29    /// Global settings
30    pub global: GlobalConfig,
31    /// Company configuration
32    pub companies: Vec<CompanyConfig>,
33    /// Chart of Accounts configuration
34    #[serde(alias = "chartOfAccounts")]
35    pub chart_of_accounts: ChartOfAccountsConfig,
36    /// Transaction generation settings
37    #[serde(default)]
38    pub transactions: TransactionConfig,
39    /// Output configuration
40    pub output: OutputConfig,
41    /// Fraud simulation settings
42    #[serde(default)]
43    pub fraud: FraudConfig,
44    /// Data quality variation settings
45    #[serde(default, alias = "dataQuality")]
46    pub data_quality: DataQualitySchemaConfig,
47    /// Internal Controls System settings
48    #[serde(default, alias = "internalControls")]
49    pub internal_controls: InternalControlsConfig,
50    /// Business process mix
51    #[serde(default, alias = "businessProcesses")]
52    pub business_processes: BusinessProcessConfig,
53    /// User persona distribution
54    #[serde(default, alias = "userPersonas")]
55    pub user_personas: UserPersonaConfig,
56    /// Template configuration for realistic data
57    #[serde(default)]
58    pub templates: TemplateConfig,
59    /// Approval workflow configuration
60    #[serde(default)]
61    pub approval: ApprovalConfig,
62    /// Department structure configuration
63    #[serde(default)]
64    pub departments: DepartmentConfig,
65    /// Master data generation settings
66    #[serde(default, alias = "masterData")]
67    pub master_data: MasterDataConfig,
68    /// Document flow generation settings
69    #[serde(default, alias = "documentFlows")]
70    pub document_flows: DocumentFlowConfig,
71    /// Intercompany transaction settings
72    #[serde(default)]
73    pub intercompany: IntercompanyConfig,
74    /// Balance and trial balance settings
75    #[serde(default)]
76    pub balance: BalanceConfig,
77    /// OCPM (Object-Centric Process Mining) settings
78    #[serde(default)]
79    pub ocpm: OcpmConfig,
80    /// Audit engagement and workpaper generation settings
81    #[serde(default)]
82    pub audit: AuditGenerationConfig,
83    /// Banking KYC/AML transaction generation settings
84    #[serde(default)]
85    pub banking: datasynth_banking::BankingConfig,
86    /// Scenario configuration for metadata and tagging (Phase 1.3)
87    #[serde(default)]
88    pub scenario: ScenarioConfig,
89    /// Temporal drift configuration for simulating distribution changes over time (Phase 2.2)
90    #[serde(default)]
91    pub temporal: TemporalDriftConfig,
92    /// Graph export configuration for accounting network export
93    #[serde(default, alias = "graphExport")]
94    pub graph_export: GraphExportConfig,
95    /// Streaming output API configuration
96    #[serde(default)]
97    pub streaming: StreamingSchemaConfig,
98    /// Rate limiting configuration
99    #[serde(default, alias = "rateLimit")]
100    pub rate_limit: RateLimitSchemaConfig,
101    /// Temporal attribute generation configuration
102    #[serde(default, alias = "temporalAttributes")]
103    pub temporal_attributes: TemporalAttributeSchemaConfig,
104    /// Relationship generation configuration
105    #[serde(default)]
106    pub relationships: RelationshipSchemaConfig,
107    /// Accounting standards framework configuration (IFRS, US GAAP)
108    #[serde(default, alias = "accountingStandards")]
109    pub accounting_standards: AccountingStandardsConfig,
110    /// Audit standards framework configuration (ISA, PCAOB)
111    #[serde(default, alias = "auditStandards")]
112    pub audit_standards: AuditStandardsConfig,
113    /// Advanced distribution configuration (mixture models, correlations, regime changes)
114    #[serde(default)]
115    pub distributions: AdvancedDistributionConfig,
116    /// Temporal patterns configuration (business days, period-end dynamics, processing lags)
117    #[serde(default, alias = "temporalPatterns")]
118    pub temporal_patterns: TemporalPatternsConfig,
119    /// Vendor network configuration (multi-tier supply chain modeling)
120    #[serde(default, alias = "vendorNetwork")]
121    pub vendor_network: VendorNetworkSchemaConfig,
122    /// Customer segmentation configuration (value segments, lifecycle stages)
123    #[serde(default, alias = "customerSegmentation")]
124    pub customer_segmentation: CustomerSegmentationSchemaConfig,
125    /// Relationship strength calculation configuration
126    #[serde(default, alias = "relationshipStrength")]
127    pub relationship_strength: RelationshipStrengthSchemaConfig,
128    /// Cross-process link configuration (P2P ↔ O2C via inventory)
129    #[serde(default, alias = "crossProcessLinks")]
130    pub cross_process_links: CrossProcessLinksSchemaConfig,
131    /// Organizational events configuration (acquisitions, divestitures, etc.)
132    #[serde(default, alias = "organizationalEvents")]
133    pub organizational_events: OrganizationalEventsSchemaConfig,
134    /// Behavioral drift configuration (vendor, customer, employee behavior)
135    #[serde(default, alias = "behavioralDrift")]
136    pub behavioral_drift: BehavioralDriftSchemaConfig,
137    /// Market drift configuration (economic cycles, commodities, price shocks)
138    #[serde(default, alias = "marketDrift")]
139    pub market_drift: MarketDriftSchemaConfig,
140    /// Drift labeling configuration for ground truth generation
141    #[serde(default, alias = "driftLabeling")]
142    pub drift_labeling: DriftLabelingSchemaConfig,
143    /// Enhanced anomaly injection configuration (multi-stage schemes, correlated injection, near-miss)
144    #[serde(default, alias = "anomalyInjection")]
145    pub anomaly_injection: EnhancedAnomalyConfig,
146    /// Industry-specific transaction and anomaly generation configuration
147    #[serde(default, alias = "industrySpecific")]
148    pub industry_specific: IndustrySpecificConfig,
149    /// Fingerprint privacy configuration for extraction/synthesis
150    #[serde(default, alias = "fingerprintPrivacy")]
151    pub fingerprint_privacy: FingerprintPrivacyConfig,
152    /// Quality gate configuration for pass/fail thresholds
153    #[serde(default, alias = "qualityGates")]
154    pub quality_gates: QualityGatesSchemaConfig,
155    /// Compliance configuration (EU AI Act, content marking)
156    #[serde(default)]
157    pub compliance: ComplianceSchemaConfig,
158    /// Webhook notification configuration
159    #[serde(default)]
160    pub webhooks: WebhookSchemaConfig,
161    /// LLM enrichment configuration (AI-augmented vendor names, descriptions, explanations)
162    #[serde(default)]
163    pub llm: LlmSchemaConfig,
164    /// Diffusion model configuration (statistical diffusion-based data enhancement)
165    #[serde(default)]
166    pub diffusion: DiffusionSchemaConfig,
167    /// Causal generation configuration (structural causal models, interventions)
168    #[serde(default)]
169    pub causal: CausalSchemaConfig,
170
171    // ===== Enterprise Process Chain Extensions =====
172    /// Source-to-Pay (S2C/S2P) configuration (sourcing, contracts, catalogs, scorecards)
173    #[serde(default, alias = "sourceToPay")]
174    pub source_to_pay: SourceToPayConfig,
175    /// Financial reporting configuration (financial statements, KPIs, budgets)
176    #[serde(default, alias = "financialReporting")]
177    pub financial_reporting: FinancialReportingConfig,
178    /// HR process configuration (payroll, time & attendance, expenses)
179    #[serde(default)]
180    pub hr: HrConfig,
181    /// Manufacturing configuration (production orders, WIP, routing)
182    #[serde(default)]
183    pub manufacturing: ManufacturingProcessConfig,
184    /// Sales quote configuration (quote-to-order pipeline)
185    #[serde(default, alias = "salesQuotes")]
186    pub sales_quotes: SalesQuoteConfig,
187    /// Tax accounting configuration (VAT/GST, sales tax, withholding, provisions, payroll tax)
188    #[serde(default)]
189    pub tax: TaxConfig,
190    /// Treasury and cash management configuration
191    #[serde(default)]
192    pub treasury: TreasuryConfig,
193    /// Project accounting configuration
194    #[serde(default, alias = "projectAccounting")]
195    pub project_accounting: ProjectAccountingConfig,
196    /// ESG / Sustainability reporting configuration
197    #[serde(default)]
198    pub esg: EsgConfig,
199    /// Country pack configuration (external packs directory, per-country overrides)
200    #[serde(default, alias = "countryPacks")]
201    pub country_packs: Option<CountryPacksSchemaConfig>,
202    /// Counterfactual simulation scenario configuration
203    #[serde(default)]
204    pub scenarios: ScenariosConfig,
205    /// Generation session configuration (period-by-period generation with balance carry-forward)
206    #[serde(default)]
207    pub session: SessionSchemaConfig,
208    /// Compliance regulations framework configuration (standards registry, jurisdictions, temporal versioning, audit templates, graph integration)
209    #[serde(default, alias = "complianceRegulations")]
210    pub compliance_regulations: ComplianceRegulationsConfig,
211    /// v3.3.0: analytics metadata phase — prior-year comparatives,
212    /// industry benchmarks, management reports, drift events. Off by
213    /// default so v3.2.1 archives are byte-identical.
214    #[serde(default, alias = "analyticsMetadata")]
215    pub analytics_metadata: AnalyticsMetadataConfig,
216    /// Phase 1 of the central concentration abstraction (#143). Post-generation
217    /// passes over the JE batch that reshape distributional structure toward a
218    /// corpus-derived target. Off by default — see
219    /// `docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md`.
220    #[serde(default)]
221    pub concentration: ConcentrationConfig,
222}
223
224/// v3.3.0: analytics-metadata phase configuration.
225///
226/// Gates the `phase_analytics_metadata` pass that runs AFTER all
227/// JE-adding phases (including the fraud-bias sweep at Phase 20b).
228/// When enabled, the orchestrator calls `PriorYearGenerator`,
229/// `IndustryBenchmarkGenerator`, `ManagementReportGenerator`, and
230/// `DriftEventGenerator` in sequence; each sub-flag below controls
231/// whether that specific generator fires.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub struct AnalyticsMetadataConfig {
234    /// Master switch for the whole analytics phase.
235    #[serde(default)]
236    pub enabled: bool,
237    /// Emit `PriorYearComparative` records derived from current
238    /// period's account balances.
239    #[serde(default = "default_true")]
240    pub prior_year: bool,
241    /// Emit `IndustryBenchmark` records for the configured industry.
242    #[serde(default = "default_true")]
243    pub industry_benchmark: bool,
244    /// Emit management-report artefacts.
245    #[serde(default = "default_true")]
246    pub management_reports: bool,
247    /// Emit `LabeledDriftEvent` records — post-generation sweep over
248    /// journal entries to label detected drift patterns.
249    #[serde(default = "default_true")]
250    pub drift_events: bool,
251}
252
253impl Default for AnalyticsMetadataConfig {
254    fn default() -> Self {
255        Self {
256            enabled: false,
257            prior_year: true,
258            industry_benchmark: true,
259            management_reports: true,
260            drift_events: true,
261        }
262    }
263}
264
265/// LLM enrichment configuration.
266///
267/// Controls AI-augmented metadata enrichment using LLM providers.
268/// When enabled, vendor names, transaction descriptions, and anomaly explanations
269/// are enriched using the configured provider (mock by default).
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct LlmSchemaConfig {
272    /// Whether LLM enrichment is enabled.
273    #[serde(default)]
274    pub enabled: bool,
275    /// Provider type: "mock", "openai", "anthropic", "custom".
276    #[serde(default = "default_llm_provider")]
277    pub provider: String,
278    /// Model name/ID for the provider.
279    #[serde(default = "default_llm_model_name")]
280    pub model: String,
281    /// Maximum number of vendor names to enrich per run.
282    #[serde(default = "default_llm_batch_size")]
283    pub max_vendor_enrichments: usize,
284
285    /// v4.1.1+: also enrich customer names at generate time.
286    /// Default `false` preserves v4.1.0 behaviour.
287    #[serde(default)]
288    pub enrich_customers: bool,
289
290    /// v4.1.1+: also enrich material descriptions at generate time.
291    /// Default `false`.
292    #[serde(default)]
293    pub enrich_materials: bool,
294
295    /// v4.1.1+: also enrich audit finding titles at generate time
296    /// (the finding narratives remain on their existing template path
297    /// because they're richer and locale-specific). Default `false`.
298    #[serde(default)]
299    pub enrich_findings: bool,
300
301    /// v4.1.1+: upper bound on customer enrichments per run. Matches
302    /// `max_vendor_enrichments` semantics.
303    #[serde(default = "default_llm_batch_size")]
304    pub max_customer_enrichments: usize,
305
306    /// v4.1.1+: upper bound on material enrichments per run.
307    #[serde(default = "default_llm_batch_size")]
308    pub max_material_enrichments: usize,
309
310    /// v4.1.1+: upper bound on finding enrichments per run.
311    #[serde(default = "default_llm_batch_size")]
312    pub max_finding_enrichments: usize,
313}
314
/// Serde default for `LlmSchemaConfig::provider`: the `"mock"` provider.
fn default_llm_provider() -> String {
    String::from("mock")
}
318
/// Serde default for `LlmSchemaConfig::model`.
fn default_llm_model_name() -> String {
    String::from("gpt-4o-mini")
}
322
/// Serde default shared by the four `max_*_enrichments` caps of
/// `LlmSchemaConfig`.
fn default_llm_batch_size() -> usize {
    const DEFAULT_ENRICHMENT_CAP: usize = 50;
    DEFAULT_ENRICHMENT_CAP
}
326
327impl Default for LlmSchemaConfig {
328    fn default() -> Self {
329        Self {
330            enabled: false,
331            provider: default_llm_provider(),
332            model: default_llm_model_name(),
333            max_vendor_enrichments: default_llm_batch_size(),
334            enrich_customers: false,
335            enrich_materials: false,
336            enrich_findings: false,
337            max_customer_enrichments: default_llm_batch_size(),
338            max_material_enrichments: default_llm_batch_size(),
339            max_finding_enrichments: default_llm_batch_size(),
340        }
341    }
342}
343
344/// Diffusion model configuration.
345///
346/// Controls statistical diffusion-based data enhancement that generates samples
347/// matching target distribution properties (means, standard deviations, correlations).
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct DiffusionSchemaConfig {
350    /// Whether diffusion enhancement is enabled.
351    #[serde(default)]
352    pub enabled: bool,
353    /// Number of diffusion steps (higher = better quality, slower).
354    #[serde(default = "default_diffusion_steps")]
355    pub n_steps: usize,
356    /// Noise schedule type: "linear", "cosine", "sigmoid".
357    #[serde(default = "default_diffusion_schedule")]
358    pub schedule: String,
359    /// Number of sample rows to generate for demonstration.
360    #[serde(default = "default_diffusion_sample_size")]
361    pub sample_size: usize,
362    /// Backend type: "statistical" (default), "neural", "hybrid".
363    #[serde(default = "default_diffusion_backend")]
364    pub backend: String,
365    /// Neural diffusion backend configuration (used when backend is "neural" or "hybrid").
366    #[serde(default)]
367    pub neural: NeuralDiffusionSchemaConfig,
368}
369
/// Serde default for `DiffusionSchemaConfig::n_steps`.
fn default_diffusion_steps() -> usize {
    const DEFAULT_STEPS: usize = 100;
    DEFAULT_STEPS
}
373
/// Serde default for `DiffusionSchemaConfig::schedule`: the `"linear"` schedule.
fn default_diffusion_schedule() -> String {
    String::from("linear")
}
377
/// Serde default for `DiffusionSchemaConfig::sample_size`.
fn default_diffusion_sample_size() -> usize {
    const DEFAULT_SAMPLE_ROWS: usize = 100;
    DEFAULT_SAMPLE_ROWS
}
381
/// Serde default for `DiffusionSchemaConfig::backend`: the `"statistical"` backend.
fn default_diffusion_backend() -> String {
    String::from("statistical")
}
385
386impl Default for DiffusionSchemaConfig {
387    fn default() -> Self {
388        Self {
389            enabled: false,
390            n_steps: default_diffusion_steps(),
391            schedule: default_diffusion_schedule(),
392            sample_size: default_diffusion_sample_size(),
393            backend: default_diffusion_backend(),
394            neural: NeuralDiffusionSchemaConfig::default(),
395        }
396    }
397}
398
399/// Neural diffusion backend configuration.
400///
401/// Controls the `candle`-based neural score network that learns joint distributions
402/// from training data for the neural and hybrid diffusion backends.
403#[derive(Debug, Clone, Serialize, Deserialize)]
404pub struct NeuralDiffusionSchemaConfig {
405    /// Hidden layer dimensions for the score network MLP.
406    #[serde(default = "default_neural_hidden_dims")]
407    pub hidden_dims: Vec<usize>,
408    /// Dimensionality of the timestep embedding.
409    #[serde(default = "default_neural_timestep_embed_dim")]
410    pub timestep_embed_dim: usize,
411    /// Learning rate for training.
412    #[serde(default = "default_neural_learning_rate")]
413    pub learning_rate: f64,
414    /// Number of training epochs.
415    #[serde(default = "default_neural_training_epochs")]
416    pub training_epochs: usize,
417    /// Training batch size.
418    #[serde(default = "default_neural_batch_size")]
419    pub batch_size: usize,
420    /// Blend weight for hybrid mode (0.0 = all statistical, 1.0 = all neural).
421    #[serde(default = "default_neural_hybrid_weight")]
422    pub hybrid_weight: f64,
423    /// Hybrid blending strategy: "weighted_average", "column_select", "threshold".
424    #[serde(default = "default_neural_hybrid_strategy")]
425    pub hybrid_strategy: String,
426    /// Columns to apply neural generation to (empty = all numeric columns).
427    #[serde(default)]
428    pub neural_columns: Vec<String>,
429    /// v4.4.0+ Optional path to a pre-trained score-network checkpoint
430    /// (`.safetensors`). When set, the orchestrator loads the
431    /// checkpoint instead of training from the first batch — useful
432    /// for long-running production deployments where training cost
433    /// dominates per-run cost. When empty, the orchestrator trains
434    /// on the first generated JE amounts.
435    #[serde(default, skip_serializing_if = "Option::is_none")]
436    pub checkpoint_path: Option<String>,
437}
438
/// Serde default for `NeuralDiffusionSchemaConfig::hidden_dims`: three hidden
/// layers of width 256, 256 and 128.
fn default_neural_hidden_dims() -> Vec<usize> {
    Vec::from([256, 256, 128])
}
442
/// Serde default for `NeuralDiffusionSchemaConfig::timestep_embed_dim`.
fn default_neural_timestep_embed_dim() -> usize {
    const DEFAULT_EMBED_DIM: usize = 64;
    DEFAULT_EMBED_DIM
}
446
/// Serde default for `NeuralDiffusionSchemaConfig::learning_rate`.
fn default_neural_learning_rate() -> f64 {
    const DEFAULT_LEARNING_RATE: f64 = 0.001;
    DEFAULT_LEARNING_RATE
}
450
/// Serde default for `NeuralDiffusionSchemaConfig::training_epochs`.
fn default_neural_training_epochs() -> usize {
    const DEFAULT_EPOCHS: usize = 100;
    DEFAULT_EPOCHS
}
454
/// Serde default for `NeuralDiffusionSchemaConfig::batch_size`.
fn default_neural_batch_size() -> usize {
    const DEFAULT_BATCH: usize = 64;
    DEFAULT_BATCH
}
458
/// Serde default for `NeuralDiffusionSchemaConfig::hybrid_weight`: the
/// midpoint between all-statistical (0.0) and all-neural (1.0).
fn default_neural_hybrid_weight() -> f64 {
    const DEFAULT_BLEND: f64 = 0.5;
    DEFAULT_BLEND
}
462
/// Serde default for `NeuralDiffusionSchemaConfig::hybrid_strategy`.
fn default_neural_hybrid_strategy() -> String {
    String::from("weighted_average")
}
466
467impl Default for NeuralDiffusionSchemaConfig {
468    fn default() -> Self {
469        Self {
470            hidden_dims: default_neural_hidden_dims(),
471            timestep_embed_dim: default_neural_timestep_embed_dim(),
472            learning_rate: default_neural_learning_rate(),
473            training_epochs: default_neural_training_epochs(),
474            batch_size: default_neural_batch_size(),
475            hybrid_weight: default_neural_hybrid_weight(),
476            hybrid_strategy: default_neural_hybrid_strategy(),
477            neural_columns: Vec::new(),
478            checkpoint_path: None,
479        }
480    }
481}
482
483/// Causal generation configuration.
484///
485/// Controls structural causal model (SCM) based data generation that respects
486/// causal relationships between variables, supports do-calculus interventions,
487/// and enables counterfactual scenarios.
488#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct CausalSchemaConfig {
490    /// Whether causal generation is enabled.
491    #[serde(default)]
492    pub enabled: bool,
493    /// Built-in template to use: "fraud_detection", "revenue_cycle", or "custom".
494    #[serde(default = "default_causal_template")]
495    pub template: String,
496    /// Number of causal samples to generate.
497    #[serde(default = "default_causal_sample_size")]
498    pub sample_size: usize,
499    /// Whether to run causal validation on the output.
500    #[serde(default = "default_true")]
501    pub validate: bool,
502}
503
/// Serde default for `CausalSchemaConfig::template`: the built-in
/// `"fraud_detection"` template.
fn default_causal_template() -> String {
    String::from("fraud_detection")
}
507
/// Serde default for `CausalSchemaConfig::sample_size`.
fn default_causal_sample_size() -> usize {
    const DEFAULT_CAUSAL_SAMPLES: usize = 500;
    DEFAULT_CAUSAL_SAMPLES
}
511
512impl Default for CausalSchemaConfig {
513    fn default() -> Self {
514        Self {
515            enabled: false,
516            template: default_causal_template(),
517            sample_size: default_causal_sample_size(),
518            validate: true,
519        }
520    }
521}
522
523/// Graph export configuration for accounting network and ML training exports.
524///
525/// This section enables exporting generated data as graphs for:
526/// - Network reconstruction algorithms
527/// - Graph neural network training
528/// - Neo4j graph database import
529#[derive(Debug, Clone, Serialize, Deserialize)]
530pub struct GraphExportConfig {
531    /// Enable graph export.
532    #[serde(default)]
533    pub enabled: bool,
534
535    /// Graph types to generate.
536    #[serde(default = "default_graph_types")]
537    pub graph_types: Vec<GraphTypeConfig>,
538
539    /// Export formats to generate.
540    #[serde(default = "default_graph_formats")]
541    pub formats: Vec<GraphExportFormat>,
542
543    /// Train split ratio for ML datasets.
544    #[serde(default = "default_train_ratio")]
545    pub train_ratio: f64,
546
547    /// Validation split ratio for ML datasets.
548    #[serde(default = "default_val_ratio")]
549    pub validation_ratio: f64,
550
551    /// Random seed for train/val/test splits.
552    #[serde(default)]
553    pub split_seed: Option<u64>,
554
555    /// Output subdirectory for graph exports (relative to output directory).
556    #[serde(default = "default_graph_subdir")]
557    pub output_subdirectory: String,
558
559    /// Multi-layer hypergraph export settings for RustGraph integration.
560    #[serde(default)]
561    pub hypergraph: HypergraphExportSettings,
562
563    /// DGL-specific export settings.
564    #[serde(default)]
565    pub dgl: DglExportConfig,
566
567    /// `graphs/je_network.csv` flat edge-list export settings (v5.8.0+).
568    #[serde(default)]
569    pub je_network: JeNetworkConfig,
570}
571
572/// Method used to construct edges from journal entries when writing
573/// `graphs/je_network.csv` (v5.8.0+).
574///
575/// Reference: Ivertowski (2024), *Hardware Accelerated Method for
576/// Accounting Network Generation*, Methods A through E.
577#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
578#[serde(rename_all = "snake_case")]
579pub enum JeNetworkMethod {
580    /// Method B (full Cartesian product) for every JE — bijective on
581    /// 2-line entries (Method A) and `n × m` Cartesian for multi-line
582    /// entries with proportional amount allocation.  Produces
583    /// O(n × m) edges per JE — a 50-debit / 50-credit period-close
584    /// consolidation alone yields 2 500 edges, and a typical
585    /// HF-scale 1 M-line config can blow up to 200 M+ edges (and tens
586    /// of GB of memory). Use explicitly when downstream consumers
587    /// already depend on the Cartesian shape.
588    Cartesian,
589    /// Method A only — emit a single edge per 2-line journal entry
590    /// (1 debit + 1 credit) and skip multi-line entries entirely.
591    /// Edge count = number of 2-line JEs (≈ 60 % of entries per the
592    /// 2024 paper); per-edge confidence is exactly `1.0`.
593    ///
594    /// **Default since v5.27** (previously `Cartesian`). The Cartesian
595    /// default OOM'd small-complexity CLI smoke tests on 14-16 GB CI
596    /// runners — a 50 × 50 period-close JE alone wanted 20 GB of edge
597    /// memory. Method A is the bounded, exactness-preserving fallback
598    /// recommended for published reference datasets where size and
599    /// exactness matter more than recall on multi-line consolidations.
600    /// Set `je_network.method: cartesian` explicitly to restore the
601    /// pre-v5.27 behaviour.
602    #[default]
603    A,
604}
605
606/// Configuration for the `graphs/je_network.csv` flat edge-list
607/// export (v5.8.0+).
608#[derive(Debug, Clone, Default, Serialize, Deserialize)]
609#[serde(deny_unknown_fields)]
610pub struct JeNetworkConfig {
611    /// Edge-construction method (see [`JeNetworkMethod`]).
612    #[serde(default)]
613    pub method: JeNetworkMethod,
614}
615
616fn default_graph_types() -> Vec<GraphTypeConfig> {
617    vec![GraphTypeConfig::default()]
618}
619
620fn default_graph_formats() -> Vec<GraphExportFormat> {
621    vec![GraphExportFormat::PytorchGeometric]
622}
623
/// Serde default for `GraphExportConfig::train_ratio`.
fn default_train_ratio() -> f64 {
    const DEFAULT_TRAIN_RATIO: f64 = 0.7;
    DEFAULT_TRAIN_RATIO
}
627
/// Serde default for `GraphExportConfig::validation_ratio`.
fn default_val_ratio() -> f64 {
    const DEFAULT_VALIDATION_RATIO: f64 = 0.15;
    DEFAULT_VALIDATION_RATIO
}
631
/// Serde default for `GraphExportConfig::output_subdirectory`.
fn default_graph_subdir() -> String {
    String::from("graphs")
}
635
636impl Default for GraphExportConfig {
637    fn default() -> Self {
638        Self {
639            enabled: false,
640            graph_types: default_graph_types(),
641            formats: default_graph_formats(),
642            train_ratio: 0.7,
643            validation_ratio: 0.15,
644            split_seed: None,
645            output_subdirectory: "graphs".to_string(),
646            hypergraph: HypergraphExportSettings::default(),
647            dgl: DglExportConfig::default(),
648            je_network: JeNetworkConfig::default(),
649        }
650    }
651}
652
653/// DGL-specific export settings.
654#[derive(Debug, Clone, Default, Serialize, Deserialize)]
655pub struct DglExportConfig {
656    /// Export as a heterogeneous graph (distinct node/edge types).
657    ///
658    /// When `true` the DGL exporter produces a `HeteroData` object with typed
659    /// node and edge stores rather than a single homogeneous graph.
660    /// Set to `true` in `graph_export.dgl.heterogeneous: true` in YAML.
661    #[serde(default)]
662    pub heterogeneous: bool,
663}
664
// `Default` is derived for `DglExportConfig`: `heterogeneous` starts as `false` (the `bool` default).
666
667/// Settings for the multi-layer hypergraph export (RustGraph integration).
668///
669/// Produces a 3-layer hypergraph:
670/// - Layer 1: Governance & Controls (COSO, SOX, internal controls, organizational)
671/// - Layer 2: Process Events (P2P/O2C document flows, OCPM events)
672/// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
673#[derive(Debug, Clone, Serialize, Deserialize)]
674pub struct HypergraphExportSettings {
675    /// Enable hypergraph export.
676    #[serde(default)]
677    pub enabled: bool,
678
679    /// Maximum total nodes across all layers (default 50000).
680    #[serde(default = "default_hypergraph_max_nodes")]
681    pub max_nodes: usize,
682
683    /// Aggregation strategy when node budget is exceeded.
684    #[serde(default = "default_aggregation_strategy")]
685    pub aggregation_strategy: String,
686
687    /// Layer 1 (Governance & Controls) settings.
688    #[serde(default)]
689    pub governance_layer: GovernanceLayerSettings,
690
691    /// Layer 2 (Process Events) settings.
692    #[serde(default)]
693    pub process_layer: ProcessLayerSettings,
694
695    /// Layer 3 (Accounting Network) settings.
696    #[serde(default)]
697    pub accounting_layer: AccountingLayerSettings,
698
699    /// Cross-layer edge generation settings.
700    #[serde(default)]
701    pub cross_layer: CrossLayerSettings,
702
703    /// Output subdirectory for hypergraph files (relative to graph output directory).
704    #[serde(default = "default_hypergraph_subdir")]
705    pub output_subdirectory: String,
706
707    /// Output format: "native" (default) for internal field names, "unified" for RustGraph format.
708    #[serde(default = "default_hypergraph_format")]
709    pub output_format: String,
710
711    /// Optional URL for streaming unified JSONL to a RustGraph ingest endpoint.
712    #[serde(default)]
713    pub stream_target: Option<String>,
714
715    /// Batch size for streaming (number of JSONL lines per HTTP POST). Default: 1000.
716    #[serde(default = "default_stream_batch_size")]
717    pub stream_batch_size: usize,
718}
719
/// Serde default for `HypergraphExportSettings::max_nodes`.
fn default_hypergraph_max_nodes() -> usize {
    const DEFAULT_NODE_BUDGET: usize = 50_000;
    DEFAULT_NODE_BUDGET
}
723
/// Serde default for `HypergraphExportSettings::aggregation_strategy`.
fn default_aggregation_strategy() -> String {
    String::from("pool_by_counterparty")
}
727
/// Serde default for `HypergraphExportSettings::output_subdirectory`.
fn default_hypergraph_subdir() -> String {
    String::from("hypergraph")
}
731
/// Serde default for `HypergraphExportSettings::output_format`: `"native"`.
fn default_hypergraph_format() -> String {
    String::from("native")
}
735
/// Serde default for `HypergraphExportSettings::stream_batch_size`.
fn default_stream_batch_size() -> usize {
    const DEFAULT_LINES_PER_POST: usize = 1000;
    DEFAULT_LINES_PER_POST
}
739
740impl Default for HypergraphExportSettings {
741    fn default() -> Self {
742        Self {
743            enabled: false,
744            max_nodes: 50_000,
745            aggregation_strategy: "pool_by_counterparty".to_string(),
746            governance_layer: GovernanceLayerSettings::default(),
747            process_layer: ProcessLayerSettings::default(),
748            accounting_layer: AccountingLayerSettings::default(),
749            cross_layer: CrossLayerSettings::default(),
750            output_subdirectory: "hypergraph".to_string(),
751            output_format: "native".to_string(),
752            stream_target: None,
753            stream_batch_size: 1000,
754        }
755    }
756}
757
758/// Layer 1: Governance & Controls layer settings.
759#[derive(Debug, Clone, Serialize, Deserialize)]
760pub struct GovernanceLayerSettings {
761    /// Include COSO framework nodes (5 components + 17 principles).
762    #[serde(default = "default_true")]
763    pub include_coso: bool,
764    /// Include internal control nodes.
765    #[serde(default = "default_true")]
766    pub include_controls: bool,
767    /// Include SOX assertion nodes.
768    #[serde(default = "default_true")]
769    pub include_sox: bool,
770    /// Include vendor master data nodes.
771    #[serde(default = "default_true")]
772    pub include_vendors: bool,
773    /// Include customer master data nodes.
774    #[serde(default = "default_true")]
775    pub include_customers: bool,
776    /// Include employee/organizational nodes.
777    #[serde(default = "default_true")]
778    pub include_employees: bool,
779}
780
781impl Default for GovernanceLayerSettings {
782    fn default() -> Self {
783        Self {
784            include_coso: true,
785            include_controls: true,
786            include_sox: true,
787            include_vendors: true,
788            include_customers: true,
789            include_employees: true,
790        }
791    }
792}
793
794/// Layer 2: Process Events layer settings.
795#[derive(Debug, Clone, Serialize, Deserialize)]
796pub struct ProcessLayerSettings {
797    /// Include P2P (Procure-to-Pay) document flow nodes.
798    #[serde(default = "default_true")]
799    pub include_p2p: bool,
800    /// Include O2C (Order-to-Cash) document flow nodes.
801    #[serde(default = "default_true")]
802    pub include_o2c: bool,
803    /// Include S2C (Source-to-Contract) document flow nodes.
804    #[serde(default = "default_true")]
805    pub include_s2c: bool,
806    /// Include H2R (Hire-to-Retire) document flow nodes.
807    #[serde(default = "default_true")]
808    pub include_h2r: bool,
809    /// Include MFG (Manufacturing) document flow nodes.
810    #[serde(default = "default_true")]
811    pub include_mfg: bool,
812    /// Include BANK (Banking) document flow nodes.
813    #[serde(default = "default_true")]
814    pub include_bank: bool,
815    /// Include AUDIT document flow nodes.
816    #[serde(default = "default_true")]
817    pub include_audit: bool,
818    /// Include R2R (Record-to-Report) document flow nodes (bank recon + period close).
819    #[serde(default = "default_true")]
820    pub include_r2r: bool,
821    /// Export OCPM events as hyperedges.
822    #[serde(default = "default_true")]
823    pub events_as_hyperedges: bool,
824    /// Threshold: if a counterparty has more documents than this, aggregate into pool nodes.
825    #[serde(default = "default_docs_per_counterparty_threshold")]
826    pub docs_per_counterparty_threshold: usize,
827}
828
/// Serde fallback for `ProcessLayerSettings::docs_per_counterparty_threshold`: 20 documents.
fn default_docs_per_counterparty_threshold() -> usize {
    20_usize
}
832
833impl Default for ProcessLayerSettings {
834    fn default() -> Self {
835        Self {
836            include_p2p: true,
837            include_o2c: true,
838            include_s2c: true,
839            include_h2r: true,
840            include_mfg: true,
841            include_bank: true,
842            include_audit: true,
843            include_r2r: true,
844            events_as_hyperedges: true,
845            docs_per_counterparty_threshold: 20,
846        }
847    }
848}
849
850/// Layer 3: Accounting Network layer settings.
851#[derive(Debug, Clone, Serialize, Deserialize)]
852pub struct AccountingLayerSettings {
853    /// Include GL account nodes.
854    #[serde(default = "default_true")]
855    pub include_accounts: bool,
856    /// Export journal entries as hyperedges (debit+credit accounts as participants).
857    #[serde(default = "default_true")]
858    pub je_as_hyperedges: bool,
859}
860
861impl Default for AccountingLayerSettings {
862    fn default() -> Self {
863        Self {
864            include_accounts: true,
865            je_as_hyperedges: true,
866        }
867    }
868}
869
870/// Cross-layer edge generation settings.
871#[derive(Debug, Clone, Serialize, Deserialize)]
872pub struct CrossLayerSettings {
873    /// Generate cross-layer edges (Control→Account, Vendor→PO, etc.).
874    #[serde(default = "default_true")]
875    pub enabled: bool,
876}
877
878impl Default for CrossLayerSettings {
879    fn default() -> Self {
880        Self { enabled: true }
881    }
882}
883
884/// Configuration for a specific graph type to export.
885#[derive(Debug, Clone, Serialize, Deserialize)]
886pub struct GraphTypeConfig {
887    /// Name identifier for this graph configuration.
888    #[serde(default = "default_graph_name")]
889    pub name: String,
890
891    /// Whether to aggregate parallel edges between the same nodes.
892    #[serde(default)]
893    pub aggregate_edges: bool,
894
895    /// Minimum edge weight to include (filters out small transactions).
896    #[serde(default)]
897    pub min_edge_weight: f64,
898
899    /// Whether to include document nodes (creates hub-and-spoke structure).
900    #[serde(default)]
901    pub include_document_nodes: bool,
902}
903
/// Serde fallback for `GraphTypeConfig::name`: `"accounting_network"`.
fn default_graph_name() -> String {
    String::from("accounting_network")
}
907
908impl Default for GraphTypeConfig {
909    fn default() -> Self {
910        Self {
911            name: "accounting_network".to_string(),
912            aggregate_edges: false,
913            min_edge_weight: 0.0,
914            include_document_nodes: false,
915        }
916    }
917}
918
919/// Export format for graph data.
920#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
921#[serde(rename_all = "snake_case")]
922pub enum GraphExportFormat {
923    /// PyTorch Geometric format (.npy files + metadata.json).
924    PytorchGeometric,
925    /// Neo4j format (CSV files + Cypher import scripts).
926    Neo4j,
927    /// Deep Graph Library format.
928    Dgl,
929    /// RustGraph/RustAssureTwin JSON format.
930    RustGraph,
931    /// RustGraph multi-layer hypergraph format (nodes.jsonl + edges.jsonl + hyperedges.jsonl).
932    RustGraphHypergraph,
933}
934
935/// Scenario configuration for metadata, tagging, and ML training setup.
936///
937/// This section enables tracking the purpose and characteristics of a generation run.
938#[derive(Debug, Clone, Default, Serialize, Deserialize)]
939pub struct ScenarioConfig {
940    /// Tags for categorizing and filtering datasets.
941    /// Examples: "fraud_detection", "retail", "month_end_stress", "ml_training"
942    #[serde(default)]
943    pub tags: Vec<String>,
944
945    /// Data quality profile preset.
946    /// - "clean": Minimal data quality issues (0.1% missing, 0.05% typos)
947    /// - "noisy": Moderate issues (5% missing, 2% typos, 1% duplicates)
948    /// - "legacy": Heavy issues simulating legacy system data (10% missing, 5% typos)
949    #[serde(default)]
950    pub profile: Option<String>,
951
952    /// Human-readable description of the scenario purpose.
953    #[serde(default)]
954    pub description: Option<String>,
955
956    /// Whether this run is for ML training (enables balanced labeling).
957    #[serde(default)]
958    pub ml_training: bool,
959
960    /// Target anomaly class balance for ML training.
961    /// If set, anomalies will be injected to achieve this ratio.
962    #[serde(default)]
963    pub target_anomaly_ratio: Option<f64>,
964
965    /// Custom metadata key-value pairs.
966    #[serde(default)]
967    pub metadata: std::collections::HashMap<String, String>,
968}
969
970/// Temporal drift configuration for simulating distribution changes over time.
971///
972/// This enables generation of data that shows realistic temporal evolution,
973/// useful for training drift detection models and testing temporal robustness.
974#[derive(Debug, Clone, Serialize, Deserialize)]
975pub struct TemporalDriftConfig {
976    /// Enable temporal drift simulation.
977    #[serde(default)]
978    pub enabled: bool,
979
980    /// Amount mean drift per period (e.g., 0.02 = 2% mean shift per month).
981    /// Simulates gradual inflation or business growth.
982    #[serde(default = "default_amount_drift")]
983    pub amount_mean_drift: f64,
984
985    /// Amount variance drift per period (e.g., 0.01 = 1% variance increase per month).
986    /// Simulates increasing volatility over time.
987    #[serde(default)]
988    pub amount_variance_drift: f64,
989
990    /// Anomaly rate drift per period (e.g., 0.001 = 0.1% increase per month).
991    /// Simulates increasing fraud attempts or degrading controls.
992    #[serde(default)]
993    pub anomaly_rate_drift: f64,
994
995    /// Concept drift rate - how quickly feature distributions change (0.0-1.0).
996    /// Higher values cause more rapid distribution shifts.
997    #[serde(default = "default_concept_drift")]
998    pub concept_drift_rate: f64,
999
1000    /// Sudden drift events - probability of a sudden distribution shift in any period.
1001    #[serde(default)]
1002    pub sudden_drift_probability: f64,
1003
1004    /// Magnitude of sudden drift events when they occur (multiplier).
1005    #[serde(default = "default_sudden_drift_magnitude")]
1006    pub sudden_drift_magnitude: f64,
1007
1008    /// Seasonal drift - enable cyclic patterns that repeat annually.
1009    #[serde(default)]
1010    pub seasonal_drift: bool,
1011
1012    /// Drift start period (0 = from beginning). Use to simulate stable baseline before drift.
1013    #[serde(default)]
1014    pub drift_start_period: u32,
1015
1016    /// Drift type: "gradual", "sudden", "recurring", "mixed"
1017    #[serde(default = "default_drift_type")]
1018    pub drift_type: DriftType,
1019}
1020
/// Serde fallback for `TemporalDriftConfig::amount_mean_drift`: 0.02 per period.
fn default_amount_drift() -> f64 {
    0.02_f64
}
1024
/// Serde fallback for `TemporalDriftConfig::concept_drift_rate`: 0.01.
fn default_concept_drift() -> f64 {
    0.01_f64
}
1028
/// Serde fallback for `TemporalDriftConfig::sudden_drift_magnitude`: a 2.0 multiplier.
fn default_sudden_drift_magnitude() -> f64 {
    2.0_f64
}
1032
1033fn default_drift_type() -> DriftType {
1034    DriftType::Gradual
1035}
1036
1037impl Default for TemporalDriftConfig {
1038    fn default() -> Self {
1039        Self {
1040            enabled: false,
1041            amount_mean_drift: 0.02,
1042            amount_variance_drift: 0.0,
1043            anomaly_rate_drift: 0.0,
1044            concept_drift_rate: 0.01,
1045            sudden_drift_probability: 0.0,
1046            sudden_drift_magnitude: 2.0,
1047            seasonal_drift: false,
1048            drift_start_period: 0,
1049            drift_type: DriftType::Gradual,
1050        }
1051    }
1052}
1053
1054impl TemporalDriftConfig {
1055    /// Convert to core DriftConfig for use in generators.
1056    pub fn to_core_config(&self) -> datasynth_core::distributions::DriftConfig {
1057        datasynth_core::distributions::DriftConfig {
1058            enabled: self.enabled,
1059            amount_mean_drift: self.amount_mean_drift,
1060            amount_variance_drift: self.amount_variance_drift,
1061            anomaly_rate_drift: self.anomaly_rate_drift,
1062            concept_drift_rate: self.concept_drift_rate,
1063            sudden_drift_probability: self.sudden_drift_probability,
1064            sudden_drift_magnitude: self.sudden_drift_magnitude,
1065            seasonal_drift: self.seasonal_drift,
1066            drift_start_period: self.drift_start_period,
1067            drift_type: match self.drift_type {
1068                DriftType::Gradual => datasynth_core::distributions::DriftType::Gradual,
1069                DriftType::Sudden => datasynth_core::distributions::DriftType::Sudden,
1070                DriftType::Recurring => datasynth_core::distributions::DriftType::Recurring,
1071                DriftType::Mixed => datasynth_core::distributions::DriftType::Mixed,
1072            },
1073            regime_changes: Vec::new(),
1074            economic_cycle: Default::default(),
1075            parameter_drifts: Vec::new(),
1076        }
1077    }
1078}
1079
1080/// Types of temporal drift patterns.
1081#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1082#[serde(rename_all = "snake_case")]
1083pub enum DriftType {
1084    /// Gradual, continuous drift over time (like inflation).
1085    #[default]
1086    Gradual,
1087    /// Sudden, point-in-time shifts (like policy changes).
1088    Sudden,
1089    /// Recurring patterns that cycle (like seasonal variations).
1090    Recurring,
1091    /// Combination of gradual background drift with occasional sudden shifts.
1092    Mixed,
1093}
1094
1095// ============================================================================
1096// Streaming Output API Configuration (Phase 2)
1097// ============================================================================
1098
1099/// Configuration for streaming output API.
1100#[derive(Debug, Clone, Serialize, Deserialize)]
1101pub struct StreamingSchemaConfig {
1102    /// Enable streaming output.
1103    #[serde(default)]
1104    pub enabled: bool,
1105    /// Target events per second (0 = unlimited, default 0).
1106    #[serde(default)]
1107    pub events_per_second: f64,
1108    /// Token bucket burst size (default 100).
1109    #[serde(default = "default_burst_size")]
1110    pub burst_size: u32,
1111    /// Buffer size for streaming (number of items).
1112    #[serde(default = "default_buffer_size")]
1113    pub buffer_size: usize,
1114    /// Enable progress reporting.
1115    #[serde(default = "default_true")]
1116    pub enable_progress: bool,
1117    /// Progress reporting interval (number of items).
1118    #[serde(default = "default_progress_interval")]
1119    pub progress_interval: u64,
1120    /// Backpressure strategy.
1121    #[serde(default)]
1122    pub backpressure: BackpressureSchemaStrategy,
1123}
1124
/// Serde fallback for `StreamingSchemaConfig::buffer_size`: 1000 items.
fn default_buffer_size() -> usize {
    1_000
}
1128
/// Serde fallback for `StreamingSchemaConfig::progress_interval`: every 100 items.
fn default_progress_interval() -> u64 {
    100_u64
}
1132
1133impl Default for StreamingSchemaConfig {
1134    fn default() -> Self {
1135        Self {
1136            enabled: false,
1137            events_per_second: 0.0,
1138            burst_size: 100,
1139            buffer_size: 1000,
1140            enable_progress: true,
1141            progress_interval: 100,
1142            backpressure: BackpressureSchemaStrategy::Block,
1143        }
1144    }
1145}
1146
1147/// Backpressure strategy for streaming output.
1148#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1149#[serde(rename_all = "snake_case")]
1150pub enum BackpressureSchemaStrategy {
1151    /// Block until space is available in the buffer.
1152    #[default]
1153    Block,
1154    /// Drop oldest items when buffer is full.
1155    DropOldest,
1156    /// Drop newest items when buffer is full.
1157    DropNewest,
1158    /// Buffer overflow items up to a limit, then block.
1159    Buffer,
1160}
1161
1162// ============================================================================
1163// Rate Limiting Configuration (Phase 5)
1164// ============================================================================
1165
1166/// Configuration for rate limiting.
1167#[derive(Debug, Clone, Serialize, Deserialize)]
1168pub struct RateLimitSchemaConfig {
1169    /// Enable rate limiting.
1170    #[serde(default)]
1171    pub enabled: bool,
1172    /// Entities per second limit.
1173    #[serde(default = "default_entities_per_second")]
1174    pub entities_per_second: f64,
1175    /// Burst size (number of tokens in bucket).
1176    #[serde(default = "default_burst_size")]
1177    pub burst_size: u32,
1178    /// Backpressure strategy for rate limiting.
1179    #[serde(default)]
1180    pub backpressure: RateLimitBackpressureSchema,
1181}
1182
/// Serde fallback for `RateLimitSchemaConfig::entities_per_second`: 1000.0.
fn default_entities_per_second() -> f64 {
    1000.0_f64
}
1186
/// Serde fallback for `burst_size` on both `StreamingSchemaConfig` and
/// `RateLimitSchemaConfig`: 100.
fn default_burst_size() -> u32 {
    100_u32
}
1190
1191impl Default for RateLimitSchemaConfig {
1192    fn default() -> Self {
1193        Self {
1194            enabled: false,
1195            entities_per_second: 1000.0,
1196            burst_size: 100,
1197            backpressure: RateLimitBackpressureSchema::Block,
1198        }
1199    }
1200}
1201
1202/// Backpressure strategy for rate limiting.
1203#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1204#[serde(rename_all = "snake_case")]
1205pub enum RateLimitBackpressureSchema {
1206    /// Block until rate allows.
1207    #[default]
1208    Block,
1209    /// Drop items that exceed rate.
1210    Drop,
1211    /// Buffer items and process when rate allows.
1212    Buffer,
1213}
1214
1215// ============================================================================
1216// Temporal Attribute Generation Configuration (Phase 3)
1217// ============================================================================
1218
1219/// Configuration for temporal attribute generation.
1220#[derive(Debug, Clone, Serialize, Deserialize)]
1221pub struct TemporalAttributeSchemaConfig {
1222    /// Enable temporal attribute generation.
1223    #[serde(default)]
1224    pub enabled: bool,
1225    /// Valid time configuration.
1226    #[serde(default)]
1227    pub valid_time: ValidTimeSchemaConfig,
1228    /// Transaction time configuration.
1229    #[serde(default)]
1230    pub transaction_time: TransactionTimeSchemaConfig,
1231    /// Generate version chains for entities.
1232    #[serde(default)]
1233    pub generate_version_chains: bool,
1234    /// Average number of versions per entity.
1235    #[serde(default = "default_avg_versions")]
1236    pub avg_versions_per_entity: f64,
1237}
1238
/// Serde fallback for `TemporalAttributeSchemaConfig::avg_versions_per_entity`: 1.5.
fn default_avg_versions() -> f64 {
    1.5_f64
}
1242
1243impl Default for TemporalAttributeSchemaConfig {
1244    fn default() -> Self {
1245        Self {
1246            enabled: false,
1247            valid_time: ValidTimeSchemaConfig::default(),
1248            transaction_time: TransactionTimeSchemaConfig::default(),
1249            generate_version_chains: false,
1250            avg_versions_per_entity: 1.5,
1251        }
1252    }
1253}
1254
1255/// Configuration for valid time (business time) generation.
1256#[derive(Debug, Clone, Serialize, Deserialize)]
1257pub struct ValidTimeSchemaConfig {
1258    /// Probability that valid_to is set (entity has ended validity).
1259    #[serde(default = "default_closed_probability")]
1260    pub closed_probability: f64,
1261    /// Average validity duration in days.
1262    #[serde(default = "default_avg_validity_days")]
1263    pub avg_validity_days: u32,
1264    /// Standard deviation of validity duration in days.
1265    #[serde(default = "default_validity_stddev")]
1266    pub validity_stddev_days: u32,
1267}
1268
/// Serde fallback for `ValidTimeSchemaConfig::closed_probability`: 0.1.
fn default_closed_probability() -> f64 {
    0.1_f64
}
1272
/// Serde fallback for `ValidTimeSchemaConfig::avg_validity_days`: 365 days.
fn default_avg_validity_days() -> u32 {
    365_u32
}
1276
/// Serde fallback for `ValidTimeSchemaConfig::validity_stddev_days`: 90 days.
fn default_validity_stddev() -> u32 {
    90_u32
}
1280
1281impl Default for ValidTimeSchemaConfig {
1282    fn default() -> Self {
1283        Self {
1284            closed_probability: 0.1,
1285            avg_validity_days: 365,
1286            validity_stddev_days: 90,
1287        }
1288    }
1289}
1290
1291/// Configuration for transaction time (system time) generation.
1292#[derive(Debug, Clone, Serialize, Deserialize)]
1293pub struct TransactionTimeSchemaConfig {
1294    /// Average recording delay in seconds (0 = immediate).
1295    #[serde(default)]
1296    pub avg_recording_delay_seconds: u32,
1297    /// Allow backdating (recording time before valid time).
1298    #[serde(default)]
1299    pub allow_backdating: bool,
1300    /// Probability of backdating if allowed.
1301    #[serde(default = "default_backdating_probability")]
1302    pub backdating_probability: f64,
1303    /// Maximum backdate days.
1304    #[serde(default = "default_max_backdate_days")]
1305    pub max_backdate_days: u32,
1306}
1307
/// Serde fallback for `TransactionTimeSchemaConfig::backdating_probability`: 0.01.
fn default_backdating_probability() -> f64 {
    0.01_f64
}
1311
/// Serde fallback for `TransactionTimeSchemaConfig::max_backdate_days`: 30 days.
fn default_max_backdate_days() -> u32 {
    30_u32
}
1315
1316impl Default for TransactionTimeSchemaConfig {
1317    fn default() -> Self {
1318        Self {
1319            avg_recording_delay_seconds: 0,
1320            allow_backdating: false,
1321            backdating_probability: 0.01,
1322            max_backdate_days: 30,
1323        }
1324    }
1325}
1326
1327// ============================================================================
1328// Relationship Generation Configuration (Phase 4)
1329// ============================================================================
1330
1331/// Configuration for relationship generation.
1332#[derive(Debug, Clone, Serialize, Deserialize)]
1333pub struct RelationshipSchemaConfig {
1334    /// Relationship type definitions.
1335    #[serde(default)]
1336    pub relationship_types: Vec<RelationshipTypeSchemaConfig>,
1337    /// Allow orphan entities (entities with no relationships).
1338    #[serde(default = "default_true")]
1339    pub allow_orphans: bool,
1340    /// Probability of creating an orphan entity.
1341    #[serde(default = "default_orphan_probability")]
1342    pub orphan_probability: f64,
1343    /// Allow circular relationships.
1344    #[serde(default)]
1345    pub allow_circular: bool,
1346    /// Maximum depth for circular relationship detection.
1347    #[serde(default = "default_max_circular_depth")]
1348    pub max_circular_depth: u32,
1349}
1350
/// Serde fallback for `RelationshipSchemaConfig::orphan_probability`: 0.01.
fn default_orphan_probability() -> f64 {
    0.01_f64
}
1354
/// Serde fallback for `RelationshipSchemaConfig::max_circular_depth`: 3.
fn default_max_circular_depth() -> u32 {
    3_u32
}
1358
1359impl Default for RelationshipSchemaConfig {
1360    fn default() -> Self {
1361        Self {
1362            relationship_types: Vec::new(),
1363            allow_orphans: true,
1364            orphan_probability: 0.01,
1365            allow_circular: false,
1366            max_circular_depth: 3,
1367        }
1368    }
1369}
1370
1371/// Configuration for a specific relationship type.
1372#[derive(Debug, Clone, Serialize, Deserialize)]
1373pub struct RelationshipTypeSchemaConfig {
1374    /// Name of the relationship type (e.g., "debits", "credits", "created").
1375    pub name: String,
1376    /// Source entity type (e.g., "journal_entry").
1377    pub source_type: String,
1378    /// Target entity type (e.g., "account").
1379    pub target_type: String,
1380    /// Cardinality rule for this relationship.
1381    #[serde(default)]
1382    pub cardinality: CardinalitySchemaRule,
1383    /// Weight for this relationship in random selection.
1384    #[serde(default = "default_relationship_weight")]
1385    pub weight: f64,
1386    /// Whether this relationship is required.
1387    #[serde(default)]
1388    pub required: bool,
1389    /// Whether this relationship is directed.
1390    #[serde(default = "default_true")]
1391    pub directed: bool,
1392}
1393
/// Serde fallback for `RelationshipTypeSchemaConfig::weight`: 1.0.
fn default_relationship_weight() -> f64 {
    1.0_f64
}
1397
1398impl Default for RelationshipTypeSchemaConfig {
1399    fn default() -> Self {
1400        Self {
1401            name: String::new(),
1402            source_type: String::new(),
1403            target_type: String::new(),
1404            cardinality: CardinalitySchemaRule::default(),
1405            weight: 1.0,
1406            required: false,
1407            directed: true,
1408        }
1409    }
1410}
1411
1412/// Cardinality rule for relationships in schema config.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414#[serde(rename_all = "snake_case")]
1415pub enum CardinalitySchemaRule {
1416    /// One source to one target.
1417    OneToOne,
1418    /// One source to many targets.
1419    OneToMany {
1420        /// Minimum number of targets.
1421        min: u32,
1422        /// Maximum number of targets.
1423        max: u32,
1424    },
1425    /// Many sources to one target.
1426    ManyToOne {
1427        /// Minimum number of sources.
1428        min: u32,
1429        /// Maximum number of sources.
1430        max: u32,
1431    },
1432    /// Many sources to many targets.
1433    ManyToMany {
1434        /// Minimum targets per source.
1435        min_per_source: u32,
1436        /// Maximum targets per source.
1437        max_per_source: u32,
1438    },
1439}
1440
1441impl Default for CardinalitySchemaRule {
1442    fn default() -> Self {
1443        Self::OneToMany { min: 1, max: 5 }
1444    }
1445}
1446
1447/// Global configuration settings.
1448#[derive(Debug, Clone, Serialize, Deserialize)]
1449pub struct GlobalConfig {
1450    /// Random seed for reproducibility
1451    pub seed: Option<u64>,
1452    /// Industry sector
1453    pub industry: IndustrySector,
1454    /// Simulation start date (YYYY-MM-DD)
1455    #[serde(alias = "startDate")]
1456    pub start_date: String,
1457    /// Simulation period in months
1458    #[serde(alias = "periodMonths")]
1459    pub period_months: u32,
1460    /// Base currency for group reporting
1461    #[serde(default = "default_currency", alias = "groupCurrency")]
1462    pub group_currency: String,
1463    /// Presentation currency for consolidated financial statements (ISO 4217).
1464    /// If not set, defaults to `group_currency`.
1465    #[serde(default, alias = "presentationCurrency")]
1466    pub presentation_currency: Option<String>,
1467    /// Enable parallel generation
1468    #[serde(default = "default_true")]
1469    pub parallel: bool,
1470    /// Number of worker threads (0 = auto-detect)
1471    #[serde(default, alias = "workerThreads")]
1472    pub worker_threads: usize,
1473    /// Memory limit in MB (0 = unlimited)
1474    #[serde(default, alias = "memoryLimitMb")]
1475    pub memory_limit_mb: usize,
1476    /// Fiscal year length in months (defaults to 12 if not set).
1477    /// Used by session-based generation to split the total period into fiscal years.
1478    #[serde(default, alias = "fiscalYearMonths")]
1479    pub fiscal_year_months: Option<u32>,
1480}
1481
/// Serde fallback for `GlobalConfig::group_currency`: `"USD"`.
fn default_currency() -> String {
    String::from("USD")
}
/// Shared serde fallback for boolean fields that are on unless configured
/// otherwise: always `true`.
fn default_true() -> bool {
    true
}
1488
1489/// Configuration for generation session behavior.
1490///
1491/// When enabled, the generation pipeline splits the total period into fiscal years
1492/// and generates data period-by-period, carrying forward balance state.
1493#[derive(Debug, Clone, Serialize, Deserialize)]
1494pub struct SessionSchemaConfig {
1495    /// Whether session-based (period-by-period) generation is enabled.
1496    #[serde(default)]
1497    pub enabled: bool,
1498    /// Optional path for saving/loading session checkpoint files.
1499    #[serde(default)]
1500    pub checkpoint_path: Option<String>,
1501    /// Whether to write output files per fiscal period (e.g., `period_01/`).
1502    #[serde(default = "default_true")]
1503    pub per_period_output: bool,
1504    /// Whether to also produce a single consolidated output across all periods.
1505    #[serde(default = "default_true")]
1506    pub consolidated_output: bool,
1507}
1508
1509impl Default for SessionSchemaConfig {
1510    fn default() -> Self {
1511        Self {
1512            enabled: false,
1513            checkpoint_path: None,
1514            per_period_output: true,
1515            consolidated_output: true,
1516        }
1517    }
1518}
1519
1520/// Company code configuration.
1521#[derive(Debug, Clone, Serialize, Deserialize)]
1522pub struct CompanyConfig {
1523    /// Company code identifier
1524    pub code: String,
1525    /// Company name
1526    pub name: String,
1527    /// Local currency (ISO 4217)
1528    pub currency: String,
1529    /// Functional currency for IAS 21 translation (ISO 4217).
1530    /// If not set, defaults to the `currency` field (i.e. local == functional).
1531    #[serde(default, alias = "functionalCurrency")]
1532    pub functional_currency: Option<String>,
1533    /// Country code (ISO 3166-1 alpha-2)
1534    pub country: String,
1535    /// Fiscal year variant
1536    #[serde(default = "default_fiscal_variant", alias = "fiscalYearVariant")]
1537    pub fiscal_year_variant: String,
1538    /// Transaction volume per year
1539    #[serde(alias = "annualTransactionVolume")]
1540    pub annual_transaction_volume: TransactionVolume,
1541    /// Company-specific transaction weight
1542    #[serde(default = "default_weight", alias = "volumeWeight")]
1543    pub volume_weight: f64,
1544}
1545
/// Serde fallback for `CompanyConfig::fiscal_year_variant`: `"K4"`.
fn default_fiscal_variant() -> String {
    String::from("K4")
}
/// Serde fallback for `CompanyConfig::volume_weight`: 1.0.
fn default_weight() -> f64 {
    1.0_f64
}
1552
1553/// Transaction volume presets.
1554#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1555#[serde(rename_all = "snake_case")]
1556pub enum TransactionVolume {
1557    /// 10,000 transactions per year
1558    TenK,
1559    /// 50,000 transactions per year
1560    FiftyK,
1561    /// 100,000 transactions per year
1562    HundredK,
1563    /// 1,000,000 transactions per year
1564    OneM,
1565    /// 10,000,000 transactions per year
1566    TenM,
1567    /// 100,000,000 transactions per year
1568    HundredM,
1569    /// Custom count
1570    Custom(u64),
1571}
1572
1573impl TransactionVolume {
1574    /// Get the transaction count.
1575    pub fn count(&self) -> u64 {
1576        match self {
1577            Self::TenK => 10_000,
1578            Self::FiftyK => 50_000,
1579            Self::HundredK => 100_000,
1580            Self::OneM => 1_000_000,
1581            Self::TenM => 10_000_000,
1582            Self::HundredM => 100_000_000,
1583            Self::Custom(n) => *n,
1584        }
1585    }
1586}
1587
1588/// Chart of Accounts configuration.
1589#[derive(Debug, Clone, Serialize, Deserialize)]
1590pub struct ChartOfAccountsConfig {
1591    /// CoA complexity level
1592    pub complexity: CoAComplexity,
1593    /// Use industry-specific accounts
1594    #[serde(default = "default_true")]
1595    pub industry_specific: bool,
1596    /// Custom account definitions file
1597    pub custom_accounts: Option<PathBuf>,
1598    /// Minimum hierarchy depth
1599    #[serde(default = "default_min_depth")]
1600    pub min_hierarchy_depth: u8,
1601    /// Maximum hierarchy depth
1602    #[serde(default = "default_max_depth")]
1603    pub max_hierarchy_depth: u8,
1604    /// **v5.7.0** — expand canonical accounts into industry-specific
1605    /// 6-digit sub-accounts using the embedded
1606    /// [`datasynth_core::industry_packs`] (manufacturing, retail,
1607    /// financial_services, healthcare, technology). When `true`:
1608    ///
1609    /// - Each canonical 4-digit account that has an expansion in the
1610    ///   pack becomes a non-postable control account (`is_postable =
1611    ///   false`).
1612    /// - 2–6 6-digit sub-accounts are added per parent, with
1613    ///   suffix-driven names (`"Product Revenue — Steel Products"`),
1614    ///   industry-realistic gaps, and inherited ISO 21378 codes.
1615    /// - Generators that currently target canonical accounts via
1616    ///   constants will pick a sub-account deterministically per
1617    ///   `document_id` (preserving seed-based reproducibility).
1618    ///
1619    /// Default: `false` (preserves v5.6.0 behaviour exactly — same
1620    /// account count, same numbering, same goldens).
1621    #[serde(default, alias = "expandIndustrySubaccounts")]
1622    pub expand_industry_subaccounts: bool,
1623}
1624
/// Serde fallback for `ChartOfAccountsConfig::min_hierarchy_depth`: 2.
fn default_min_depth() -> u8 {
    2_u8
}
/// Serde fallback for `ChartOfAccountsConfig::max_hierarchy_depth`: 5.
fn default_max_depth() -> u8 {
    5_u8
}
1631
1632impl Default for ChartOfAccountsConfig {
1633    fn default() -> Self {
1634        Self {
1635            complexity: CoAComplexity::Small,
1636            industry_specific: true,
1637            custom_accounts: None,
1638            min_hierarchy_depth: default_min_depth(),
1639            max_hierarchy_depth: default_max_depth(),
1640            expand_industry_subaccounts: false,
1641        }
1642    }
1643}
1644
1645/// Transaction generation configuration.
1646#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1647pub struct TransactionConfig {
1648    /// Line item distribution
1649    #[serde(default)]
1650    pub line_item_distribution: LineItemDistributionConfig,
1651    /// Debit/credit balance distribution
1652    #[serde(default)]
1653    pub debit_credit_distribution: DebitCreditDistributionConfig,
1654    /// Even/odd line count distribution
1655    #[serde(default)]
1656    pub even_odd_distribution: EvenOddDistributionConfig,
1657    /// Transaction source distribution
1658    #[serde(default)]
1659    pub source_distribution: SourceDistribution,
1660    /// **T2-D** Source-mix breadth. When unset or `true` (the default), the
1661    /// emitted `source` column is drawn from a generic SAP document-type mix
1662    /// (~25 codes, entropy ~2.7) instead of the coarse `TransactionSource`
1663    /// enum (~4 values, entropy ~0.75), closing the source-mix gap measured
1664    /// in experiments/ml/FINDINGS.md §6. Industry priors, when loaded, take
1665    /// precedence. Set `false` to restore the legacy enum labels. `Option`
1666    /// (not bare `bool`) so the default is genuinely on under both serde and
1667    /// `Default::default()`.
1668    #[serde(default)]
1669    pub synthetic_source_codes: Option<bool>,
1670    /// **SOTA-1** Recurring / standard-journal templating. When unset or `true`
1671    /// (the default), the no-priors generation path reuses a small per-(company,
1672    /// process) library of standard JE account-archetypes with high probability,
1673    /// so standard postings recur (and a hot subset of accounts dominates)
1674    /// instead of every JE drawing fresh uniform accounts. Matches the corpus's
1675    /// heavy templating (FINDINGS.md sec.8: 97% recurring, top-50 cover 65%; vs
1676    /// the engine's 758/1k unique). Reuse overrides only account *choice* (the
1677    /// main RNG + amounts/dates/counts are unchanged). Set `false` for the
1678    /// legacy uniform-per-line account selection.
1679    #[serde(default)]
1680    pub recurring_templates: Option<bool>,
1681    /// **SOTA-5** Fraction of journal entries that are reversals/corrections of
1682    /// a recent JE (swap dr/cr, reference the original) — a process auditors
1683    /// specifically look for, and largely absent from the engine (FINDINGS.md
1684    /// sec.8: corpus reversal-proxy ~10% vs synthetic ~0.2%). Unset → a default
1685    /// of ~0.10 (matching the corpus proxy); `0.0` disables it. Reversals are
1686    /// interspersed without perturbing the normal JEs (separate RNG + derived id).
1687    #[serde(default)]
1688    pub reversal_rate: Option<f64>,
1689    /// **SOTA-2** Concentrate posting activity onto a hot subset of accounts via
1690    /// a Zipf (power-law) override of the per-line account pick, so a few
1691    /// accounts carry most lines like a real GL (FINDINGS.md sec.8: corpus
1692    /// top-10% of accounts ≈ 95% of lines vs the engine's near-uniform ~0.21).
1693    /// The uniform draw is still consumed (amounts/dates/counts unchanged) — only
1694    /// the chosen account moves toward the hot set. Set `false` for the legacy
1695    /// uniform-over-pool selection. Default-on when unset.
1696    #[serde(default)]
1697    pub account_concentration: Option<bool>,
1698    /// **SOTA-6** Fraction of journal entries that are allocation/assessment
1699    /// batches — large 1-to-many postings (one cost pool spread across many
1700    /// cost centers) that drive the corpus lines-per-JE tail (FINDINGS.md
1701    /// sec.8: AB docs ~52 lines vs the engine's ~4.6 mean with no large-batch
1702    /// process). Each batch carries ~30-80 cost-center-spread sub-lines and
1703    /// stays balanced. Unset → a small default (~0.008, ≈8% of lines); `0.0`
1704    /// disables. Interspersed without perturbing the normal JEs (separate RNG +
1705    /// derived id, reusing a recent JE's header).
1706    #[serde(default)]
1707    pub allocation_batch_rate: Option<f64>,
1708    /// **SOTA-3** Populate a line-level `business_unit` dimension — an
1709    /// organisational segment that rolls up the cost center, or the profit
1710    /// center as fallback (the same dimension value always maps to the same BU).
1711    /// The corpus carries a BU dimension (~11 codes) the engine lacked entirely;
1712    /// this fills it wherever a cost or profit center is present (~corpus fill),
1713    /// so BU-level analytics are coherent. Default-on when unset; `false`
1714    /// leaves `business_unit` empty (legacy).
1715    #[serde(default)]
1716    pub business_unit_dimension: Option<bool>,
1717    /// **SOTA-4** Fraction of journal entries that post in a foreign
1718    /// (document) currency — SAP-style: `debit_amount`/`credit_amount`/
1719    /// `local_amount` stay the company-ledger amount (DMBTR; the trial balance
1720    /// is unaffected), and the line's `transaction_amount` (WRBTR) plus
1721    /// `header.currency` (WAERS) / `header.exchange_rate` carry the foreign
1722    /// value. The corpus shows ~3.5% functional≠reporting (FINDINGS §8).
1723    /// Unset/`0.0` → all company-currency (default). Additive — ledger
1724    /// coherence is preserved; enable for corpus-matching / FX realism.
1725    #[serde(default)]
1726    pub foreign_currency_rate: Option<f64>,
1727    /// Seasonality configuration
1728    #[serde(default)]
1729    pub seasonality: SeasonalityConfig,
1730    /// Amount distribution
1731    #[serde(default)]
1732    pub amounts: AmountDistributionConfig,
1733    /// Benford's Law compliance configuration
1734    #[serde(default)]
1735    pub benford: BenfordConfig,
1736    /// SOTA-10 (FINDINGS §14): optional hard cap on total lines per JE. Corpus has
1737    /// p99.9 ~99 lines / max ~924; the synthetic engine occasionally produces
1738    /// 2000+-line monster JEs that degrade the audit packet's signal-to-noise.
1739    /// `None` = no cap (legacy); ~100 is a realism-matching default. Applies after
1740    /// copula adjustment; preserves balance by scaling debit/credit proportionally.
1741    #[serde(default)]
1742    pub lines_per_je_cap: Option<usize>,
1743    /// SOTA-9 (FINDINGS §14): archetype reuse probability for the recurring-templates
1744    /// process (overrides the historical 0.90 default). Corpus recurring share ~0.97;
1745    /// raising this concentrates `edges/je` toward the corpus value (currently 8.75×
1746    /// too diffuse). Range [0.0, 1.0]. None = use legacy 0.90.
1747    #[serde(default)]
1748    pub archetype_reuse_probability: Option<f64>,
1749    /// SOTA-8 (FINDINGS §14): source-conditional Dirichlet account-pair sampler.
1750    /// Models the corpus finding that per-source account usage is *concentrated*
1751    /// (entropy ~0.68 vs synth 0.97) over a *larger* pool (~23 vs 5 accts/source).
1752    /// Default off — opt-in so existing synthetic streams stay byte-identical;
1753    /// enable for audit-realism + tighter inverse-audit normal manifold.
1754    #[serde(default)]
1755    pub source_conditional_account_pair: SourceConditionalAccountPairConfig,
1756}
1757
1758/// SOTA-8 — per-source Dirichlet over account pairs. Concentration α controls
1759/// per-source structure tightness (low α = razor-tight prior, high α = diffuse);
1760/// `accts_per_source_target` controls the per-source account-pool size.
1761#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
1762pub struct SourceConditionalAccountPairConfig {
1763    /// Enable the source-conditional account-pair sampler (default off).
1764    #[serde(default)]
1765    pub enabled: bool,
1766    /// Symmetric Dirichlet α — lower = more concentrated PMF per source.
1767    /// α=0.5 + N_s=25 ⇒ expected normalised entropy ≈ 0.65 (corpus median 0.68).
1768    #[serde(default = "default_source_cond_concentration")]
1769    pub concentration: f64,
1770    /// Expected distinct accounts per source (jittered by LogNormal(0, 0.3)).
1771    /// Corpus median 23.5; synth pre-SOTA-8 is ~5.
1772    #[serde(default = "default_accts_per_source_target")]
1773    pub accts_per_source_target: usize,
1774}
1775
/// Default symmetric Dirichlet α for the source-conditional account-pair
/// sampler.
fn default_source_cond_concentration() -> f64 {
    const ALPHA: f64 = 0.5;
    ALPHA
}
1779
/// Default target for the number of distinct accounts per source.
fn default_accts_per_source_target() -> usize {
    const ACCOUNTS_PER_SOURCE: usize = 25;
    ACCOUNTS_PER_SOURCE
}
1783
1784impl Default for SourceConditionalAccountPairConfig {
1785    fn default() -> Self {
1786        Self {
1787            enabled: false,
1788            concentration: default_source_cond_concentration(),
1789            accts_per_source_target: default_accts_per_source_target(),
1790        }
1791    }
1792}
1793
1794/// Benford's Law compliance configuration.
1795#[derive(Debug, Clone, Serialize, Deserialize)]
1796pub struct BenfordConfig {
1797    /// Enable Benford's Law compliance for amount generation
1798    #[serde(default = "default_true")]
1799    pub enabled: bool,
1800    /// Tolerance for deviation from ideal Benford distribution (0.0-1.0)
1801    #[serde(default = "default_benford_tolerance")]
1802    pub tolerance: f64,
1803    /// Transaction sources exempt from Benford's Law (fixed amounts)
1804    #[serde(default)]
1805    pub exempt_sources: Vec<BenfordExemption>,
1806}
1807
/// Default tolerance for deviation from the ideal Benford distribution.
fn default_benford_tolerance() -> f64 {
    const TOLERANCE: f64 = 0.05;
    TOLERANCE
}
1811
1812impl Default for BenfordConfig {
1813    fn default() -> Self {
1814        Self {
1815            enabled: true,
1816            tolerance: default_benford_tolerance(),
1817            exempt_sources: vec![BenfordExemption::Recurring, BenfordExemption::Payroll],
1818        }
1819    }
1820}
1821
1822/// Types of transactions exempt from Benford's Law.
1823#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1824#[serde(rename_all = "snake_case")]
1825pub enum BenfordExemption {
1826    /// Recurring fixed amounts (rent, subscriptions)
1827    Recurring,
1828    /// Payroll (standardized salaries)
1829    Payroll,
1830    /// Fixed fees and charges
1831    FixedFees,
1832    /// Round number purchases (often legitimate)
1833    RoundAmounts,
1834}
1835
1836/// Distribution of transaction sources.
1837#[derive(Debug, Clone, Serialize, Deserialize)]
1838pub struct SourceDistribution {
1839    /// Manual entries percentage
1840    pub manual: f64,
1841    /// Automated system entries
1842    pub automated: f64,
1843    /// Recurring entries
1844    pub recurring: f64,
1845    /// Adjustment entries
1846    pub adjustment: f64,
1847}
1848
1849impl Default for SourceDistribution {
1850    fn default() -> Self {
1851        Self {
1852            manual: 0.20,
1853            automated: 0.70,
1854            recurring: 0.07,
1855            adjustment: 0.03,
1856        }
1857    }
1858}
1859
1860/// Output configuration.
1861#[derive(Debug, Clone, Serialize, Deserialize)]
1862pub struct OutputConfig {
1863    /// Output mode
1864    #[serde(default)]
1865    pub mode: OutputMode,
1866    /// Output directory
1867    #[serde(alias = "outputDirectory")]
1868    pub output_directory: PathBuf,
1869    /// File formats to generate. Accepts both `formats: [json, csv]`
1870    /// (canonical YAML) and `exportFormat: "json"` / `exportFormats:
1871    /// ["json", "csv"]` (SDK-style camelCase). The single-string
1872    /// `exportFormat` form is deserialised via `one_or_many_formats`
1873    /// so SDK clients submitting `exportFormat: "json"` hit the right
1874    /// code path instead of silently falling through to the Parquet
1875    /// default — the bug the SDK team flagged in v4.4.0.
1876    #[serde(
1877        default = "default_formats",
1878        alias = "exportFormats",
1879        alias = "exportFormat",
1880        deserialize_with = "one_or_many_formats"
1881    )]
1882    pub formats: Vec<FileFormat>,
1883    /// Compression settings
1884    #[serde(default)]
1885    pub compression: CompressionConfig,
1886    /// Batch size for writes
1887    #[serde(default = "default_batch_size", alias = "batchSize")]
1888    pub batch_size: usize,
1889    /// Include ACDOCA format
1890    #[serde(default = "default_true", alias = "includeAcdoca")]
1891    pub include_acdoca: bool,
1892    /// Include BSEG format
1893    #[serde(default, alias = "includeBseg")]
1894    pub include_bseg: bool,
1895    /// Partition by fiscal period
1896    #[serde(default = "default_true", alias = "partitionByPeriod")]
1897    pub partition_by_period: bool,
1898    /// Partition by company code
1899    #[serde(default, alias = "partitionByCompany")]
1900    pub partition_by_company: bool,
1901    /// Numeric serialization mode for JSON output.
1902    /// "string" (default): decimals as `"1729237.30"` — lossless precision.
1903    /// "native": decimals as `1729237.30` — friendlier for pandas/analytics.
1904    #[serde(default, alias = "numericMode")]
1905    pub numeric_mode: NumericMode,
1906    /// JSON export layout for journal entries and document flows.
1907    /// "nested" (default): `{"header": {...}, "lines": [...]}` — natural ERP structure.
1908    /// "flat": header fields repeated on every line — friendlier for analytics/ML.
1909    ///
1910    /// Accepts both `export_layout` (canonical / YAML) and `exportLayout`
1911    /// (camelCase / SDK JSON) so SDKs that follow camelCase conventions
1912    /// hit the flat path rather than silently getting the Nested default.
1913    /// Before v3.1.1 the missing camelCase alias meant SDK requests with
1914    /// `exportLayout: "flat"` were silently ignored, which SDK operators
1915    /// reported as "flat hangs generation" (the job completed with Nested
1916    /// layout, but manifests didn't match the expected flat shape).
1917    #[serde(default, alias = "exportLayout")]
1918    pub export_layout: ExportLayout,
1919    /// SAP / HANA export settings (only read when the CLI
1920    /// `--export-format sap` flag is passed). Empty by default so
1921    /// existing configs don't change behaviour; dialect defaults to
1922    /// `classic` for backward compatibility.
1923    #[serde(default, alias = "sapExport")]
1924    pub sap: SapExportSettings,
1925    /// SAF-T (Standard Audit File for Tax) export settings. Read when
1926    /// the CLI `--export-format saft` flag is passed. Defaults to
1927    /// Portugal (`pt`) because the PT variant is the most mature and
1928    /// cross-jurisdiction compatible. Override with
1929    /// `jurisdiction: pl|ro|no|lu` for the other supported countries.
1930    #[serde(default, alias = "saftExport")]
1931    pub saft: SaftExportSettings,
1932}
1933
1934/// Configuration for the SAP export writers (BKPF / BSEG / ACDOCA and
1935/// master-data tables).
1936///
1937/// Mirror of `datasynth_output::SapExportConfig` in YAML form — the CLI
1938/// translates this into the runtime struct before invoking the exporter,
1939/// replacing the v3.x hardcoded `SapExportConfig::default()`.
1940#[derive(Debug, Clone, Serialize, Deserialize)]
1941pub struct SapExportSettings {
1942    /// SAP client / MANDT column value on every table.
1943    #[serde(default = "default_sap_client")]
1944    pub client: String,
1945    /// Leading ledger for ACDOCA rows (0L for S/4HANA default).
1946    #[serde(default = "default_sap_ledger")]
1947    pub ledger: String,
1948    /// Source system identifier — written to ACDOCA.AWSYS so downstream
1949    /// consumers can distinguish synthetic rows from production ones.
1950    #[serde(default = "default_sap_source_system")]
1951    pub source_system: String,
1952    /// Local currency (WAERS / RWCUR).
1953    #[serde(default = "default_sap_currency")]
1954    pub local_currency: String,
1955    /// Optional group / consolidation currency (triggers the HSL / RHCUR columns).
1956    #[serde(default, skip_serializing_if = "Option::is_none")]
1957    pub group_currency: Option<String>,
1958    /// Which SAP tables to export. Empty = default set (bkpf, bseg, acdoca).
1959    #[serde(default)]
1960    pub tables: Vec<String>,
1961    /// Include ZSIM_* extension columns on ACDOCA rows.
1962    #[serde(default = "default_true")]
1963    pub include_extension_fields: bool,
1964    /// Export dialect — `classic` (R/3 / BODS) or `hana` (S/4HANA CDS).
1965    #[serde(default)]
1966    pub dialect: SapDialectSetting,
1967    /// Legacy flag, retained for backward compatibility. Has no effect
1968    /// when `dialect = hana`.
1969    #[serde(default = "default_true")]
1970    pub use_sap_date_format: bool,
1971}
1972
1973impl Default for SapExportSettings {
1974    fn default() -> Self {
1975        Self {
1976            client: default_sap_client(),
1977            ledger: default_sap_ledger(),
1978            source_system: default_sap_source_system(),
1979            local_currency: default_sap_currency(),
1980            group_currency: None,
1981            tables: Vec::new(),
1982            include_extension_fields: true,
1983            dialect: SapDialectSetting::default(),
1984            use_sap_date_format: true,
1985        }
1986    }
1987}
1988
/// Default SAP client (MANDT) value.
fn default_sap_client() -> String {
    String::from("100")
}

/// Default leading ledger.
fn default_sap_ledger() -> String {
    String::from("0L")
}

/// Default source-system identifier.
fn default_sap_source_system() -> String {
    String::from("SYNTH")
}

/// Default local currency code.
fn default_sap_currency() -> String {
    String::from("USD")
}
2001
2002/// SAP export dialect (wire form — `datasynth_output::SapDialect` is the
2003/// runtime form).
2004#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
2005#[serde(rename_all = "snake_case")]
2006pub enum SapDialectSetting {
2007    /// Legacy R/3 / BODS-compatible CSV (default).
2008    #[default]
2009    Classic,
2010    /// S/4HANA CDS dialect (semicolon + UTF-8 BOM + decimal comma + ISO dates).
2011    Hana,
2012}
2013
2014/// SAF-T export settings (v4.3.1).
2015#[derive(Debug, Clone, Serialize, Deserialize)]
2016pub struct SaftExportSettings {
2017    /// ISO-ish two-letter code: `pt` / `pl` / `ro` / `no` / `lu`.
2018    /// Defaults to `pt` (Portugal, most mature variant).
2019    #[serde(default = "default_saft_jurisdiction")]
2020    pub jurisdiction: String,
2021    /// Company tax registration number / VAT ID / TIN used in the
2022    /// `Header.TaxRegistrationNumber` element. Falls back to
2023    /// `"Desconhecido"` (Portuguese for "unknown") when empty.
2024    #[serde(default)]
2025    pub company_tax_id: String,
2026    /// Optional override for the company name used in the Header.
2027    /// When empty, the first configured company's `name` is used.
2028    #[serde(default)]
2029    pub company_name: String,
2030}
2031
2032impl Default for SaftExportSettings {
2033    fn default() -> Self {
2034        Self {
2035            jurisdiction: default_saft_jurisdiction(),
2036            company_tax_id: String::new(),
2037            company_name: String::new(),
2038        }
2039    }
2040}
2041
/// Default SAF-T jurisdiction code (`pt`).
fn default_saft_jurisdiction() -> String {
    String::from("pt")
}
2045
2046fn default_formats() -> Vec<FileFormat> {
2047    vec![FileFormat::Parquet]
2048}
/// Default number of records per write batch.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100_000;
    BATCH_SIZE
}
2052
2053/// Custom deserializer for `formats` that accepts either a single
2054/// `FileFormat` (e.g. `"json"` for SDK `exportFormat: "json"`) or a
2055/// vector (e.g. `["json", "csv"]`). Without this shim an SDK config
2056/// with `exportFormat: "json"` would fail to parse (serde expects a
2057/// sequence for a `Vec` field) and silently fall through to defaults.
2058fn one_or_many_formats<'de, D>(deserializer: D) -> Result<Vec<FileFormat>, D::Error>
2059where
2060    D: serde::Deserializer<'de>,
2061{
2062    #[derive(Deserialize)]
2063    #[serde(untagged)]
2064    enum OneOrMany {
2065        One(FileFormat),
2066        Many(Vec<FileFormat>),
2067    }
2068    match OneOrMany::deserialize(deserializer)? {
2069        OneOrMany::One(f) => Ok(vec![f]),
2070        OneOrMany::Many(v) => Ok(v),
2071    }
2072}
2073
2074impl Default for OutputConfig {
2075    fn default() -> Self {
2076        Self {
2077            mode: OutputMode::FlatFile,
2078            output_directory: PathBuf::from("./output"),
2079            formats: default_formats(),
2080            compression: CompressionConfig::default(),
2081            batch_size: default_batch_size(),
2082            include_acdoca: true,
2083            include_bseg: false,
2084            partition_by_period: true,
2085            partition_by_company: false,
2086            numeric_mode: NumericMode::default(),
2087            export_layout: ExportLayout::default(),
2088            sap: SapExportSettings::default(),
2089            saft: SaftExportSettings::default(),
2090        }
2091    }
2092}
2093
2094/// Numeric serialization mode for JSON decimal fields.
2095#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2096#[serde(rename_all = "snake_case")]
2097pub enum NumericMode {
2098    /// Decimals as JSON strings (e.g. `"1729237.30"`). Preserves full precision.
2099    #[default]
2100    String,
2101    /// Decimals as JSON numbers (e.g. `1729237.30`). Friendlier for pandas/analytics.
2102    Native,
2103}
2104
2105/// JSON export layout for nested structures (journal entries, document flows).
2106#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
2107#[serde(rename_all = "snake_case")]
2108pub enum ExportLayout {
2109    /// Nested structure: `{"header": {...}, "lines": [...]}`. Natural ERP format.
2110    #[default]
2111    Nested,
2112    /// Flat structure: header fields repeated on every line. Analytics-friendly.
2113    Flat,
2114}
2115
2116/// Output mode.
2117#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2118#[serde(rename_all = "snake_case")]
2119pub enum OutputMode {
2120    /// Stream records as generated
2121    Streaming,
2122    /// Write to flat files
2123    #[default]
2124    FlatFile,
2125    /// Both streaming and flat file
2126    Both,
2127}
2128
2129/// Supported file formats.
2130#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
2131#[serde(rename_all = "snake_case")]
2132pub enum FileFormat {
2133    Csv,
2134    Parquet,
2135    Json,
2136    JsonLines,
2137}
2138
2139/// Compression configuration.
2140#[derive(Debug, Clone, Serialize, Deserialize)]
2141pub struct CompressionConfig {
2142    /// Enable compression
2143    #[serde(default = "default_true")]
2144    pub enabled: bool,
2145    /// Compression algorithm
2146    #[serde(default)]
2147    pub algorithm: CompressionAlgorithm,
2148    /// Compression level (1-9)
2149    #[serde(default = "default_compression_level")]
2150    pub level: u8,
2151}
2152
/// Default compression level.
fn default_compression_level() -> u8 {
    const LEVEL: u8 = 3;
    LEVEL
}
2156
2157impl Default for CompressionConfig {
2158    fn default() -> Self {
2159        Self {
2160            enabled: true,
2161            algorithm: CompressionAlgorithm::default(),
2162            level: default_compression_level(),
2163        }
2164    }
2165}
2166
2167/// Compression algorithms.
2168#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
2169#[serde(rename_all = "snake_case")]
2170pub enum CompressionAlgorithm {
2171    Gzip,
2172    #[default]
2173    Zstd,
2174    Lz4,
2175    Snappy,
2176}
2177
2178/// Fraud simulation configuration.
2179///
2180/// ## Document-level vs. line-level fraud
2181///
2182/// `fraud_rate` applies to individual journal-entry lines (line-level).
2183/// `document_fraud_rate` (optional) applies to source documents
2184/// (purchase orders, vendor invoices, customer invoices, payments), and when
2185/// `propagate_to_lines` is true, every JE derived from a fraudulent document
2186/// also gets `is_fraud = true`. This lets users express either:
2187///
2188///  * pure line-level fraud (`document_fraud_rate = None`): legacy behaviour;
2189///  * pure document-level fraud (`fraud_rate ≈ 0` and `document_fraud_rate` set):
2190///    fraud rings expressed at document granularity — realistic for PO/invoice
2191///    fraud schemes where one fraudulent document spawns multiple derived JEs;
2192///  * hybrid (both set): document-level scheme fraud plus unrelated line-level
2193///    slip-ups.
2194///
2195/// `propagate_to_document` does the inverse: when a JE is tagged as fraud by
2196/// the anomaly injector, its source document is also marked fraudulent.
2197#[derive(Debug, Clone, Serialize, Deserialize)]
2198pub struct FraudConfig {
2199    /// Enable fraud scenario generation
2200    #[serde(default)]
2201    pub enabled: bool,
2202    /// Line-level fraud rate: fraction of individual JE lines flagged as fraud (0.0 to 1.0).
2203    ///
2204    /// # Effective line-level prevalence
2205    ///
2206    /// If `document_fraud_rate = Some(d)` and `propagate_to_lines = true`,
2207    /// the observed line-level fraud prevalence is roughly:
2208    ///
2209    /// > `P(line is_fraud) ≈ fraud_rate + d × avg_lines_per_fraud_doc / total_lines`
2210    ///
2211    /// For a typical retail job (avg 3 lines per document, ~30 % of lines
2212    /// come from doc-flow-derived JEs) the combined rate lands near:
2213    ///
2214    /// > `fraud_rate + 0.3 × d`
2215    ///
2216    /// so setting `fraud_rate=0.02, document_fraud_rate=0.05, propagate_to_lines=true`
2217    /// produces ~3.5 % line-level fraud, not 2 %. To target a specific
2218    /// line-level prevalence X, choose `fraud_rate = X - 0.3 × d`.
2219    #[serde(default = "default_fraud_rate", alias = "fraudRate")]
2220    pub fraud_rate: f64,
2221    /// Document-level fraud rate: fraction of source documents (PO, vendor
2222    /// invoice, customer invoice, payment) flagged as fraud. `None` disables
2223    /// document-level injection; `Some(r)` marks ~r × document-count as fraud
2224    /// independently of the line-level rate.
2225    ///
2226    /// v4.4.2+ default: `Some(0.01)` — the SDK team reported
2227    /// `is_fraud_propagated: 0/72` regressed from `12/33` in 3.1.1 because
2228    /// the default had silently become None. A 1% document-fraud default
2229    /// restores the propagation signal (~0.3% of JE headers carry
2230    /// `is_fraud_propagated = true`) without meaningfully changing the
2231    /// line-level fraud prevalence. Set to `Some(0.0)` or `null` in your
2232    /// YAML to explicitly disable document-level injection.
2233    #[serde(default = "default_document_fraud_rate", alias = "documentFraudRate")]
2234    pub document_fraud_rate: Option<f64>,
2235    /// When true, flagging a document as fraudulent cascades `is_fraud = true`
2236    /// and `fraud_type` to every journal entry derived from that document,
2237    /// and records `fraud_source_document_id` on the JE header.
2238    /// Default: `true`.
2239    #[serde(default = "default_true", alias = "propagateToLines")]
2240    pub propagate_to_lines: bool,
2241    /// When true, tagging a JE as fraud via line-level anomaly injection also
2242    /// marks the JE's source document as fraudulent (if it can be resolved).
2243    /// Default: `true`.
2244    #[serde(default = "default_true", alias = "propagateToDocument")]
2245    pub propagate_to_document: bool,
2246    /// Fraud type distribution
2247    #[serde(default)]
2248    pub fraud_type_distribution: FraudTypeDistribution,
2249    /// Enable fraud clustering
2250    #[serde(default)]
2251    pub clustering_enabled: bool,
2252    /// Clustering factor
2253    #[serde(default = "default_clustering_factor")]
2254    pub clustering_factor: f64,
2255    /// Approval thresholds for threshold-adjacent fraud pattern
2256    #[serde(default = "default_approval_thresholds")]
2257    pub approval_thresholds: Vec<f64>,
2258    /// v5.30 B3 (#153) — per-business-process fraud rate overrides.
2259    ///
2260    /// Keys are business-process slugs (`"P2P"`, `"O2C"`, `"R2R"`, `"H2R"`,
2261    /// `"A2R"`); values are line-level fraud rates that **override** the
2262    /// global `fraud_rate` when a JE's selected business process matches a
2263    /// key. Unmatched processes fall back to `fraud_rate`.
2264    ///
2265    /// When empty (the default), per-process rates are disabled and every
2266    /// JE uses the global `fraud_rate` — preserving v5.29 byte-identical
2267    /// output for configs that don't opt in.
2268    ///
2269    /// # Why
2270    ///
2271    /// Real audit data shows process-specific fraud signatures (R2R
2272    /// manual-close and period-end accruals carry higher fraud
2273    /// concentration than P2P invoice-processing). The v5.29 global
2274    /// `fraud_rate` flattens this signal, leaving the GNN fraud detector
2275    /// at a uniform per-process AUC band (0.914-0.925 in the v5.29 retrain).
2276    ///
2277    /// # Example
2278    ///
2279    /// ```yaml
2280    /// fraud:
2281    ///   fraud_rate: 0.02         # baseline for unmapped processes
2282    ///   per_process_rates:
2283    ///     R2R: 0.06              # 3× baseline — period-close hot spot
2284    ///     P2P: 0.04              # 2× baseline — invoice fraud
2285    ///     O2C: 0.025             # 1.25× baseline — revenue manipulation
2286    ///     H2R: 0.015             # below baseline — payroll
2287    ///     A2R: 0.020             # baseline — asset accounting
2288    /// ```
2289    ///
2290    /// Aggregate effective line-level prevalence depends on the
2291    /// `business_processes` weights mix; calibrate to a target X by
2292    /// solving for the weighted average. For default v5.29 weights
2293    /// (P2P 0.35, O2C 0.35, R2R 0.20, H2R 0.05, A2R 0.05) the
2294    /// example above yields ~0.0335 line-level fraud.
2295    #[serde(default, alias = "perProcessRates")]
2296    pub per_process_rates: std::collections::HashMap<String, f64>,
2297}
2298
/// Default approval thresholds for the threshold-adjacent fraud pattern.
fn default_approval_thresholds() -> Vec<f64> {
    const THRESHOLDS: [f64; 6] = [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
    THRESHOLDS.to_vec()
}
2302
/// Default line-level fraud rate.
fn default_fraud_rate() -> f64 {
    const FRAUD_RATE: f64 = 0.005;
    FRAUD_RATE
}
/// Default document-level fraud rate: `Some(0.05)`.
fn default_document_fraud_rate() -> Option<f64> {
    // v5.0.1 raised this from 0.01 to 0.05 so that scheme-level fraud
    // propagation is meaningful at typical line-level rates. The 1 %
    // default (introduced in v4.4.2 to restore `is_fraud_propagated > 0`)
    // was too conservative: at `fraud_rate = 0.08` it produced ~3.6 %
    // observed propagation against a 26.7 % target. With 5 %, the additive
    // formula `P(line is_fraud) ≈ fraud_rate + 0.3 × d` gives ~9.5 %
    // combined at fraud_rate=0.08 (closer to the spec target).
    // Set explicitly to `Some(0.0)` or `null` in YAML to disable, or to
    // a higher value (e.g. 0.20) for scheme-heavy fraud workloads.
    const DOCUMENT_FRAUD_RATE: f64 = 0.05;
    Some(DOCUMENT_FRAUD_RATE)
}
/// Default fraud clustering factor.
fn default_clustering_factor() -> f64 {
    const CLUSTERING_FACTOR: f64 = 3.0;
    CLUSTERING_FACTOR
}
2321
2322impl Default for FraudConfig {
2323    fn default() -> Self {
2324        Self {
2325            enabled: false,
2326            fraud_rate: default_fraud_rate(),
2327            document_fraud_rate: default_document_fraud_rate(),
2328            propagate_to_lines: true,
2329            propagate_to_document: true,
2330            fraud_type_distribution: FraudTypeDistribution::default(),
2331            clustering_enabled: false,
2332            clustering_factor: default_clustering_factor(),
2333            approval_thresholds: default_approval_thresholds(),
2334            per_process_rates: std::collections::HashMap::new(),
2335        }
2336    }
2337}
2338
2339/// Distribution of fraud types.
2340///
2341/// All fields default to `0.0` if absent from the YAML, so partial
2342/// distributions are accepted; the validator (`validate_sum_to_one`)
2343/// then enforces that the populated weights sum to `1.0 ± 0.01`.
2344#[derive(Debug, Clone, Serialize, Deserialize)]
2345#[serde(deny_unknown_fields)]
2346pub struct FraudTypeDistribution {
2347    #[serde(default)]
2348    pub suspense_account_abuse: f64,
2349    #[serde(default)]
2350    pub fictitious_transaction: f64,
2351    #[serde(default)]
2352    pub revenue_manipulation: f64,
2353    #[serde(default)]
2354    pub expense_capitalization: f64,
2355    #[serde(default)]
2356    pub split_transaction: f64,
2357    #[serde(default)]
2358    pub timing_anomaly: f64,
2359    #[serde(default)]
2360    pub unauthorized_access: f64,
2361    #[serde(default)]
2362    pub duplicate_payment: f64,
2363    /// Vendor kickback scheme.
2364    #[serde(default)]
2365    pub kickback_scheme: f64,
2366    /// Round-tripping funds through multiple entities or accounts.
2367    #[serde(default)]
2368    pub round_tripping: f64,
2369    /// Unauthorized customer/vendor discounts (sweethearting, side deals).
2370    #[serde(default)]
2371    pub unauthorized_discount: f64,
2372}
2373
2374impl Default for FraudTypeDistribution {
2375    fn default() -> Self {
2376        // Preserves the pre-extension default sum=1.0 over the original
2377        // eight fields.  The three additional fields (kickback_scheme,
2378        // round_tripping, unauthorized_discount) default to 0.0 so that
2379        // existing fraud packs / templates that explicitly enumerate the
2380        // original eight fields continue to merge to a 1.0 sum without
2381        // modification.  Users who want those fraud types must set them
2382        // explicitly (and rebalance the others).
2383        Self {
2384            suspense_account_abuse: 0.25,
2385            fictitious_transaction: 0.15,
2386            revenue_manipulation: 0.10,
2387            expense_capitalization: 0.10,
2388            split_transaction: 0.15,
2389            timing_anomaly: 0.10,
2390            unauthorized_access: 0.10,
2391            duplicate_payment: 0.05,
2392            kickback_scheme: 0.0,
2393            round_tripping: 0.0,
2394            unauthorized_discount: 0.0,
2395        }
2396    }
2397}
2398
2399/// Internal Controls System (ICS) configuration.
2400#[derive(Debug, Clone, Serialize, Deserialize)]
2401pub struct InternalControlsConfig {
2402    /// Enable internal controls system
2403    #[serde(default)]
2404    pub enabled: bool,
2405    /// Rate at which controls result in exceptions (0.0 - 1.0)
2406    #[serde(default = "default_exception_rate")]
2407    pub exception_rate: f64,
2408    /// Rate at which SoD violations occur (0.0 - 1.0)
2409    #[serde(default = "default_sod_violation_rate")]
2410    pub sod_violation_rate: f64,
2411    /// Export control master data to separate files
2412    #[serde(default = "default_true")]
2413    pub export_control_master_data: bool,
2414    /// SOX materiality threshold for marking transactions as SOX-relevant
2415    #[serde(default = "default_sox_materiality_threshold")]
2416    pub sox_materiality_threshold: f64,
2417    /// Enable COSO 2013 framework integration
2418    #[serde(default = "default_true")]
2419    pub coso_enabled: bool,
2420    /// Include entity-level controls in generation
2421    #[serde(default)]
2422    pub include_entity_level_controls: bool,
2423    /// Target maturity level for controls
2424    /// Valid values: "ad_hoc", "repeatable", "defined", "managed", "optimized", "mixed"
2425    #[serde(default = "default_target_maturity_level")]
2426    pub target_maturity_level: String,
2427}
2428
/// Serde default for `InternalControlsConfig::exception_rate`.
fn default_exception_rate() -> f64 {
    0.02
}

/// Serde default for `InternalControlsConfig::sod_violation_rate`.
fn default_sod_violation_rate() -> f64 {
    0.01
}

/// Serde default for `InternalControlsConfig::sox_materiality_threshold`.
fn default_sox_materiality_threshold() -> f64 {
    10_000.0
}

/// Serde default for `InternalControlsConfig::target_maturity_level`.
fn default_target_maturity_level() -> String {
    String::from("mixed")
}
2444
2445impl Default for InternalControlsConfig {
2446    fn default() -> Self {
2447        Self {
2448            enabled: false,
2449            exception_rate: default_exception_rate(),
2450            sod_violation_rate: default_sod_violation_rate(),
2451            export_control_master_data: true,
2452            sox_materiality_threshold: default_sox_materiality_threshold(),
2453            coso_enabled: true,
2454            include_entity_level_controls: false,
2455            target_maturity_level: default_target_maturity_level(),
2456        }
2457    }
2458}
2459
2460/// Business process configuration.
2461#[derive(Debug, Clone, Serialize, Deserialize)]
2462pub struct BusinessProcessConfig {
2463    /// Order-to-Cash weight
2464    #[serde(default = "default_o2c")]
2465    pub o2c_weight: f64,
2466    /// Procure-to-Pay weight
2467    #[serde(default = "default_p2p")]
2468    pub p2p_weight: f64,
2469    /// Record-to-Report weight
2470    #[serde(default = "default_r2r")]
2471    pub r2r_weight: f64,
2472    /// Hire-to-Retire weight
2473    #[serde(default = "default_h2r")]
2474    pub h2r_weight: f64,
2475    /// Acquire-to-Retire weight
2476    #[serde(default = "default_a2r")]
2477    pub a2r_weight: f64,
2478}
2479
/// Default Order-to-Cash weight.
fn default_o2c() -> f64 {
    0.35
}

/// Default Procure-to-Pay weight.
fn default_p2p() -> f64 {
    0.30
}

/// Default Record-to-Report weight.
fn default_r2r() -> f64 {
    0.20
}

/// Default Hire-to-Retire weight.
fn default_h2r() -> f64 {
    0.10
}

/// Default Acquire-to-Retire weight.
fn default_a2r() -> f64 {
    0.05
}
2495
2496impl Default for BusinessProcessConfig {
2497    fn default() -> Self {
2498        Self {
2499            o2c_weight: default_o2c(),
2500            p2p_weight: default_p2p(),
2501            r2r_weight: default_r2r(),
2502            h2r_weight: default_h2r(),
2503            a2r_weight: default_a2r(),
2504        }
2505    }
2506}
2507
2508/// User persona configuration.
2509#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2510pub struct UserPersonaConfig {
2511    /// Distribution of user personas
2512    #[serde(default)]
2513    pub persona_distribution: PersonaDistribution,
2514    /// Users per persona type
2515    #[serde(default)]
2516    pub users_per_persona: UsersPerPersona,
2517}
2518
2519/// Distribution of user personas for transaction generation.
2520#[derive(Debug, Clone, Serialize, Deserialize)]
2521pub struct PersonaDistribution {
2522    pub junior_accountant: f64,
2523    pub senior_accountant: f64,
2524    pub controller: f64,
2525    pub manager: f64,
2526    pub automated_system: f64,
2527}
2528
2529impl Default for PersonaDistribution {
2530    fn default() -> Self {
2531        Self {
2532            junior_accountant: 0.15,
2533            senior_accountant: 0.15,
2534            controller: 0.05,
2535            manager: 0.05,
2536            automated_system: 0.60,
2537        }
2538    }
2539}
2540
2541/// Number of users per persona type.
2542#[derive(Debug, Clone, Serialize, Deserialize)]
2543pub struct UsersPerPersona {
2544    pub junior_accountant: usize,
2545    pub senior_accountant: usize,
2546    pub controller: usize,
2547    pub manager: usize,
2548    pub automated_system: usize,
2549}
2550
2551impl Default for UsersPerPersona {
2552    fn default() -> Self {
2553        Self {
2554            junior_accountant: 10,
2555            senior_accountant: 5,
2556            controller: 2,
2557            manager: 3,
2558            automated_system: 20,
2559        }
2560    }
2561}
2562
2563/// Template configuration for realistic data generation.
2564///
2565/// # User-supplied template packs (v3.2.0+)
2566///
2567/// Set `path` to a directory (or single YAML/JSON file) to override or
2568/// extend the embedded default pools for vendor names, customer names,
2569/// material/asset descriptions, audit findings, bank names, and
2570/// department names. When `path` is `None` (the default), generators
2571/// use the compiled-in pools and output is byte-identical to v3.1.2.
2572///
2573/// See `crates/datasynth-core/src/templates/loader.rs::TemplateData`
2574/// for the full YAML schema. Use `datasynth-data templates export` to
2575/// dump the defaults as a starter pack.
2576#[derive(Debug, Clone, Serialize, Deserialize, Default)]
2577pub struct TemplateConfig {
2578    /// Name generation settings
2579    #[serde(default)]
2580    pub names: NameTemplateConfig,
2581    /// Description generation settings
2582    #[serde(default)]
2583    pub descriptions: DescriptionTemplateConfig,
2584    /// Reference number settings
2585    #[serde(default)]
2586    pub references: ReferenceTemplateConfig,
2587    /// Optional path to a user-supplied template file or directory.
2588    /// When set, entries from the file(s) augment or replace the
2589    /// embedded defaults according to `merge_strategy`.
2590    ///
2591    /// `None` (default) = use embedded pools only (byte-identical to v3.1.2).
2592    #[serde(default, alias = "templatesPath")]
2593    pub path: Option<std::path::PathBuf>,
2594    /// How file-based entries combine with embedded defaults.
2595    ///
2596    /// - `extend` (default): append file entries to embedded pools,
2597    ///   de-duplicating. Safe for incremental overlays.
2598    /// - `replace`: discard embedded pools entirely and use only file
2599    ///   entries. Requires a fully-populated template file.
2600    /// - `merge_prefer_file`: replace individual categories when present
2601    ///   in the file; keep embedded for absent categories.
2602    #[serde(default, alias = "mergeStrategy")]
2603    pub merge_strategy: TemplateMergeStrategy,
2604}
2605
2606/// Strategy for combining user-supplied template files with embedded defaults.
2607#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
2608#[serde(rename_all = "snake_case")]
2609pub enum TemplateMergeStrategy {
2610    /// Append file entries to embedded pools (default).
2611    #[default]
2612    Extend,
2613    /// Replace embedded pools entirely with file entries.
2614    Replace,
2615    /// Replace individual categories when present in file; keep embedded for absent ones.
2616    MergePreferFile,
2617}
2618
2619/// Name template configuration.
2620#[derive(Debug, Clone, Serialize, Deserialize)]
2621pub struct NameTemplateConfig {
2622    /// Distribution of name cultures
2623    #[serde(default)]
2624    pub culture_distribution: CultureDistribution,
2625    /// Email domain for generated users
2626    #[serde(default = "default_email_domain")]
2627    pub email_domain: String,
2628    /// Generate realistic display names
2629    #[serde(default = "default_true")]
2630    pub generate_realistic_names: bool,
2631}
2632
/// Serde default for `NameTemplateConfig::email_domain`.
fn default_email_domain() -> String {
    String::from("company.com")
}
2636
2637impl Default for NameTemplateConfig {
2638    fn default() -> Self {
2639        Self {
2640            culture_distribution: CultureDistribution::default(),
2641            email_domain: default_email_domain(),
2642            generate_realistic_names: true,
2643        }
2644    }
2645}
2646
2647/// Distribution of name cultures for generation.
2648#[derive(Debug, Clone, Serialize, Deserialize)]
2649pub struct CultureDistribution {
2650    pub western_us: f64,
2651    pub hispanic: f64,
2652    pub german: f64,
2653    pub french: f64,
2654    pub chinese: f64,
2655    pub japanese: f64,
2656    pub indian: f64,
2657}
2658
2659impl Default for CultureDistribution {
2660    fn default() -> Self {
2661        Self {
2662            western_us: 0.40,
2663            hispanic: 0.20,
2664            german: 0.10,
2665            french: 0.05,
2666            chinese: 0.10,
2667            japanese: 0.05,
2668            indian: 0.10,
2669        }
2670    }
2671}
2672
2673/// Description template configuration.
2674#[derive(Debug, Clone, Serialize, Deserialize)]
2675pub struct DescriptionTemplateConfig {
2676    /// Generate header text for journal entries
2677    #[serde(default = "default_true")]
2678    pub generate_header_text: bool,
2679    /// Generate line text for journal entry lines
2680    #[serde(default = "default_true")]
2681    pub generate_line_text: bool,
2682}
2683
2684impl Default for DescriptionTemplateConfig {
2685    fn default() -> Self {
2686        Self {
2687            generate_header_text: true,
2688            generate_line_text: true,
2689        }
2690    }
2691}
2692
2693/// Reference number template configuration.
2694#[derive(Debug, Clone, Serialize, Deserialize)]
2695pub struct ReferenceTemplateConfig {
2696    /// Generate reference numbers
2697    #[serde(default = "default_true")]
2698    pub generate_references: bool,
2699    /// Invoice prefix
2700    #[serde(default = "default_invoice_prefix")]
2701    pub invoice_prefix: String,
2702    /// Purchase order prefix
2703    #[serde(default = "default_po_prefix")]
2704    pub po_prefix: String,
2705    /// Sales order prefix
2706    #[serde(default = "default_so_prefix")]
2707    pub so_prefix: String,
2708}
2709
/// Default invoice reference prefix.
fn default_invoice_prefix() -> String {
    String::from("INV")
}

/// Default purchase order reference prefix.
fn default_po_prefix() -> String {
    String::from("PO")
}

/// Default sales order reference prefix.
fn default_so_prefix() -> String {
    String::from("SO")
}
2719
2720impl Default for ReferenceTemplateConfig {
2721    fn default() -> Self {
2722        Self {
2723            generate_references: true,
2724            invoice_prefix: default_invoice_prefix(),
2725            po_prefix: default_po_prefix(),
2726            so_prefix: default_so_prefix(),
2727        }
2728    }
2729}
2730
2731/// Approval workflow configuration.
2732#[derive(Debug, Clone, Serialize, Deserialize)]
2733pub struct ApprovalConfig {
2734    /// Enable approval workflow generation
2735    #[serde(default)]
2736    pub enabled: bool,
2737    /// Threshold below which transactions are auto-approved
2738    #[serde(default = "default_auto_approve_threshold")]
2739    pub auto_approve_threshold: f64,
2740    /// Rate at which approvals are rejected (0.0 to 1.0)
2741    #[serde(default = "default_rejection_rate")]
2742    pub rejection_rate: f64,
2743    /// Rate at which approvals require revision (0.0 to 1.0)
2744    #[serde(default = "default_revision_rate")]
2745    pub revision_rate: f64,
2746    /// Average delay in hours for approval processing
2747    #[serde(default = "default_approval_delay_hours")]
2748    pub average_approval_delay_hours: f64,
2749    /// Approval chain thresholds
2750    #[serde(default)]
2751    pub thresholds: Vec<ApprovalThresholdConfig>,
2752}
2753
/// Default amount below which transactions are auto-approved.
fn default_auto_approve_threshold() -> f64 {
    1_000.0
}

/// Default approval rejection rate.
fn default_rejection_rate() -> f64 {
    0.02
}

/// Default approval revision rate.
fn default_revision_rate() -> f64 {
    0.05
}

/// Default average approval delay, in hours.
fn default_approval_delay_hours() -> f64 {
    4.0
}
2766
2767impl Default for ApprovalConfig {
2768    fn default() -> Self {
2769        Self {
2770            enabled: false,
2771            auto_approve_threshold: default_auto_approve_threshold(),
2772            rejection_rate: default_rejection_rate(),
2773            revision_rate: default_revision_rate(),
2774            average_approval_delay_hours: default_approval_delay_hours(),
2775            thresholds: vec![
2776                ApprovalThresholdConfig {
2777                    amount: 1000.0,
2778                    level: 1,
2779                    roles: vec!["senior_accountant".to_string()],
2780                },
2781                ApprovalThresholdConfig {
2782                    amount: 10000.0,
2783                    level: 2,
2784                    roles: vec!["senior_accountant".to_string(), "controller".to_string()],
2785                },
2786                ApprovalThresholdConfig {
2787                    amount: 100000.0,
2788                    level: 3,
2789                    roles: vec![
2790                        "senior_accountant".to_string(),
2791                        "controller".to_string(),
2792                        "manager".to_string(),
2793                    ],
2794                },
2795                ApprovalThresholdConfig {
2796                    amount: 500000.0,
2797                    level: 4,
2798                    roles: vec![
2799                        "senior_accountant".to_string(),
2800                        "controller".to_string(),
2801                        "manager".to_string(),
2802                        "executive".to_string(),
2803                    ],
2804                },
2805            ],
2806        }
2807    }
2808}
2809
2810/// Configuration for a single approval threshold.
2811#[derive(Debug, Clone, Serialize, Deserialize)]
2812pub struct ApprovalThresholdConfig {
2813    /// Amount threshold
2814    pub amount: f64,
2815    /// Approval level required
2816    pub level: u8,
2817    /// Roles that can approve at this level
2818    pub roles: Vec<String>,
2819}
2820
2821/// Department configuration.
2822#[derive(Debug, Clone, Serialize, Deserialize)]
2823pub struct DepartmentConfig {
2824    /// Enable department assignment
2825    #[serde(default)]
2826    pub enabled: bool,
2827    /// Multiplier for department headcounts
2828    #[serde(default = "default_headcount_multiplier")]
2829    pub headcount_multiplier: f64,
2830    /// Custom department definitions (optional)
2831    #[serde(default)]
2832    pub custom_departments: Vec<CustomDepartmentConfig>,
2833}
2834
/// Serde default for `DepartmentConfig::headcount_multiplier`
/// (1.0, i.e. headcounts are left unscaled).
fn default_headcount_multiplier() -> f64 {
    1.0
}
2838
2839impl Default for DepartmentConfig {
2840    fn default() -> Self {
2841        Self {
2842            enabled: false,
2843            headcount_multiplier: default_headcount_multiplier(),
2844            custom_departments: Vec::new(),
2845        }
2846    }
2847}
2848
2849/// Custom department definition.
2850#[derive(Debug, Clone, Serialize, Deserialize)]
2851pub struct CustomDepartmentConfig {
2852    /// Department code
2853    pub code: String,
2854    /// Department name
2855    pub name: String,
2856    /// Associated cost center
2857    #[serde(default)]
2858    pub cost_center: Option<String>,
2859    /// Primary business processes
2860    #[serde(default)]
2861    pub primary_processes: Vec<String>,
2862    /// Parent department code
2863    #[serde(default)]
2864    pub parent_code: Option<String>,
2865}
2866
2867// ============================================================================
2868// Master Data Configuration
2869// ============================================================================
2870
2871/// Master data generation configuration.
2872#[derive(Debug, Clone, Default, Serialize, Deserialize)]
2873pub struct MasterDataConfig {
2874    /// Vendor master data settings
2875    #[serde(default)]
2876    pub vendors: VendorMasterConfig,
2877    /// Customer master data settings
2878    #[serde(default)]
2879    pub customers: CustomerMasterConfig,
2880    /// Material master data settings
2881    #[serde(default)]
2882    pub materials: MaterialMasterConfig,
2883    /// Fixed asset master data settings
2884    #[serde(default)]
2885    pub fixed_assets: FixedAssetMasterConfig,
2886    /// Employee master data settings
2887    #[serde(default)]
2888    pub employees: EmployeeMasterConfig,
2889    /// Cost center master data settings
2890    #[serde(default)]
2891    pub cost_centers: CostCenterMasterConfig,
2892}
2893
2894/// Vendor master data configuration.
2895#[derive(Debug, Clone, Serialize, Deserialize)]
2896pub struct VendorMasterConfig {
2897    /// Number of vendors to generate
2898    #[serde(default = "default_vendor_count")]
2899    pub count: usize,
2900    /// Percentage of vendors that are intercompany (0.0 to 1.0)
2901    #[serde(default = "default_intercompany_percent")]
2902    pub intercompany_percent: f64,
2903    /// Payment terms distribution
2904    #[serde(default)]
2905    pub payment_terms_distribution: PaymentTermsDistribution,
2906    /// Vendor behavior distribution
2907    #[serde(default)]
2908    pub behavior_distribution: VendorBehaviorDistribution,
2909    /// Generate bank account details
2910    #[serde(default = "default_true")]
2911    pub generate_bank_accounts: bool,
2912    /// Generate tax IDs
2913    #[serde(default = "default_true")]
2914    pub generate_tax_ids: bool,
2915}
2916
/// Default number of vendors to generate.
fn default_vendor_count() -> usize {
    500
}

/// Default intercompany share (0.0 to 1.0); used by both the vendor
/// and the customer master configs.
fn default_intercompany_percent() -> f64 {
    0.05
}
2924
2925impl Default for VendorMasterConfig {
2926    fn default() -> Self {
2927        Self {
2928            count: default_vendor_count(),
2929            intercompany_percent: default_intercompany_percent(),
2930            payment_terms_distribution: PaymentTermsDistribution::default(),
2931            behavior_distribution: VendorBehaviorDistribution::default(),
2932            generate_bank_accounts: true,
2933            generate_tax_ids: true,
2934        }
2935    }
2936}
2937
2938/// Payment terms distribution for vendors.
2939#[derive(Debug, Clone, Serialize, Deserialize)]
2940pub struct PaymentTermsDistribution {
2941    /// Net 30 days
2942    pub net_30: f64,
2943    /// Net 60 days
2944    pub net_60: f64,
2945    /// Net 90 days
2946    pub net_90: f64,
2947    /// 2% 10 Net 30 (early payment discount)
2948    pub two_ten_net_30: f64,
2949    /// Due on receipt
2950    pub due_on_receipt: f64,
2951    /// End of month
2952    pub end_of_month: f64,
2953}
2954
2955impl Default for PaymentTermsDistribution {
2956    fn default() -> Self {
2957        Self {
2958            net_30: 0.40,
2959            net_60: 0.20,
2960            net_90: 0.10,
2961            two_ten_net_30: 0.15,
2962            due_on_receipt: 0.05,
2963            end_of_month: 0.10,
2964        }
2965    }
2966}
2967
2968/// Vendor behavior distribution.
2969///
2970/// All fields default to `0.0` if absent from the YAML, so partial
2971/// distributions are accepted; the validator (`validate_sum_to_one`)
2972/// then enforces that the populated weights sum to `1.0 ± 0.01`.
2973#[derive(Debug, Clone, Serialize, Deserialize)]
2974#[serde(deny_unknown_fields)]
2975pub struct VendorBehaviorDistribution {
2976    /// Reliable vendors (consistent delivery, quality)
2977    #[serde(default)]
2978    pub reliable: f64,
2979    /// Sometimes late vendors
2980    #[serde(default)]
2981    pub sometimes_late: f64,
2982    /// Inconsistent quality vendors
2983    #[serde(default)]
2984    pub inconsistent_quality: f64,
2985    /// Premium vendors (high quality, premium pricing)
2986    #[serde(default)]
2987    pub premium: f64,
2988    /// Budget vendors (lower quality, lower pricing)
2989    #[serde(default)]
2990    pub budget: f64,
2991    /// Erratic vendors (variable behavior, unpredictable performance)
2992    #[serde(default)]
2993    pub erratic: f64,
2994    /// Problematic vendors (frequent issues, high risk for fraud scenarios)
2995    #[serde(default)]
2996    pub problematic: f64,
2997}
2998
2999impl Default for VendorBehaviorDistribution {
3000    fn default() -> Self {
3001        // Preserves the pre-extension default sum=1.0 over the original
3002        // five fields.  `erratic` and `problematic` default to 0.0 so
3003        // that existing configs/packs continue to merge to a 1.0 sum
3004        // without modification.
3005        Self {
3006            reliable: 0.50,
3007            sometimes_late: 0.20,
3008            inconsistent_quality: 0.10,
3009            premium: 0.10,
3010            budget: 0.10,
3011            erratic: 0.0,
3012            problematic: 0.0,
3013        }
3014    }
3015}
3016
3017/// Customer master data configuration.
3018#[derive(Debug, Clone, Serialize, Deserialize)]
3019pub struct CustomerMasterConfig {
3020    /// Number of customers to generate
3021    #[serde(default = "default_customer_count")]
3022    pub count: usize,
3023    /// Percentage of customers that are intercompany (0.0 to 1.0)
3024    #[serde(default = "default_intercompany_percent")]
3025    pub intercompany_percent: f64,
3026    /// Credit rating distribution
3027    #[serde(default)]
3028    pub credit_rating_distribution: CreditRatingDistribution,
3029    /// Payment behavior distribution
3030    #[serde(default)]
3031    pub payment_behavior_distribution: PaymentBehaviorDistribution,
3032    /// Generate credit limits based on rating
3033    #[serde(default = "default_true")]
3034    pub generate_credit_limits: bool,
3035}
3036
/// Default number of customers to generate.
fn default_customer_count() -> usize {
    2_000
}
3040
3041impl Default for CustomerMasterConfig {
3042    fn default() -> Self {
3043        Self {
3044            count: default_customer_count(),
3045            intercompany_percent: default_intercompany_percent(),
3046            credit_rating_distribution: CreditRatingDistribution::default(),
3047            payment_behavior_distribution: PaymentBehaviorDistribution::default(),
3048            generate_credit_limits: true,
3049        }
3050    }
3051}
3052
3053/// Credit rating distribution for customers.
3054///
3055/// Two parallel vocabularies are accepted:
3056///   * Bond-grade tiers: `aaa`, `aa`, `a`, `bbb`, `bb`, `b`, `below_b`
3057///   * Plain-English tiers: `excellent`, `good`, `fair`, `poor`
3058///
3059/// All fields default to `0.0` if absent; mix and match as needed.
3060/// The validator enforces that the populated weights sum to `1.0`.
3061#[derive(Debug, Clone, Serialize, Deserialize)]
3062#[serde(deny_unknown_fields)]
3063pub struct CreditRatingDistribution {
3064    /// AAA rating
3065    #[serde(default)]
3066    pub aaa: f64,
3067    /// AA rating
3068    #[serde(default)]
3069    pub aa: f64,
3070    /// A rating
3071    #[serde(default)]
3072    pub a: f64,
3073    /// BBB rating
3074    #[serde(default)]
3075    pub bbb: f64,
3076    /// BB rating
3077    #[serde(default)]
3078    pub bb: f64,
3079    /// B rating
3080    #[serde(default)]
3081    pub b: f64,
3082    /// Below B rating
3083    #[serde(default)]
3084    pub below_b: f64,
3085    /// Plain-English: excellent credit (≈ AAA/AA tier)
3086    #[serde(default)]
3087    pub excellent: f64,
3088    /// Plain-English: good credit (≈ A tier)
3089    #[serde(default)]
3090    pub good: f64,
3091    /// Plain-English: fair credit (≈ BBB/BB tier)
3092    #[serde(default)]
3093    pub fair: f64,
3094    /// Plain-English: poor credit (≈ B/below tier)
3095    #[serde(default)]
3096    pub poor: f64,
3097}
3098
3099impl Default for CreditRatingDistribution {
3100    fn default() -> Self {
3101        Self {
3102            aaa: 0.05,
3103            aa: 0.10,
3104            a: 0.20,
3105            bbb: 0.30,
3106            bb: 0.20,
3107            b: 0.10,
3108            below_b: 0.05,
3109            excellent: 0.0,
3110            good: 0.0,
3111            fair: 0.0,
3112            poor: 0.0,
3113        }
3114    }
3115}
3116
3117/// Payment behavior distribution for customers.
3118///
3119/// All fields default to `0.0` if absent from the YAML.  Validator
3120/// enforces that populated weights sum to `1.0 ± 0.01`.
3121#[derive(Debug, Clone, Serialize, Deserialize)]
3122#[serde(deny_unknown_fields)]
3123pub struct PaymentBehaviorDistribution {
3124    /// Always pays early
3125    #[serde(default)]
3126    pub early_payer: f64,
3127    /// Pays on time
3128    #[serde(default)]
3129    pub on_time: f64,
3130    /// Occasionally late
3131    #[serde(default)]
3132    pub occasional_late: f64,
3133    /// Frequently late
3134    #[serde(default)]
3135    pub frequent_late: f64,
3136    /// Takes early payment discounts
3137    #[serde(default)]
3138    pub discount_taker: f64,
3139}
3140
3141impl Default for PaymentBehaviorDistribution {
3142    fn default() -> Self {
3143        Self {
3144            early_payer: 0.10,
3145            on_time: 0.50,
3146            occasional_late: 0.25,
3147            frequent_late: 0.10,
3148            discount_taker: 0.05,
3149        }
3150    }
3151}
3152
3153/// Material master data configuration.
3154#[derive(Debug, Clone, Serialize, Deserialize)]
3155pub struct MaterialMasterConfig {
3156    /// Number of materials to generate
3157    #[serde(default = "default_material_count")]
3158    pub count: usize,
3159    /// Material type distribution
3160    #[serde(default)]
3161    pub type_distribution: MaterialTypeDistribution,
3162    /// Valuation method distribution
3163    #[serde(default)]
3164    pub valuation_distribution: ValuationMethodDistribution,
3165    /// Percentage of materials with BOM (bill of materials)
3166    #[serde(default = "default_bom_percent")]
3167    pub bom_percent: f64,
3168    /// Maximum BOM depth
3169    #[serde(default = "default_max_bom_depth")]
3170    pub max_bom_depth: u8,
3171}
3172
/// Default number of materials to generate.
fn default_material_count() -> usize {
    5_000
}

/// Default share of materials that have a bill of materials.
fn default_bom_percent() -> f64 {
    0.20
}

/// Default maximum bill-of-materials depth.
fn default_max_bom_depth() -> u8 {
    3
}
3184
3185impl Default for MaterialMasterConfig {
3186    fn default() -> Self {
3187        Self {
3188            count: default_material_count(),
3189            type_distribution: MaterialTypeDistribution::default(),
3190            valuation_distribution: ValuationMethodDistribution::default(),
3191            bom_percent: default_bom_percent(),
3192            max_bom_depth: default_max_bom_depth(),
3193        }
3194    }
3195}
3196
3197/// Material type distribution.
3198#[derive(Debug, Clone, Serialize, Deserialize)]
3199pub struct MaterialTypeDistribution {
3200    /// Raw materials
3201    pub raw_material: f64,
3202    /// Semi-finished goods
3203    pub semi_finished: f64,
3204    /// Finished goods
3205    pub finished_good: f64,
3206    /// Trading goods (purchased for resale)
3207    pub trading_good: f64,
3208    /// Operating supplies
3209    pub operating_supply: f64,
3210    /// Services
3211    pub service: f64,
3212}
3213
3214impl Default for MaterialTypeDistribution {
3215    fn default() -> Self {
3216        Self {
3217            raw_material: 0.30,
3218            semi_finished: 0.15,
3219            finished_good: 0.25,
3220            trading_good: 0.15,
3221            operating_supply: 0.10,
3222            service: 0.05,
3223        }
3224    }
3225}
3226
3227/// Valuation method distribution for materials.
3228#[derive(Debug, Clone, Serialize, Deserialize)]
3229pub struct ValuationMethodDistribution {
3230    /// Standard cost
3231    pub standard_cost: f64,
3232    /// Moving average
3233    pub moving_average: f64,
3234    /// FIFO (First In, First Out)
3235    pub fifo: f64,
3236    /// LIFO (Last In, First Out)
3237    pub lifo: f64,
3238}
3239
3240impl Default for ValuationMethodDistribution {
3241    fn default() -> Self {
3242        Self {
3243            standard_cost: 0.50,
3244            moving_average: 0.30,
3245            fifo: 0.15,
3246            lifo: 0.05,
3247        }
3248    }
3249}
3250
3251/// Fixed asset master data configuration.
3252#[derive(Debug, Clone, Serialize, Deserialize)]
3253pub struct FixedAssetMasterConfig {
3254    /// Number of fixed assets to generate
3255    #[serde(default = "default_asset_count")]
3256    pub count: usize,
3257    /// Asset class distribution
3258    #[serde(default)]
3259    pub class_distribution: AssetClassDistribution,
3260    /// Depreciation method distribution
3261    #[serde(default)]
3262    pub depreciation_distribution: DepreciationMethodDistribution,
3263    /// Percentage of assets that are fully depreciated
3264    #[serde(default = "default_fully_depreciated_percent")]
3265    pub fully_depreciated_percent: f64,
3266    /// Generate acquisition history
3267    #[serde(default = "default_true")]
3268    pub generate_acquisition_history: bool,
3269}
3270
/// Default number of fixed assets to generate.
fn default_asset_count() -> usize {
    800
}

/// Default share of assets that are fully depreciated.
fn default_fully_depreciated_percent() -> f64 {
    0.15
}
3278
3279impl Default for FixedAssetMasterConfig {
3280    fn default() -> Self {
3281        Self {
3282            count: default_asset_count(),
3283            class_distribution: AssetClassDistribution::default(),
3284            depreciation_distribution: DepreciationMethodDistribution::default(),
3285            fully_depreciated_percent: default_fully_depreciated_percent(),
3286            generate_acquisition_history: true,
3287        }
3288    }
3289}
3290
3291/// Asset class distribution.
3292#[derive(Debug, Clone, Serialize, Deserialize)]
3293pub struct AssetClassDistribution {
3294    /// Buildings and structures
3295    pub buildings: f64,
3296    /// Machinery and equipment
3297    pub machinery: f64,
3298    /// Vehicles
3299    pub vehicles: f64,
3300    /// IT equipment
3301    pub it_equipment: f64,
3302    /// Furniture and fixtures
3303    pub furniture: f64,
3304    /// Land (non-depreciable)
3305    pub land: f64,
3306    /// Leasehold improvements
3307    pub leasehold: f64,
3308}
3309
3310impl Default for AssetClassDistribution {
3311    fn default() -> Self {
3312        Self {
3313            buildings: 0.15,
3314            machinery: 0.30,
3315            vehicles: 0.15,
3316            it_equipment: 0.20,
3317            furniture: 0.10,
3318            land: 0.05,
3319            leasehold: 0.05,
3320        }
3321    }
3322}
3323
3324/// Depreciation method distribution.
3325#[derive(Debug, Clone, Serialize, Deserialize)]
3326pub struct DepreciationMethodDistribution {
3327    /// Straight line
3328    pub straight_line: f64,
3329    /// Declining balance
3330    pub declining_balance: f64,
3331    /// Double declining balance
3332    pub double_declining: f64,
3333    /// Sum of years' digits
3334    pub sum_of_years: f64,
3335    /// Units of production
3336    pub units_of_production: f64,
3337}
3338
3339impl Default for DepreciationMethodDistribution {
3340    fn default() -> Self {
3341        Self {
3342            straight_line: 0.60,
3343            declining_balance: 0.20,
3344            double_declining: 0.10,
3345            sum_of_years: 0.05,
3346            units_of_production: 0.05,
3347        }
3348    }
3349}
3350
3351/// Employee master data configuration.
3352#[derive(Debug, Clone, Serialize, Deserialize)]
3353pub struct EmployeeMasterConfig {
3354    /// Number of employees to generate
3355    #[serde(default = "default_employee_count")]
3356    pub count: usize,
3357    /// Generate organizational hierarchy
3358    #[serde(default = "default_true")]
3359    pub generate_hierarchy: bool,
3360    /// Maximum hierarchy depth
3361    #[serde(default = "default_hierarchy_depth")]
3362    pub max_hierarchy_depth: u8,
3363    /// Average span of control (direct reports per manager)
3364    #[serde(default = "default_span_of_control")]
3365    pub average_span_of_control: f64,
3366    /// Approval limit distribution by job level
3367    #[serde(default)]
3368    pub approval_limits: ApprovalLimitDistribution,
3369    /// Department distribution
3370    #[serde(default)]
3371    pub department_distribution: EmployeeDepartmentDistribution,
3372}
3373
/// Serde fallback for `EmployeeMasterConfig::count`.
fn default_employee_count() -> usize {
    1_500
}

/// Serde fallback for `EmployeeMasterConfig::max_hierarchy_depth`.
fn default_hierarchy_depth() -> u8 {
    6
}

/// Serde fallback for `EmployeeMasterConfig::average_span_of_control`.
fn default_span_of_control() -> f64 {
    5.0
}
3385
3386impl Default for EmployeeMasterConfig {
3387    fn default() -> Self {
3388        Self {
3389            count: default_employee_count(),
3390            generate_hierarchy: true,
3391            max_hierarchy_depth: default_hierarchy_depth(),
3392            average_span_of_control: default_span_of_control(),
3393            approval_limits: ApprovalLimitDistribution::default(),
3394            department_distribution: EmployeeDepartmentDistribution::default(),
3395        }
3396    }
3397}
3398
3399/// Approval limit distribution by job level.
3400#[derive(Debug, Clone, Serialize, Deserialize)]
3401pub struct ApprovalLimitDistribution {
3402    /// Staff level approval limit
3403    #[serde(default = "default_staff_limit")]
3404    pub staff: f64,
3405    /// Senior staff approval limit
3406    #[serde(default = "default_senior_limit")]
3407    pub senior: f64,
3408    /// Manager approval limit
3409    #[serde(default = "default_manager_limit")]
3410    pub manager: f64,
3411    /// Director approval limit
3412    #[serde(default = "default_director_limit")]
3413    pub director: f64,
3414    /// VP approval limit
3415    #[serde(default = "default_vp_limit")]
3416    pub vp: f64,
3417    /// Executive approval limit
3418    #[serde(default = "default_executive_limit")]
3419    pub executive: f64,
3420}
3421
/// Serde fallback for `ApprovalLimitDistribution::staff`.
fn default_staff_limit() -> f64 {
    1_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::senior`.
fn default_senior_limit() -> f64 {
    5_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::manager`.
fn default_manager_limit() -> f64 {
    25_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::director`.
fn default_director_limit() -> f64 {
    100_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::vp`.
fn default_vp_limit() -> f64 {
    500_000.0
}

/// Serde fallback for `ApprovalLimitDistribution::executive`: no upper bound.
///
/// NOTE(review): a non-finite float may not survive a JSON round trip
/// (serde_json is documented to emit `null` for it) — confirm that exported
/// configs containing this default can be read back.
fn default_executive_limit() -> f64 {
    f64::INFINITY
}
3440
3441impl Default for ApprovalLimitDistribution {
3442    fn default() -> Self {
3443        Self {
3444            staff: default_staff_limit(),
3445            senior: default_senior_limit(),
3446            manager: default_manager_limit(),
3447            director: default_director_limit(),
3448            vp: default_vp_limit(),
3449            executive: default_executive_limit(),
3450        }
3451    }
3452}
3453
3454/// Employee distribution across departments.
3455#[derive(Debug, Clone, Serialize, Deserialize)]
3456pub struct EmployeeDepartmentDistribution {
3457    /// Finance and Accounting
3458    pub finance: f64,
3459    /// Procurement
3460    pub procurement: f64,
3461    /// Sales
3462    pub sales: f64,
3463    /// Warehouse and Logistics
3464    pub warehouse: f64,
3465    /// IT
3466    pub it: f64,
3467    /// Human Resources
3468    pub hr: f64,
3469    /// Operations
3470    pub operations: f64,
3471    /// Executive
3472    pub executive: f64,
3473}
3474
3475impl Default for EmployeeDepartmentDistribution {
3476    fn default() -> Self {
3477        Self {
3478            finance: 0.12,
3479            procurement: 0.10,
3480            sales: 0.25,
3481            warehouse: 0.15,
3482            it: 0.10,
3483            hr: 0.05,
3484            operations: 0.20,
3485            executive: 0.03,
3486        }
3487    }
3488}
3489
3490/// Cost center master data configuration.
3491#[derive(Debug, Clone, Serialize, Deserialize)]
3492pub struct CostCenterMasterConfig {
3493    /// Number of cost centers to generate
3494    #[serde(default = "default_cost_center_count")]
3495    pub count: usize,
3496    /// Generate cost center hierarchy
3497    #[serde(default = "default_true")]
3498    pub generate_hierarchy: bool,
3499    /// Maximum hierarchy depth
3500    #[serde(default = "default_cc_hierarchy_depth")]
3501    pub max_hierarchy_depth: u8,
3502}
3503
/// Serde fallback for `CostCenterMasterConfig::count`.
fn default_cost_center_count() -> usize {
    50
}

/// Serde fallback for `CostCenterMasterConfig::max_hierarchy_depth`.
fn default_cc_hierarchy_depth() -> u8 {
    3
}
3511
3512impl Default for CostCenterMasterConfig {
3513    fn default() -> Self {
3514        Self {
3515            count: default_cost_center_count(),
3516            generate_hierarchy: true,
3517            max_hierarchy_depth: default_cc_hierarchy_depth(),
3518        }
3519    }
3520}
3521
3522// ============================================================================
3523// Document Flow Configuration
3524// ============================================================================
3525
3526/// Document flow generation configuration.
3527#[derive(Debug, Clone, Serialize, Deserialize)]
3528pub struct DocumentFlowConfig {
3529    /// P2P (Procure-to-Pay) flow configuration
3530    #[serde(default)]
3531    pub p2p: P2PFlowConfig,
3532    /// O2C (Order-to-Cash) flow configuration
3533    #[serde(default)]
3534    pub o2c: O2CFlowConfig,
3535    /// Generate document reference chains
3536    #[serde(default = "default_true")]
3537    pub generate_document_references: bool,
3538    /// Export document flow graph
3539    #[serde(default)]
3540    pub export_flow_graph: bool,
3541}
3542
3543impl Default for DocumentFlowConfig {
3544    fn default() -> Self {
3545        Self {
3546            p2p: P2PFlowConfig::default(),
3547            o2c: O2CFlowConfig::default(),
3548            generate_document_references: true,
3549            export_flow_graph: false,
3550        }
3551    }
3552}
3553
3554/// P2P (Procure-to-Pay) flow configuration.
3555#[derive(Debug, Clone, Serialize, Deserialize)]
3556pub struct P2PFlowConfig {
3557    /// Enable P2P document flow generation
3558    #[serde(default = "default_true")]
3559    pub enabled: bool,
3560    /// Three-way match success rate (PO-GR-Invoice)
3561    #[serde(default = "default_three_way_match_rate")]
3562    pub three_way_match_rate: f64,
3563    /// Rate of partial deliveries
3564    #[serde(default = "default_partial_delivery_rate")]
3565    pub partial_delivery_rate: f64,
3566    /// Rate of price variances between PO and Invoice
3567    #[serde(default = "default_price_variance_rate")]
3568    pub price_variance_rate: f64,
3569    /// Maximum price variance percentage
3570    #[serde(default = "default_max_price_variance")]
3571    pub max_price_variance_percent: f64,
3572    /// Rate of quantity variances between PO/GR and Invoice
3573    #[serde(default = "default_quantity_variance_rate")]
3574    pub quantity_variance_rate: f64,
3575    /// Average days from PO to goods receipt
3576    #[serde(default = "default_po_to_gr_days")]
3577    pub average_po_to_gr_days: u32,
3578    /// Average days from GR to invoice
3579    #[serde(default = "default_gr_to_invoice_days")]
3580    pub average_gr_to_invoice_days: u32,
3581    /// Average days from invoice to payment
3582    #[serde(default = "default_invoice_to_payment_days")]
3583    pub average_invoice_to_payment_days: u32,
3584    /// PO line count distribution
3585    #[serde(default)]
3586    pub line_count_distribution: DocumentLineCountDistribution,
3587    /// Payment behavior configuration
3588    #[serde(default)]
3589    pub payment_behavior: P2PPaymentBehaviorConfig,
3590    /// Rate of over-deliveries (quantity received exceeds PO quantity)
3591    #[serde(default)]
3592    pub over_delivery_rate: Option<f64>,
3593    /// Rate of early payment discounts being taken
3594    #[serde(default)]
3595    pub early_payment_discount_rate: Option<f64>,
3596}
3597
/// Serde fallback for `P2PFlowConfig::three_way_match_rate`.
fn default_three_way_match_rate() -> f64 {
    0.95
}

/// Serde fallback for `P2PFlowConfig::partial_delivery_rate`.
fn default_partial_delivery_rate() -> f64 {
    0.15
}

/// Serde fallback for `P2PFlowConfig::price_variance_rate`.
fn default_price_variance_rate() -> f64 {
    0.08
}

/// Serde fallback for `P2PFlowConfig::max_price_variance_percent`.
fn default_max_price_variance() -> f64 {
    0.05
}

/// Serde fallback for `P2PFlowConfig::quantity_variance_rate`.
fn default_quantity_variance_rate() -> f64 {
    0.05
}

/// Serde fallback for `P2PFlowConfig::average_po_to_gr_days`.
fn default_po_to_gr_days() -> u32 {
    14
}

/// Serde fallback for `P2PFlowConfig::average_gr_to_invoice_days`.
fn default_gr_to_invoice_days() -> u32 {
    5
}

/// Serde fallback for `P2PFlowConfig::average_invoice_to_payment_days`.
fn default_invoice_to_payment_days() -> u32 {
    30
}
3629
3630impl Default for P2PFlowConfig {
3631    fn default() -> Self {
3632        Self {
3633            enabled: true,
3634            three_way_match_rate: default_three_way_match_rate(),
3635            partial_delivery_rate: default_partial_delivery_rate(),
3636            price_variance_rate: default_price_variance_rate(),
3637            max_price_variance_percent: default_max_price_variance(),
3638            quantity_variance_rate: default_quantity_variance_rate(),
3639            average_po_to_gr_days: default_po_to_gr_days(),
3640            average_gr_to_invoice_days: default_gr_to_invoice_days(),
3641            average_invoice_to_payment_days: default_invoice_to_payment_days(),
3642            line_count_distribution: DocumentLineCountDistribution::default(),
3643            payment_behavior: P2PPaymentBehaviorConfig::default(),
3644            over_delivery_rate: None,
3645            early_payment_discount_rate: None,
3646        }
3647    }
3648}
3649
3650// ============================================================================
3651// P2P Payment Behavior Configuration
3652// ============================================================================
3653
3654/// P2P payment behavior configuration.
3655#[derive(Debug, Clone, Serialize, Deserialize)]
3656pub struct P2PPaymentBehaviorConfig {
3657    /// Rate of late payments (beyond due date)
3658    #[serde(default = "default_p2p_late_payment_rate")]
3659    pub late_payment_rate: f64,
3660    /// Distribution of late payment days
3661    #[serde(default)]
3662    pub late_payment_days_distribution: LatePaymentDaysDistribution,
3663    /// Rate of partial payments
3664    #[serde(default = "default_p2p_partial_payment_rate")]
3665    pub partial_payment_rate: f64,
3666    /// Rate of payment corrections (NSF, chargebacks, reversals)
3667    #[serde(default = "default_p2p_payment_correction_rate")]
3668    pub payment_correction_rate: f64,
3669    /// Average days until partial payment remainder is paid
3670    #[serde(default = "default_p2p_avg_days_until_remainder")]
3671    pub avg_days_until_remainder: u32,
3672}
3673
/// Serde fallback for `P2PPaymentBehaviorConfig::late_payment_rate`.
fn default_p2p_late_payment_rate() -> f64 {
    0.15
}

/// Serde fallback for `P2PPaymentBehaviorConfig::partial_payment_rate`.
fn default_p2p_partial_payment_rate() -> f64 {
    0.05
}

/// Serde fallback for `P2PPaymentBehaviorConfig::payment_correction_rate`.
fn default_p2p_payment_correction_rate() -> f64 {
    0.02
}

/// Serde fallback for `P2PPaymentBehaviorConfig::avg_days_until_remainder`.
fn default_p2p_avg_days_until_remainder() -> u32 {
    30
}
3689
3690impl Default for P2PPaymentBehaviorConfig {
3691    fn default() -> Self {
3692        Self {
3693            late_payment_rate: default_p2p_late_payment_rate(),
3694            late_payment_days_distribution: LatePaymentDaysDistribution::default(),
3695            partial_payment_rate: default_p2p_partial_payment_rate(),
3696            payment_correction_rate: default_p2p_payment_correction_rate(),
3697            avg_days_until_remainder: default_p2p_avg_days_until_remainder(),
3698        }
3699    }
3700}
3701
3702/// Distribution of late payment days for P2P.
3703#[derive(Debug, Clone, Serialize, Deserialize)]
3704pub struct LatePaymentDaysDistribution {
3705    /// 1-7 days late (slightly late)
3706    #[serde(default = "default_slightly_late")]
3707    pub slightly_late_1_to_7: f64,
3708    /// 8-14 days late
3709    #[serde(default = "default_late_8_14")]
3710    pub late_8_to_14: f64,
3711    /// 15-30 days late (very late)
3712    #[serde(default = "default_very_late")]
3713    pub very_late_15_to_30: f64,
3714    /// 31-60 days late (severely late)
3715    #[serde(default = "default_severely_late")]
3716    pub severely_late_31_to_60: f64,
3717    /// Over 60 days late (extremely late)
3718    #[serde(default = "default_extremely_late")]
3719    pub extremely_late_over_60: f64,
3720}
3721
/// Serde fallback for `LatePaymentDaysDistribution::slightly_late_1_to_7`.
fn default_slightly_late() -> f64 {
    0.50
}

/// Serde fallback for `LatePaymentDaysDistribution::late_8_to_14`.
fn default_late_8_14() -> f64 {
    0.25
}

/// Serde fallback for `LatePaymentDaysDistribution::very_late_15_to_30`.
fn default_very_late() -> f64 {
    0.15
}

/// Serde fallback for `LatePaymentDaysDistribution::severely_late_31_to_60`.
fn default_severely_late() -> f64 {
    0.07
}

/// Serde fallback for `LatePaymentDaysDistribution::extremely_late_over_60`.
fn default_extremely_late() -> f64 {
    0.03
}
3741
3742impl Default for LatePaymentDaysDistribution {
3743    fn default() -> Self {
3744        Self {
3745            slightly_late_1_to_7: default_slightly_late(),
3746            late_8_to_14: default_late_8_14(),
3747            very_late_15_to_30: default_very_late(),
3748            severely_late_31_to_60: default_severely_late(),
3749            extremely_late_over_60: default_extremely_late(),
3750        }
3751    }
3752}
3753
3754/// O2C (Order-to-Cash) flow configuration.
3755#[derive(Debug, Clone, Serialize, Deserialize)]
3756pub struct O2CFlowConfig {
3757    /// Enable O2C document flow generation
3758    #[serde(default = "default_true")]
3759    pub enabled: bool,
3760    /// Credit check failure rate
3761    #[serde(default = "default_credit_check_failure_rate")]
3762    pub credit_check_failure_rate: f64,
3763    /// Rate of partial shipments
3764    #[serde(default = "default_partial_shipment_rate")]
3765    pub partial_shipment_rate: f64,
3766    /// Rate of returns
3767    #[serde(default = "default_return_rate")]
3768    pub return_rate: f64,
3769    /// Bad debt write-off rate
3770    #[serde(default = "default_bad_debt_rate")]
3771    pub bad_debt_rate: f64,
3772    /// Average days from SO to delivery
3773    #[serde(default = "default_so_to_delivery_days")]
3774    pub average_so_to_delivery_days: u32,
3775    /// Average days from delivery to invoice
3776    #[serde(default = "default_delivery_to_invoice_days")]
3777    pub average_delivery_to_invoice_days: u32,
3778    /// Average days from invoice to receipt
3779    #[serde(default = "default_invoice_to_receipt_days")]
3780    pub average_invoice_to_receipt_days: u32,
3781    /// SO line count distribution
3782    #[serde(default)]
3783    pub line_count_distribution: DocumentLineCountDistribution,
3784    /// Cash discount configuration
3785    #[serde(default)]
3786    pub cash_discount: CashDiscountConfig,
3787    /// Payment behavior configuration
3788    #[serde(default)]
3789    pub payment_behavior: O2CPaymentBehaviorConfig,
3790    /// Rate of late payments
3791    #[serde(default)]
3792    pub late_payment_rate: Option<f64>,
3793}
3794
/// Serde fallback for `O2CFlowConfig::credit_check_failure_rate`.
fn default_credit_check_failure_rate() -> f64 {
    0.02
}

/// Serde fallback for `O2CFlowConfig::partial_shipment_rate`.
fn default_partial_shipment_rate() -> f64 {
    0.10
}

/// Serde fallback for `O2CFlowConfig::return_rate`.
fn default_return_rate() -> f64 {
    0.03
}

/// Serde fallback for `O2CFlowConfig::bad_debt_rate`.
fn default_bad_debt_rate() -> f64 {
    0.01
}

/// Serde fallback for `O2CFlowConfig::average_so_to_delivery_days`.
fn default_so_to_delivery_days() -> u32 {
    7
}

/// Serde fallback for `O2CFlowConfig::average_delivery_to_invoice_days`.
fn default_delivery_to_invoice_days() -> u32 {
    1
}

/// Serde fallback for `O2CFlowConfig::average_invoice_to_receipt_days`.
fn default_invoice_to_receipt_days() -> u32 {
    45
}
3822
3823impl Default for O2CFlowConfig {
3824    fn default() -> Self {
3825        Self {
3826            enabled: true,
3827            credit_check_failure_rate: default_credit_check_failure_rate(),
3828            partial_shipment_rate: default_partial_shipment_rate(),
3829            return_rate: default_return_rate(),
3830            bad_debt_rate: default_bad_debt_rate(),
3831            average_so_to_delivery_days: default_so_to_delivery_days(),
3832            average_delivery_to_invoice_days: default_delivery_to_invoice_days(),
3833            average_invoice_to_receipt_days: default_invoice_to_receipt_days(),
3834            line_count_distribution: DocumentLineCountDistribution::default(),
3835            cash_discount: CashDiscountConfig::default(),
3836            payment_behavior: O2CPaymentBehaviorConfig::default(),
3837            late_payment_rate: None,
3838        }
3839    }
3840}
3841
3842// ============================================================================
3843// O2C Payment Behavior Configuration
3844// ============================================================================
3845
3846/// O2C payment behavior configuration.
3847#[derive(Debug, Clone, Serialize, Deserialize, Default)]
3848pub struct O2CPaymentBehaviorConfig {
3849    /// Dunning (Mahnung) configuration
3850    #[serde(default)]
3851    pub dunning: DunningConfig,
3852    /// Partial payment configuration
3853    #[serde(default)]
3854    pub partial_payments: PartialPaymentConfig,
3855    /// Short payment configuration (unauthorized deductions)
3856    #[serde(default)]
3857    pub short_payments: ShortPaymentConfig,
3858    /// On-account payment configuration (unapplied payments)
3859    #[serde(default)]
3860    pub on_account_payments: OnAccountPaymentConfig,
3861    /// Payment correction configuration (NSF, chargebacks)
3862    #[serde(default)]
3863    pub payment_corrections: PaymentCorrectionConfig,
3864}
3865
3866/// Dunning (Mahnungen) configuration for AR collections.
3867#[derive(Debug, Clone, Serialize, Deserialize)]
3868pub struct DunningConfig {
3869    /// Enable dunning process
3870    #[serde(default)]
3871    pub enabled: bool,
3872    /// Days overdue for level 1 dunning (1st reminder)
3873    #[serde(default = "default_dunning_level_1_days")]
3874    pub level_1_days_overdue: u32,
3875    /// Days overdue for level 2 dunning (2nd reminder)
3876    #[serde(default = "default_dunning_level_2_days")]
3877    pub level_2_days_overdue: u32,
3878    /// Days overdue for level 3 dunning (final notice)
3879    #[serde(default = "default_dunning_level_3_days")]
3880    pub level_3_days_overdue: u32,
3881    /// Days overdue for collection handover
3882    #[serde(default = "default_collection_days")]
3883    pub collection_days_overdue: u32,
3884    /// Payment rates after each dunning level
3885    #[serde(default)]
3886    pub payment_after_dunning_rates: DunningPaymentRates,
3887    /// Rate of invoices blocked from dunning (disputes)
3888    #[serde(default = "default_dunning_block_rate")]
3889    pub dunning_block_rate: f64,
3890    /// Interest rate per year for overdue amounts
3891    #[serde(default = "default_dunning_interest_rate")]
3892    pub interest_rate_per_year: f64,
3893    /// Fixed dunning charge per letter
3894    #[serde(default = "default_dunning_charge")]
3895    pub dunning_charge: f64,
3896}
3897
/// Serde fallback for `DunningConfig::level_1_days_overdue`.
fn default_dunning_level_1_days() -> u32 {
    14
}

/// Serde fallback for `DunningConfig::level_2_days_overdue`.
fn default_dunning_level_2_days() -> u32 {
    28
}

/// Serde fallback for `DunningConfig::level_3_days_overdue`.
fn default_dunning_level_3_days() -> u32 {
    42
}

/// Serde fallback for `DunningConfig::collection_days_overdue`.
fn default_collection_days() -> u32 {
    60
}

/// Serde fallback for `DunningConfig::dunning_block_rate`.
fn default_dunning_block_rate() -> f64 {
    0.05
}

/// Serde fallback for `DunningConfig::interest_rate_per_year`.
fn default_dunning_interest_rate() -> f64 {
    0.09
}

/// Serde fallback for `DunningConfig::dunning_charge`.
fn default_dunning_charge() -> f64 {
    25.0
}
3925
3926impl Default for DunningConfig {
3927    fn default() -> Self {
3928        Self {
3929            enabled: false,
3930            level_1_days_overdue: default_dunning_level_1_days(),
3931            level_2_days_overdue: default_dunning_level_2_days(),
3932            level_3_days_overdue: default_dunning_level_3_days(),
3933            collection_days_overdue: default_collection_days(),
3934            payment_after_dunning_rates: DunningPaymentRates::default(),
3935            dunning_block_rate: default_dunning_block_rate(),
3936            interest_rate_per_year: default_dunning_interest_rate(),
3937            dunning_charge: default_dunning_charge(),
3938        }
3939    }
3940}
3941
3942/// Payment rates after each dunning level.
3943#[derive(Debug, Clone, Serialize, Deserialize)]
3944pub struct DunningPaymentRates {
3945    /// Rate that pays after level 1 reminder
3946    #[serde(default = "default_after_level_1")]
3947    pub after_level_1: f64,
3948    /// Rate that pays after level 2 reminder
3949    #[serde(default = "default_after_level_2")]
3950    pub after_level_2: f64,
3951    /// Rate that pays after level 3 final notice
3952    #[serde(default = "default_after_level_3")]
3953    pub after_level_3: f64,
3954    /// Rate that pays during collection
3955    #[serde(default = "default_during_collection")]
3956    pub during_collection: f64,
3957    /// Rate that never pays (becomes bad debt)
3958    #[serde(default = "default_never_pay")]
3959    pub never_pay: f64,
3960}
3961
/// Serde fallback for `DunningPaymentRates::after_level_1`.
fn default_after_level_1() -> f64 {
    0.40
}

/// Serde fallback for `DunningPaymentRates::after_level_2`.
fn default_after_level_2() -> f64 {
    0.30
}

/// Serde fallback for `DunningPaymentRates::after_level_3`.
fn default_after_level_3() -> f64 {
    0.15
}

/// Serde fallback for `DunningPaymentRates::during_collection`.
fn default_during_collection() -> f64 {
    0.05
}

/// Serde fallback for `DunningPaymentRates::never_pay`.
fn default_never_pay() -> f64 {
    0.10
}
3981
3982impl Default for DunningPaymentRates {
3983    fn default() -> Self {
3984        Self {
3985            after_level_1: default_after_level_1(),
3986            after_level_2: default_after_level_2(),
3987            after_level_3: default_after_level_3(),
3988            during_collection: default_during_collection(),
3989            never_pay: default_never_pay(),
3990        }
3991    }
3992}
3993
3994/// Partial payment configuration.
3995#[derive(Debug, Clone, Serialize, Deserialize)]
3996pub struct PartialPaymentConfig {
3997    /// Rate of invoices paid partially
3998    #[serde(default = "default_partial_payment_rate")]
3999    pub rate: f64,
4000    /// Distribution of partial payment percentages
4001    #[serde(default)]
4002    pub percentage_distribution: PartialPaymentPercentageDistribution,
4003    /// Average days until remainder is paid
4004    #[serde(default = "default_avg_days_until_remainder")]
4005    pub avg_days_until_remainder: u32,
4006}
4007
/// Serde fallback for `PartialPaymentConfig::rate`.
fn default_partial_payment_rate() -> f64 {
    0.08
}

/// Serde fallback for `PartialPaymentConfig::avg_days_until_remainder`.
fn default_avg_days_until_remainder() -> u32 {
    30
}
4015
4016impl Default for PartialPaymentConfig {
4017    fn default() -> Self {
4018        Self {
4019            rate: default_partial_payment_rate(),
4020            percentage_distribution: PartialPaymentPercentageDistribution::default(),
4021            avg_days_until_remainder: default_avg_days_until_remainder(),
4022        }
4023    }
4024}
4025
4026/// Distribution of partial payment percentages.
4027#[derive(Debug, Clone, Serialize, Deserialize)]
4028pub struct PartialPaymentPercentageDistribution {
4029    /// Pay 25% of invoice
4030    #[serde(default = "default_partial_25")]
4031    pub pay_25_percent: f64,
4032    /// Pay 50% of invoice
4033    #[serde(default = "default_partial_50")]
4034    pub pay_50_percent: f64,
4035    /// Pay 75% of invoice
4036    #[serde(default = "default_partial_75")]
4037    pub pay_75_percent: f64,
4038    /// Pay random percentage
4039    #[serde(default = "default_partial_random")]
4040    pub pay_random_percent: f64,
4041}
4042
/// Serde fallback for `PartialPaymentPercentageDistribution::pay_25_percent`.
fn default_partial_25() -> f64 {
    0.15
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_50_percent`.
fn default_partial_50() -> f64 {
    0.50
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_75_percent`.
fn default_partial_75() -> f64 {
    0.25
}

/// Serde fallback for `PartialPaymentPercentageDistribution::pay_random_percent`.
fn default_partial_random() -> f64 {
    0.10
}
4058
4059impl Default for PartialPaymentPercentageDistribution {
4060    fn default() -> Self {
4061        Self {
4062            pay_25_percent: default_partial_25(),
4063            pay_50_percent: default_partial_50(),
4064            pay_75_percent: default_partial_75(),
4065            pay_random_percent: default_partial_random(),
4066        }
4067    }
4068}
4069
4070/// Short payment configuration (unauthorized deductions).
4071#[derive(Debug, Clone, Serialize, Deserialize)]
4072pub struct ShortPaymentConfig {
4073    /// Rate of payments that are short
4074    #[serde(default = "default_short_payment_rate")]
4075    pub rate: f64,
4076    /// Distribution of short payment reasons
4077    #[serde(default)]
4078    pub reason_distribution: ShortPaymentReasonDistribution,
4079    /// Maximum percentage that can be short
4080    #[serde(default = "default_max_short_percent")]
4081    pub max_short_percent: f64,
4082}
4083
/// Serde fallback for `ShortPaymentConfig::rate`.
fn default_short_payment_rate() -> f64 {
    0.03
}

/// Serde fallback for `ShortPaymentConfig::max_short_percent`.
fn default_max_short_percent() -> f64 {
    0.10
}
4091
4092impl Default for ShortPaymentConfig {
4093    fn default() -> Self {
4094        Self {
4095            rate: default_short_payment_rate(),
4096            reason_distribution: ShortPaymentReasonDistribution::default(),
4097            max_short_percent: default_max_short_percent(),
4098        }
4099    }
4100}
4101
4102/// Distribution of short payment reasons.
4103#[derive(Debug, Clone, Serialize, Deserialize)]
4104pub struct ShortPaymentReasonDistribution {
4105    /// Pricing dispute
4106    #[serde(default = "default_pricing_dispute")]
4107    pub pricing_dispute: f64,
4108    /// Quality issue
4109    #[serde(default = "default_quality_issue")]
4110    pub quality_issue: f64,
4111    /// Quantity discrepancy
4112    #[serde(default = "default_quantity_discrepancy")]
4113    pub quantity_discrepancy: f64,
4114    /// Unauthorized deduction
4115    #[serde(default = "default_unauthorized_deduction")]
4116    pub unauthorized_deduction: f64,
4117    /// Early payment discount taken incorrectly
4118    #[serde(default = "default_incorrect_discount")]
4119    pub incorrect_discount: f64,
4120}
4121
/// Serde fallback for `ShortPaymentReasonDistribution::pricing_dispute`.
fn default_pricing_dispute() -> f64 {
    0.30
}

/// Serde fallback for `ShortPaymentReasonDistribution::quality_issue`.
fn default_quality_issue() -> f64 {
    0.20
}

/// Serde fallback for `ShortPaymentReasonDistribution::quantity_discrepancy`.
fn default_quantity_discrepancy() -> f64 {
    0.20
}

/// Serde fallback for `ShortPaymentReasonDistribution::unauthorized_deduction`.
fn default_unauthorized_deduction() -> f64 {
    0.15
}

/// Serde fallback for `ShortPaymentReasonDistribution::incorrect_discount`.
fn default_incorrect_discount() -> f64 {
    0.15
}
4141
4142impl Default for ShortPaymentReasonDistribution {
4143    fn default() -> Self {
4144        Self {
4145            pricing_dispute: default_pricing_dispute(),
4146            quality_issue: default_quality_issue(),
4147            quantity_discrepancy: default_quantity_discrepancy(),
4148            unauthorized_deduction: default_unauthorized_deduction(),
4149            incorrect_discount: default_incorrect_discount(),
4150        }
4151    }
4152}
4153
4154/// On-account payment configuration (unapplied payments).
4155#[derive(Debug, Clone, Serialize, Deserialize)]
4156pub struct OnAccountPaymentConfig {
4157    /// Rate of payments that are on-account (unapplied)
4158    #[serde(default = "default_on_account_rate")]
4159    pub rate: f64,
4160    /// Average days until on-account payments are applied
4161    #[serde(default = "default_avg_days_until_applied")]
4162    pub avg_days_until_applied: u32,
4163}
4164
/// Serde fallback for `OnAccountPaymentConfig::rate`.
fn default_on_account_rate() -> f64 {
    0.02
}

/// Serde fallback for `OnAccountPaymentConfig::avg_days_until_applied`.
fn default_avg_days_until_applied() -> u32 {
    14
}
4172
4173impl Default for OnAccountPaymentConfig {
4174    fn default() -> Self {
4175        Self {
4176            rate: default_on_account_rate(),
4177            avg_days_until_applied: default_avg_days_until_applied(),
4178        }
4179    }
4180}
4181
4182/// Payment correction configuration.
4183#[derive(Debug, Clone, Serialize, Deserialize)]
4184pub struct PaymentCorrectionConfig {
4185    /// Rate of payments requiring correction
4186    #[serde(default = "default_payment_correction_rate")]
4187    pub rate: f64,
4188    /// Distribution of correction types
4189    #[serde(default)]
4190    pub type_distribution: PaymentCorrectionTypeDistribution,
4191}
4192
/// Serde fallback for `PaymentCorrectionConfig::rate`.
fn default_payment_correction_rate() -> f64 {
    0.02
}
4196
4197impl Default for PaymentCorrectionConfig {
4198    fn default() -> Self {
4199        Self {
4200            rate: default_payment_correction_rate(),
4201            type_distribution: PaymentCorrectionTypeDistribution::default(),
4202        }
4203    }
4204}
4205
4206/// Distribution of payment correction types.
4207#[derive(Debug, Clone, Serialize, Deserialize)]
4208pub struct PaymentCorrectionTypeDistribution {
4209    /// NSF (Non-sufficient funds) / bounced check
4210    #[serde(default = "default_nsf_rate")]
4211    pub nsf: f64,
4212    /// Chargeback
4213    #[serde(default = "default_chargeback_rate")]
4214    pub chargeback: f64,
4215    /// Wrong amount applied
4216    #[serde(default = "default_wrong_amount_rate")]
4217    pub wrong_amount: f64,
4218    /// Wrong customer applied
4219    #[serde(default = "default_wrong_customer_rate")]
4220    pub wrong_customer: f64,
4221    /// Duplicate payment
4222    #[serde(default = "default_duplicate_payment_rate")]
4223    pub duplicate_payment: f64,
4224}
4225
/// Serde fallback for `PaymentCorrectionTypeDistribution::nsf`.
fn default_nsf_rate() -> f64 {
    0.30
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::chargeback`.
fn default_chargeback_rate() -> f64 {
    0.20
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::wrong_amount`.
fn default_wrong_amount_rate() -> f64 {
    0.20
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::wrong_customer`.
fn default_wrong_customer_rate() -> f64 {
    0.15
}

/// Serde fallback for `PaymentCorrectionTypeDistribution::duplicate_payment`.
fn default_duplicate_payment_rate() -> f64 {
    0.15
}
4245
4246impl Default for PaymentCorrectionTypeDistribution {
4247    fn default() -> Self {
4248        Self {
4249            nsf: default_nsf_rate(),
4250            chargeback: default_chargeback_rate(),
4251            wrong_amount: default_wrong_amount_rate(),
4252            wrong_customer: default_wrong_customer_rate(),
4253            duplicate_payment: default_duplicate_payment_rate(),
4254        }
4255    }
4256}
4257
4258/// Document line count distribution.
4259#[derive(Debug, Clone, Serialize, Deserialize)]
4260pub struct DocumentLineCountDistribution {
4261    /// Minimum number of lines
4262    #[serde(default = "default_min_lines")]
4263    pub min_lines: u32,
4264    /// Maximum number of lines
4265    #[serde(default = "default_max_lines")]
4266    pub max_lines: u32,
4267    /// Most common line count (mode)
4268    #[serde(default = "default_mode_lines")]
4269    pub mode_lines: u32,
4270}
4271
/// Serde fallback for `DocumentLineCountDistribution::min_lines`.
fn default_min_lines() -> u32 {
    1
}

/// Serde fallback for `DocumentLineCountDistribution::max_lines`.
fn default_max_lines() -> u32 {
    20
}

/// Serde fallback for `DocumentLineCountDistribution::mode_lines`.
fn default_mode_lines() -> u32 {
    3
}
4283
4284impl Default for DocumentLineCountDistribution {
4285    fn default() -> Self {
4286        Self {
4287            min_lines: default_min_lines(),
4288            max_lines: default_max_lines(),
4289            mode_lines: default_mode_lines(),
4290        }
4291    }
4292}
4293
4294/// Cash discount configuration.
4295#[derive(Debug, Clone, Serialize, Deserialize)]
4296pub struct CashDiscountConfig {
4297    /// Percentage of invoices eligible for cash discount
4298    #[serde(default = "default_discount_eligible_rate")]
4299    pub eligible_rate: f64,
4300    /// Rate at which customers take the discount
4301    #[serde(default = "default_discount_taken_rate")]
4302    pub taken_rate: f64,
4303    /// Standard discount percentage
4304    #[serde(default = "default_discount_percent")]
4305    pub discount_percent: f64,
4306    /// Days within which discount must be taken
4307    #[serde(default = "default_discount_days")]
4308    pub discount_days: u32,
4309}
4310
/// Serde fallback for `CashDiscountConfig::eligible_rate`.
fn default_discount_eligible_rate() -> f64 {
    0.30
}

/// Serde fallback for `CashDiscountConfig::taken_rate`.
fn default_discount_taken_rate() -> f64 {
    0.60
}

/// Serde fallback for `CashDiscountConfig::discount_percent`.
fn default_discount_percent() -> f64 {
    0.02
}

/// Serde fallback for `CashDiscountConfig::discount_days`.
fn default_discount_days() -> u32 {
    10
}
4326
4327impl Default for CashDiscountConfig {
4328    fn default() -> Self {
4329        Self {
4330            eligible_rate: default_discount_eligible_rate(),
4331            taken_rate: default_discount_taken_rate(),
4332            discount_percent: default_discount_percent(),
4333            discount_days: default_discount_days(),
4334        }
4335    }
4336}
4337
4338// ============================================================================
4339// Intercompany Configuration
4340// ============================================================================
4341
4342/// Intercompany transaction configuration.
4343#[derive(Debug, Clone, Serialize, Deserialize)]
4344pub struct IntercompanyConfig {
4345    /// Enable intercompany transaction generation
4346    #[serde(default)]
4347    pub enabled: bool,
4348    /// Rate of transactions that are intercompany
4349    #[serde(default = "default_ic_transaction_rate")]
4350    pub ic_transaction_rate: f64,
4351    /// Transfer pricing method
4352    #[serde(default)]
4353    pub transfer_pricing_method: TransferPricingMethod,
4354    /// Transfer pricing markup percentage (for cost-plus)
4355    #[serde(default = "default_markup_percent")]
4356    pub markup_percent: f64,
4357    /// Generate matched IC pairs (offsetting entries)
4358    #[serde(default = "default_true")]
4359    pub generate_matched_pairs: bool,
4360    /// IC transaction type distribution
4361    #[serde(default)]
4362    pub transaction_type_distribution: ICTransactionTypeDistribution,
4363    /// Generate elimination entries for consolidation
4364    #[serde(default)]
4365    pub generate_eliminations: bool,
4366}
4367
/// Serde fallback for `IntercompanyConfig::ic_transaction_rate`.
fn default_ic_transaction_rate() -> f64 {
    const IC_TRANSACTION_RATE: f64 = 0.15;
    IC_TRANSACTION_RATE
}

/// Serde fallback for `IntercompanyConfig::markup_percent`.
fn default_markup_percent() -> f64 {
    const MARKUP_PERCENT: f64 = 0.05;
    MARKUP_PERCENT
}
4375
4376impl Default for IntercompanyConfig {
4377    fn default() -> Self {
4378        Self {
4379            enabled: false,
4380            ic_transaction_rate: default_ic_transaction_rate(),
4381            transfer_pricing_method: TransferPricingMethod::default(),
4382            markup_percent: default_markup_percent(),
4383            generate_matched_pairs: true,
4384            transaction_type_distribution: ICTransactionTypeDistribution::default(),
4385            generate_eliminations: false,
4386        }
4387    }
4388}
4389
4390/// Transfer pricing method.
4391#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
4392#[serde(rename_all = "snake_case")]
4393pub enum TransferPricingMethod {
4394    /// Cost plus a markup
4395    #[default]
4396    CostPlus,
4397    /// Comparable uncontrolled price
4398    ComparableUncontrolled,
4399    /// Resale price method
4400    ResalePrice,
4401    /// Transactional net margin method
4402    TransactionalNetMargin,
4403    /// Profit split method
4404    ProfitSplit,
4405}
4406
4407/// IC transaction type distribution.
4408#[derive(Debug, Clone, Serialize, Deserialize)]
4409pub struct ICTransactionTypeDistribution {
4410    /// Goods sales between entities
4411    pub goods_sale: f64,
4412    /// Services provided
4413    pub service_provided: f64,
4414    /// Intercompany loans
4415    pub loan: f64,
4416    /// Dividends
4417    pub dividend: f64,
4418    /// Management fees
4419    pub management_fee: f64,
4420    /// Royalties
4421    pub royalty: f64,
4422    /// Cost sharing
4423    pub cost_sharing: f64,
4424}
4425
4426impl Default for ICTransactionTypeDistribution {
4427    fn default() -> Self {
4428        Self {
4429            goods_sale: 0.35,
4430            service_provided: 0.20,
4431            loan: 0.10,
4432            dividend: 0.05,
4433            management_fee: 0.15,
4434            royalty: 0.10,
4435            cost_sharing: 0.05,
4436        }
4437    }
4438}
4439
4440// ============================================================================
4441// Balance Configuration
4442// ============================================================================
4443
4444/// Balance and trial balance configuration.
4445#[derive(Debug, Clone, Serialize, Deserialize)]
4446pub struct BalanceConfig {
4447    /// Generate opening balances
4448    #[serde(default)]
4449    pub generate_opening_balances: bool,
4450    /// Generate trial balances
4451    #[serde(default = "default_true")]
4452    pub generate_trial_balances: bool,
4453    /// Target gross margin (for revenue/COGS coherence)
4454    #[serde(default = "default_gross_margin")]
4455    pub target_gross_margin: f64,
4456    /// Target DSO (Days Sales Outstanding)
4457    #[serde(default = "default_dso")]
4458    pub target_dso_days: u32,
4459    /// Target DPO (Days Payable Outstanding)
4460    #[serde(default = "default_dpo")]
4461    pub target_dpo_days: u32,
4462    /// Target current ratio
4463    #[serde(default = "default_current_ratio")]
4464    pub target_current_ratio: f64,
4465    /// Target debt-to-equity ratio
4466    #[serde(default = "default_debt_equity")]
4467    pub target_debt_to_equity: f64,
4468    /// Validate balance sheet equation (A = L + E)
4469    #[serde(default = "default_true")]
4470    pub validate_balance_equation: bool,
4471    /// Reconcile subledgers to GL control accounts
4472    #[serde(default = "default_true")]
4473    pub reconcile_subledgers: bool,
4474}
4475
/// Serde fallback for `BalanceConfig::target_gross_margin`.
fn default_gross_margin() -> f64 {
    const GROSS_MARGIN: f64 = 0.35;
    GROSS_MARGIN
}

/// Serde fallback for `BalanceConfig::target_dso_days`.
fn default_dso() -> u32 {
    const DSO_DAYS: u32 = 45;
    DSO_DAYS
}

/// Serde fallback for `BalanceConfig::target_dpo_days`.
fn default_dpo() -> u32 {
    const DPO_DAYS: u32 = 30;
    DPO_DAYS
}

/// Serde fallback for `BalanceConfig::target_current_ratio`.
fn default_current_ratio() -> f64 {
    const CURRENT_RATIO: f64 = 1.5;
    CURRENT_RATIO
}

/// Serde fallback for `BalanceConfig::target_debt_to_equity`.
fn default_debt_equity() -> f64 {
    const DEBT_TO_EQUITY: f64 = 0.5;
    DEBT_TO_EQUITY
}
4495
4496impl Default for BalanceConfig {
4497    fn default() -> Self {
4498        Self {
4499            generate_opening_balances: false,
4500            generate_trial_balances: true,
4501            target_gross_margin: default_gross_margin(),
4502            target_dso_days: default_dso(),
4503            target_dpo_days: default_dpo(),
4504            target_current_ratio: default_current_ratio(),
4505            target_debt_to_equity: default_debt_equity(),
4506            validate_balance_equation: true,
4507            reconcile_subledgers: true,
4508        }
4509    }
4510}
4511
4512// ==========================================================================
4513// OCPM (Object-Centric Process Mining) Configuration
4514// ==========================================================================
4515
4516/// OCPM (Object-Centric Process Mining) configuration.
4517///
4518/// Controls generation of OCEL 2.0 compatible event logs with
4519/// many-to-many event-to-object relationships.
4520#[derive(Debug, Clone, Serialize, Deserialize)]
4521pub struct OcpmConfig {
4522    /// Enable OCPM event log generation
4523    #[serde(default)]
4524    pub enabled: bool,
4525
4526    /// Generate lifecycle events (Start/Complete pairs vs atomic events)
4527    #[serde(default = "default_true")]
4528    pub generate_lifecycle_events: bool,
4529
4530    /// Include object-to-object relationships in output
4531    #[serde(default = "default_true")]
4532    pub include_object_relationships: bool,
4533
4534    /// Compute and export process variants
4535    #[serde(default = "default_true")]
4536    pub compute_variants: bool,
4537
4538    /// Maximum variants to track (0 = unlimited)
4539    #[serde(default)]
4540    pub max_variants: usize,
4541
4542    /// P2P process configuration
4543    #[serde(default)]
4544    pub p2p_process: OcpmProcessConfig,
4545
4546    /// O2C process configuration
4547    #[serde(default)]
4548    pub o2c_process: OcpmProcessConfig,
4549
4550    /// Output format configuration
4551    #[serde(default)]
4552    pub output: OcpmOutputConfig,
4553}
4554
4555impl Default for OcpmConfig {
4556    fn default() -> Self {
4557        Self {
4558            enabled: false,
4559            generate_lifecycle_events: true,
4560            include_object_relationships: true,
4561            compute_variants: true,
4562            max_variants: 0,
4563            p2p_process: OcpmProcessConfig::default(),
4564            o2c_process: OcpmProcessConfig::default(),
4565            output: OcpmOutputConfig::default(),
4566        }
4567    }
4568}
4569
4570/// Process-specific OCPM configuration.
4571#[derive(Debug, Clone, Serialize, Deserialize)]
4572pub struct OcpmProcessConfig {
4573    /// Rework probability (0.0-1.0)
4574    #[serde(default = "default_rework_probability")]
4575    pub rework_probability: f64,
4576
4577    /// Skip step probability (0.0-1.0)
4578    #[serde(default = "default_skip_probability")]
4579    pub skip_step_probability: f64,
4580
4581    /// Out-of-order step probability (0.0-1.0)
4582    #[serde(default = "default_out_of_order_probability")]
4583    pub out_of_order_probability: f64,
4584}
4585
4586// Defaults deliberately produce variant counts and Inductive-Miner fitness
4587// in the range seen in real ERP data (dozens of variants, ~0.7–0.9 fitness).
4588// Lowering them all to 0 yields a single-variant happy-path log.
/// Serde fallback for `OcpmProcessConfig::rework_probability`.
fn default_rework_probability() -> f64 {
    const REWORK: f64 = 0.15;
    REWORK
}

/// Serde fallback for `OcpmProcessConfig::skip_step_probability`.
fn default_skip_probability() -> f64 {
    const SKIP_STEP: f64 = 0.10;
    SKIP_STEP
}

/// Serde fallback for `OcpmProcessConfig::out_of_order_probability`.
fn default_out_of_order_probability() -> f64 {
    const OUT_OF_ORDER: f64 = 0.08;
    OUT_OF_ORDER
}
4600
4601impl Default for OcpmProcessConfig {
4602    fn default() -> Self {
4603        Self {
4604            rework_probability: default_rework_probability(),
4605            skip_step_probability: default_skip_probability(),
4606            out_of_order_probability: default_out_of_order_probability(),
4607        }
4608    }
4609}
4610
4611/// OCPM output format configuration.
4612#[derive(Debug, Clone, Serialize, Deserialize)]
4613pub struct OcpmOutputConfig {
4614    /// Export OCEL 2.0 JSON format
4615    #[serde(default = "default_true")]
4616    pub ocel_json: bool,
4617
4618    /// Export OCEL 2.0 XML format
4619    #[serde(default)]
4620    pub ocel_xml: bool,
4621
4622    /// Export XES 2.0 XML format (IEEE standard for process mining tools)
4623    #[serde(default)]
4624    pub xes: bool,
4625
4626    /// Include lifecycle transitions in XES output (start/complete pairs)
4627    #[serde(default = "default_true")]
4628    pub xes_include_lifecycle: bool,
4629
4630    /// Include resource attributes in XES output
4631    #[serde(default = "default_true")]
4632    pub xes_include_resources: bool,
4633
4634    /// Export flattened CSV for each object type
4635    #[serde(default = "default_true")]
4636    pub flattened_csv: bool,
4637
4638    /// Export event-object relationship table
4639    #[serde(default = "default_true")]
4640    pub event_object_csv: bool,
4641
4642    /// Export object-object relationship table
4643    #[serde(default = "default_true")]
4644    pub object_relationship_csv: bool,
4645
4646    /// Export process variants summary
4647    #[serde(default = "default_true")]
4648    pub variants_csv: bool,
4649
4650    /// Export reference process models (canonical P2P, O2C, R2R)
4651    #[serde(default)]
4652    pub export_reference_models: bool,
4653}
4654
4655impl Default for OcpmOutputConfig {
4656    fn default() -> Self {
4657        Self {
4658            ocel_json: true,
4659            ocel_xml: false,
4660            xes: false,
4661            xes_include_lifecycle: true,
4662            xes_include_resources: true,
4663            flattened_csv: true,
4664            event_object_csv: true,
4665            object_relationship_csv: true,
4666            variants_csv: true,
4667            export_reference_models: false,
4668        }
4669    }
4670}
4671
4672/// Audit engagement and workpaper generation configuration.
4673#[derive(Debug, Clone, Serialize, Deserialize)]
4674pub struct AuditGenerationConfig {
4675    /// Enable audit engagement generation
4676    #[serde(default)]
4677    pub enabled: bool,
4678
4679    /// Gate for workpaper generation (v3.3.2+).
4680    /// When `false`, workpapers and dependent evidence are skipped
4681    /// while engagements / risk assessments / findings still generate.
4682    #[serde(default = "default_true")]
4683    pub generate_workpapers: bool,
4684
4685    /// Engagement type distribution (v3.3.2+). Drives per-engagement
4686    /// type draw via `AuditEngagementGenerator::draw_engagement_type`.
4687    #[serde(default)]
4688    pub engagement_types: AuditEngagementTypesConfig,
4689
4690    /// Workpaper configuration (v3.3.2+). `average_per_phase` maps onto
4691    /// `WorkpaperGenerator.workpapers_per_section` as a ±50% band
4692    /// around the average. Sampling / ISA / cross-reference flags are
4693    /// surfaced for downstream formatting overlays.
4694    #[serde(default)]
4695    pub workpapers: WorkpaperConfig,
4696
4697    /// Audit team configuration (v3.3.2+). `min_team_size` /
4698    /// `max_team_size` map directly onto
4699    /// `AuditEngagementGenerator.team_size_range`.
4700    /// `specialist_probability` is reserved for v3.4 (explicit
4701    /// specialist-role support).
4702    #[serde(default)]
4703    pub team: AuditTeamConfig,
4704
4705    /// Review workflow configuration (v3.3.2+).
4706    /// `average_review_delay_days` drives both
4707    /// `first_review_delay_range` and `second_review_delay_range` as
4708    /// a ±1-day band around the average. `rework_probability` and
4709    /// `require_partner_signoff` are reserved for v3.4 workflow
4710    /// modeling.
4711    #[serde(default)]
4712    pub review: ReviewWorkflowConfig,
4713
4714    /// FSM-driven audit generation configuration.
4715    #[serde(default)]
4716    pub fsm: Option<AuditFsmConfig>,
4717
4718    /// v3.3.0: IT general controls (access logs, change management
4719    /// records) emitted alongside audit engagements. Requires both
4720    /// `audit.enabled = true` and `audit.it_controls.enabled = true`
4721    /// to take effect — the latter defaults to `false` so current
4722    /// archives are byte-identical to v3.2.1.
4723    #[serde(default)]
4724    pub it_controls: ItControlsConfig,
4725}
4726
4727/// IT general controls config (v3.3.0+).
4728#[derive(Debug, Clone, Serialize, Deserialize)]
4729pub struct ItControlsConfig {
4730    /// Master switch — when `false`, no access logs or change records
4731    /// are generated.
4732    #[serde(default)]
4733    pub enabled: bool,
4734    /// Number of access-log entries per engagement (approximate — the
4735    /// generator may round or scale based on company size).
4736    #[serde(default = "default_access_log_count")]
4737    pub access_logs_per_engagement: usize,
4738    /// Number of change-management records per engagement.
4739    #[serde(default = "default_change_record_count")]
4740    pub change_records_per_engagement: usize,
4741}
4742
/// Serde fallback for `ItControlsConfig::access_logs_per_engagement`.
fn default_access_log_count() -> usize {
    const ACCESS_LOGS: usize = 500;
    ACCESS_LOGS
}

/// Serde fallback for `ItControlsConfig::change_records_per_engagement`.
fn default_change_record_count() -> usize {
    const CHANGE_RECORDS: usize = 50;
    CHANGE_RECORDS
}
4749
4750impl Default for ItControlsConfig {
4751    fn default() -> Self {
4752        Self {
4753            enabled: false,
4754            access_logs_per_engagement: default_access_log_count(),
4755            change_records_per_engagement: default_change_record_count(),
4756        }
4757    }
4758}
4759
4760impl Default for AuditGenerationConfig {
4761    fn default() -> Self {
4762        Self {
4763            enabled: false,
4764            generate_workpapers: true,
4765            engagement_types: AuditEngagementTypesConfig::default(),
4766            workpapers: WorkpaperConfig::default(),
4767            team: AuditTeamConfig::default(),
4768            review: ReviewWorkflowConfig::default(),
4769            fsm: None,
4770            it_controls: ItControlsConfig::default(),
4771        }
4772    }
4773}
4774
4775/// FSM-driven audit generation configuration.
4776#[derive(Debug, Clone, Serialize, Deserialize)]
4777pub struct AuditFsmConfig {
4778    /// Enable FSM-driven audit generation.
4779    #[serde(default)]
4780    pub enabled: bool,
4781
4782    /// Blueprint source: "builtin:fsa", "builtin:ia", or a file path.
4783    #[serde(default = "default_audit_fsm_blueprint")]
4784    pub blueprint: String,
4785
4786    /// Overlay source: "builtin:default", "builtin:thorough", "builtin:rushed", or a file path.
4787    #[serde(default = "default_audit_fsm_overlay")]
4788    pub overlay: String,
4789
4790    /// Depth level override.
4791    #[serde(default)]
4792    pub depth: Option<String>,
4793
4794    /// Discriminator filter.
4795    #[serde(default)]
4796    pub discriminators: std::collections::HashMap<String, Vec<String>>,
4797
4798    /// Event trail output config.
4799    #[serde(default)]
4800    pub event_trail: AuditEventTrailConfig,
4801
4802    /// RNG seed override.
4803    #[serde(default)]
4804    pub seed: Option<u64>,
4805}
4806
4807impl Default for AuditFsmConfig {
4808    fn default() -> Self {
4809        Self {
4810            enabled: false,
4811            blueprint: default_audit_fsm_blueprint(),
4812            overlay: default_audit_fsm_overlay(),
4813            depth: None,
4814            discriminators: std::collections::HashMap::new(),
4815            event_trail: AuditEventTrailConfig::default(),
4816            seed: None,
4817        }
4818    }
4819}
4820
/// Serde fallback for `AuditFsmConfig::blueprint`.
fn default_audit_fsm_blueprint() -> String {
    String::from("builtin:fsa")
}

/// Serde fallback for `AuditFsmConfig::overlay`.
fn default_audit_fsm_overlay() -> String {
    String::from("builtin:default")
}
4828
4829/// Event trail output configuration for FSM-driven audit generation.
4830#[derive(Debug, Clone, Serialize, Deserialize)]
4831pub struct AuditEventTrailConfig {
4832    /// Emit a flat event log.
4833    #[serde(default = "default_true")]
4834    pub flat_log: bool,
4835    /// Project events to OCEL 2.0 format.
4836    #[serde(default)]
4837    pub ocel_projection: bool,
4838}
4839
4840impl Default for AuditEventTrailConfig {
4841    fn default() -> Self {
4842        Self {
4843            flat_log: true,
4844            ocel_projection: false,
4845        }
4846    }
4847}
4848
4849/// Engagement type distribution configuration.
4850#[derive(Debug, Clone, Serialize, Deserialize)]
4851pub struct AuditEngagementTypesConfig {
4852    /// Financial statement audit probability
4853    #[serde(default = "default_financial_audit_prob")]
4854    pub financial_statement: f64,
4855    /// SOX/ICFR audit probability
4856    #[serde(default = "default_sox_audit_prob")]
4857    pub sox_icfr: f64,
4858    /// Integrated audit probability
4859    #[serde(default = "default_integrated_audit_prob")]
4860    pub integrated: f64,
4861    /// Review engagement probability
4862    #[serde(default = "default_review_prob")]
4863    pub review: f64,
4864    /// Agreed-upon procedures probability
4865    #[serde(default = "default_aup_prob")]
4866    pub agreed_upon_procedures: f64,
4867}
4868
/// Serde fallback for `AuditEngagementTypesConfig::financial_statement`.
fn default_financial_audit_prob() -> f64 {
    const FINANCIAL_STATEMENT: f64 = 0.40;
    FINANCIAL_STATEMENT
}

/// Serde fallback for `AuditEngagementTypesConfig::sox_icfr`.
fn default_sox_audit_prob() -> f64 {
    const SOX_ICFR: f64 = 0.20;
    SOX_ICFR
}

/// Serde fallback for `AuditEngagementTypesConfig::integrated`.
fn default_integrated_audit_prob() -> f64 {
    const INTEGRATED: f64 = 0.25;
    INTEGRATED
}

/// Serde fallback for `AuditEngagementTypesConfig::review`.
fn default_review_prob() -> f64 {
    const REVIEW: f64 = 0.10;
    REVIEW
}

/// Serde fallback for `AuditEngagementTypesConfig::agreed_upon_procedures`.
fn default_aup_prob() -> f64 {
    const AGREED_UPON_PROCEDURES: f64 = 0.05;
    AGREED_UPON_PROCEDURES
}
4884
4885impl Default for AuditEngagementTypesConfig {
4886    fn default() -> Self {
4887        Self {
4888            financial_statement: default_financial_audit_prob(),
4889            sox_icfr: default_sox_audit_prob(),
4890            integrated: default_integrated_audit_prob(),
4891            review: default_review_prob(),
4892            agreed_upon_procedures: default_aup_prob(),
4893        }
4894    }
4895}
4896
4897/// Workpaper generation configuration.
4898#[derive(Debug, Clone, Serialize, Deserialize)]
4899pub struct WorkpaperConfig {
4900    /// Average workpapers per engagement phase
4901    #[serde(default = "default_workpapers_per_phase")]
4902    pub average_per_phase: usize,
4903
4904    /// Include ISA compliance references
4905    #[serde(default = "default_true")]
4906    pub include_isa_references: bool,
4907
4908    /// Generate sample details
4909    #[serde(default = "default_true")]
4910    pub include_sample_details: bool,
4911
4912    /// Include cross-references between workpapers
4913    #[serde(default = "default_true")]
4914    pub include_cross_references: bool,
4915
4916    /// Sampling configuration
4917    #[serde(default)]
4918    pub sampling: SamplingConfig,
4919}
4920
/// Serde fallback for `WorkpaperConfig::average_per_phase`.
fn default_workpapers_per_phase() -> usize {
    const WORKPAPERS_PER_PHASE: usize = 5;
    WORKPAPERS_PER_PHASE
}
4924
4925impl Default for WorkpaperConfig {
4926    fn default() -> Self {
4927        Self {
4928            average_per_phase: default_workpapers_per_phase(),
4929            include_isa_references: true,
4930            include_sample_details: true,
4931            include_cross_references: true,
4932            sampling: SamplingConfig::default(),
4933        }
4934    }
4935}
4936
4937/// Sampling method configuration.
4938#[derive(Debug, Clone, Serialize, Deserialize)]
4939pub struct SamplingConfig {
4940    /// Statistical sampling rate (0.0-1.0)
4941    #[serde(default = "default_statistical_rate")]
4942    pub statistical_rate: f64,
4943    /// Judgmental sampling rate (0.0-1.0)
4944    #[serde(default = "default_judgmental_rate")]
4945    pub judgmental_rate: f64,
4946    /// Haphazard sampling rate (0.0-1.0)
4947    #[serde(default = "default_haphazard_rate")]
4948    pub haphazard_rate: f64,
4949    /// 100% examination rate (0.0-1.0)
4950    #[serde(default = "default_complete_examination_rate")]
4951    pub complete_examination_rate: f64,
4952}
4953
/// Serde fallback for `SamplingConfig::statistical_rate`.
fn default_statistical_rate() -> f64 {
    const STATISTICAL: f64 = 0.40;
    STATISTICAL
}

/// Serde fallback for `SamplingConfig::judgmental_rate`.
fn default_judgmental_rate() -> f64 {
    const JUDGMENTAL: f64 = 0.30;
    JUDGMENTAL
}

/// Serde fallback for `SamplingConfig::haphazard_rate`.
fn default_haphazard_rate() -> f64 {
    const HAPHAZARD: f64 = 0.20;
    HAPHAZARD
}

/// Serde fallback for `SamplingConfig::complete_examination_rate`.
fn default_complete_examination_rate() -> f64 {
    const COMPLETE_EXAMINATION: f64 = 0.10;
    COMPLETE_EXAMINATION
}
4966
4967impl Default for SamplingConfig {
4968    fn default() -> Self {
4969        Self {
4970            statistical_rate: default_statistical_rate(),
4971            judgmental_rate: default_judgmental_rate(),
4972            haphazard_rate: default_haphazard_rate(),
4973            complete_examination_rate: default_complete_examination_rate(),
4974        }
4975    }
4976}
4977
4978/// Audit team configuration.
4979#[derive(Debug, Clone, Serialize, Deserialize)]
4980pub struct AuditTeamConfig {
4981    /// Minimum team size
4982    #[serde(default = "default_min_team_size")]
4983    pub min_team_size: usize,
4984    /// Maximum team size
4985    #[serde(default = "default_max_team_size")]
4986    pub max_team_size: usize,
4987    /// Probability of having a specialist on the team
4988    #[serde(default = "default_specialist_probability")]
4989    pub specialist_probability: f64,
4990}
4991
/// Serde fallback for `AuditTeamConfig::min_team_size`.
fn default_min_team_size() -> usize {
    const MIN_TEAM_SIZE: usize = 3;
    MIN_TEAM_SIZE
}

/// Serde fallback for `AuditTeamConfig::max_team_size`.
fn default_max_team_size() -> usize {
    const MAX_TEAM_SIZE: usize = 8;
    MAX_TEAM_SIZE
}

/// Serde fallback for `AuditTeamConfig::specialist_probability`.
fn default_specialist_probability() -> f64 {
    const SPECIALIST_PROBABILITY: f64 = 0.30;
    SPECIALIST_PROBABILITY
}
5001
5002impl Default for AuditTeamConfig {
5003    fn default() -> Self {
5004        Self {
5005            min_team_size: default_min_team_size(),
5006            max_team_size: default_max_team_size(),
5007            specialist_probability: default_specialist_probability(),
5008        }
5009    }
5010}
5011
5012/// Review workflow configuration.
5013#[derive(Debug, Clone, Serialize, Deserialize)]
5014pub struct ReviewWorkflowConfig {
5015    /// Average days between preparer completion and first review
5016    #[serde(default = "default_review_delay_days")]
5017    pub average_review_delay_days: u32,
5018    /// Probability of review notes requiring rework
5019    #[serde(default = "default_rework_probability_review")]
5020    pub rework_probability: f64,
5021    /// Require partner sign-off for all workpapers
5022    #[serde(default = "default_true")]
5023    pub require_partner_signoff: bool,
5024}
5025
/// Serde fallback for `ReviewWorkflowConfig::average_review_delay_days`.
fn default_review_delay_days() -> u32 {
    const REVIEW_DELAY_DAYS: u32 = 2;
    REVIEW_DELAY_DAYS
}

/// Serde fallback for `ReviewWorkflowConfig::rework_probability`.
fn default_rework_probability_review() -> f64 {
    const REVIEW_REWORK: f64 = 0.15;
    REVIEW_REWORK
}
5032
5033impl Default for ReviewWorkflowConfig {
5034    fn default() -> Self {
5035        Self {
5036            average_review_delay_days: default_review_delay_days(),
5037            rework_probability: default_rework_probability_review(),
5038            require_partner_signoff: true,
5039        }
5040    }
5041}
5042
5043// =============================================================================
5044// Data Quality Configuration
5045// =============================================================================
5046
5047/// Data quality variation settings for realistic flakiness injection.
5048#[derive(Debug, Clone, Serialize, Deserialize)]
5049pub struct DataQualitySchemaConfig {
5050    /// Enable data quality variations
5051    #[serde(default)]
5052    pub enabled: bool,
5053    /// Preset to use (overrides individual settings if set)
5054    #[serde(default)]
5055    pub preset: DataQualityPreset,
5056    /// Missing value injection settings
5057    #[serde(default)]
5058    pub missing_values: MissingValuesSchemaConfig,
5059    /// Typo injection settings
5060    #[serde(default)]
5061    pub typos: TypoSchemaConfig,
5062    /// Format variation settings
5063    #[serde(default)]
5064    pub format_variations: FormatVariationSchemaConfig,
5065    /// Duplicate injection settings
5066    #[serde(default)]
5067    pub duplicates: DuplicateSchemaConfig,
5068    /// Encoding issue settings
5069    #[serde(default)]
5070    pub encoding_issues: EncodingIssueSchemaConfig,
5071    /// Generate quality issue labels for ML training
5072    #[serde(default)]
5073    pub generate_labels: bool,
5074    /// Per-sink quality profiles (different settings for CSV vs JSON etc.)
5075    #[serde(default)]
5076    pub sink_profiles: SinkQualityProfiles,
5077}
5078
5079impl Default for DataQualitySchemaConfig {
5080    fn default() -> Self {
5081        Self {
5082            enabled: false,
5083            preset: DataQualityPreset::None,
5084            missing_values: MissingValuesSchemaConfig::default(),
5085            typos: TypoSchemaConfig::default(),
5086            format_variations: FormatVariationSchemaConfig::default(),
5087            duplicates: DuplicateSchemaConfig::default(),
5088            encoding_issues: EncodingIssueSchemaConfig::default(),
5089            generate_labels: true,
5090            sink_profiles: SinkQualityProfiles::default(),
5091        }
5092    }
5093}
5094
5095impl DataQualitySchemaConfig {
5096    /// Creates a config for a specific preset profile.
5097    pub fn with_preset(preset: DataQualityPreset) -> Self {
5098        let mut config = Self {
5099            preset,
5100            ..Default::default()
5101        };
5102        config.apply_preset();
5103        config
5104    }
5105
5106    /// Applies the preset settings to the individual configuration fields.
5107    /// Call this after deserializing if preset is not Custom or None.
5108    pub fn apply_preset(&mut self) {
5109        if !self.preset.overrides_settings() {
5110            return;
5111        }
5112
5113        self.enabled = true;
5114
5115        // Missing values
5116        self.missing_values.enabled = self.preset.missing_rate() > 0.0;
5117        self.missing_values.rate = self.preset.missing_rate();
5118
5119        // Typos
5120        self.typos.enabled = self.preset.typo_rate() > 0.0;
5121        self.typos.char_error_rate = self.preset.typo_rate();
5122
5123        // Duplicates
5124        self.duplicates.enabled = self.preset.duplicate_rate() > 0.0;
5125        self.duplicates.exact_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5126        self.duplicates.near_duplicate_ratio = self.preset.duplicate_rate() * 0.4;
5127        self.duplicates.fuzzy_duplicate_ratio = self.preset.duplicate_rate() * 0.2;
5128
5129        // Format variations
5130        self.format_variations.enabled = self.preset.format_variations_enabled();
5131
5132        // Encoding issues
5133        self.encoding_issues.enabled = self.preset.encoding_issues_enabled();
5134        self.encoding_issues.rate = self.preset.encoding_issue_rate();
5135
5136        // OCR errors for typos in legacy preset
5137        if self.preset.ocr_errors_enabled() {
5138            self.typos.type_weights.ocr_errors = 0.3;
5139        }
5140    }
5141
5142    /// Returns the effective missing value rate (considering preset).
5143    pub fn effective_missing_rate(&self) -> f64 {
5144        if self.preset.overrides_settings() {
5145            self.preset.missing_rate()
5146        } else {
5147            self.missing_values.rate
5148        }
5149    }
5150
5151    /// Returns the effective typo rate (considering preset).
5152    pub fn effective_typo_rate(&self) -> f64 {
5153        if self.preset.overrides_settings() {
5154            self.preset.typo_rate()
5155        } else {
5156            self.typos.char_error_rate
5157        }
5158    }
5159
5160    /// Returns the effective duplicate rate (considering preset).
5161    pub fn effective_duplicate_rate(&self) -> f64 {
5162        if self.preset.overrides_settings() {
5163            self.preset.duplicate_rate()
5164        } else {
5165            self.duplicates.exact_duplicate_ratio
5166                + self.duplicates.near_duplicate_ratio
5167                + self.duplicates.fuzzy_duplicate_ratio
5168        }
5169    }
5170
5171    /// Creates a clean profile config.
5172    pub fn clean() -> Self {
5173        Self::with_preset(DataQualityPreset::Clean)
5174    }
5175
5176    /// Creates a noisy profile config.
5177    pub fn noisy() -> Self {
5178        Self::with_preset(DataQualityPreset::Noisy)
5179    }
5180
5181    /// Creates a legacy profile config.
5182    pub fn legacy() -> Self {
5183        Self::with_preset(DataQualityPreset::Legacy)
5184    }
5185}
5186
5187/// Preset configurations for common data quality scenarios.
5188#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5189#[serde(rename_all = "snake_case")]
5190pub enum DataQualityPreset {
5191    /// No data quality variations (clean data)
5192    #[default]
5193    None,
5194    /// Minimal variations (very clean data with rare issues)
5195    Minimal,
5196    /// Normal variations (realistic enterprise data quality)
5197    Normal,
5198    /// High variations (messy data for stress testing)
5199    High,
5200    /// Custom (use individual settings)
5201    Custom,
5202
5203    // ========================================
5204    // ML-Oriented Profiles (Phase 2.1)
5205    // ========================================
5206    /// Clean profile for ML training - minimal data quality issues
5207    /// Missing: 0.1%, Typos: 0.05%, Duplicates: 0%, Format: None
5208    Clean,
5209    /// Noisy profile simulating typical production data issues
5210    /// Missing: 5%, Typos: 2%, Duplicates: 1%, Format: Medium
5211    Noisy,
5212    /// Legacy profile simulating migrated/OCR'd historical data
5213    /// Missing: 10%, Typos: 5%, Duplicates: 3%, Format: Heavy + OCR
5214    Legacy,
5215}
5216
5217impl DataQualityPreset {
5218    /// Returns the missing value rate for this preset.
5219    pub fn missing_rate(&self) -> f64 {
5220        match self {
5221            DataQualityPreset::None => 0.0,
5222            DataQualityPreset::Minimal => 0.005,
5223            DataQualityPreset::Normal => 0.02,
5224            DataQualityPreset::High => 0.08,
5225            DataQualityPreset::Custom => 0.01, // Use config value
5226            DataQualityPreset::Clean => 0.001,
5227            DataQualityPreset::Noisy => 0.05,
5228            DataQualityPreset::Legacy => 0.10,
5229        }
5230    }
5231
5232    /// Returns the typo rate for this preset.
5233    pub fn typo_rate(&self) -> f64 {
5234        match self {
5235            DataQualityPreset::None => 0.0,
5236            DataQualityPreset::Minimal => 0.0005,
5237            DataQualityPreset::Normal => 0.002,
5238            DataQualityPreset::High => 0.01,
5239            DataQualityPreset::Custom => 0.001, // Use config value
5240            DataQualityPreset::Clean => 0.0005,
5241            DataQualityPreset::Noisy => 0.02,
5242            DataQualityPreset::Legacy => 0.05,
5243        }
5244    }
5245
5246    /// Returns the duplicate rate for this preset.
5247    pub fn duplicate_rate(&self) -> f64 {
5248        match self {
5249            DataQualityPreset::None => 0.0,
5250            DataQualityPreset::Minimal => 0.001,
5251            DataQualityPreset::Normal => 0.005,
5252            DataQualityPreset::High => 0.02,
5253            DataQualityPreset::Custom => 0.0, // Use config value
5254            DataQualityPreset::Clean => 0.0,
5255            DataQualityPreset::Noisy => 0.01,
5256            DataQualityPreset::Legacy => 0.03,
5257        }
5258    }
5259
5260    /// Returns whether format variations are enabled for this preset.
5261    pub fn format_variations_enabled(&self) -> bool {
5262        match self {
5263            DataQualityPreset::None | DataQualityPreset::Clean => false,
5264            DataQualityPreset::Minimal => true,
5265            DataQualityPreset::Normal => true,
5266            DataQualityPreset::High => true,
5267            DataQualityPreset::Custom => true,
5268            DataQualityPreset::Noisy => true,
5269            DataQualityPreset::Legacy => true,
5270        }
5271    }
5272
5273    /// Returns whether OCR-style errors are enabled for this preset.
5274    pub fn ocr_errors_enabled(&self) -> bool {
5275        matches!(self, DataQualityPreset::Legacy | DataQualityPreset::High)
5276    }
5277
5278    /// Returns whether encoding issues are enabled for this preset.
5279    pub fn encoding_issues_enabled(&self) -> bool {
5280        matches!(
5281            self,
5282            DataQualityPreset::Legacy | DataQualityPreset::High | DataQualityPreset::Noisy
5283        )
5284    }
5285
5286    /// Returns the encoding issue rate for this preset.
5287    pub fn encoding_issue_rate(&self) -> f64 {
5288        match self {
5289            DataQualityPreset::None | DataQualityPreset::Clean | DataQualityPreset::Minimal => 0.0,
5290            DataQualityPreset::Normal => 0.002,
5291            DataQualityPreset::High => 0.01,
5292            DataQualityPreset::Custom => 0.0,
5293            DataQualityPreset::Noisy => 0.005,
5294            DataQualityPreset::Legacy => 0.02,
5295        }
5296    }
5297
5298    /// Returns true if this preset overrides individual settings.
5299    pub fn overrides_settings(&self) -> bool {
5300        !matches!(self, DataQualityPreset::Custom | DataQualityPreset::None)
5301    }
5302
5303    /// Returns a human-readable description of this preset.
5304    pub fn description(&self) -> &'static str {
5305        match self {
5306            DataQualityPreset::None => "No data quality issues (pristine data)",
5307            DataQualityPreset::Minimal => "Very rare data quality issues",
5308            DataQualityPreset::Normal => "Realistic enterprise data quality",
5309            DataQualityPreset::High => "Messy data for stress testing",
5310            DataQualityPreset::Custom => "Custom settings from configuration",
5311            DataQualityPreset::Clean => "ML-ready clean data with minimal issues",
5312            DataQualityPreset::Noisy => "Typical production data with moderate issues",
5313            DataQualityPreset::Legacy => "Legacy/migrated data with heavy issues and OCR errors",
5314        }
5315    }
5316}
5317
5318/// Missing value injection configuration.
5319#[derive(Debug, Clone, Serialize, Deserialize)]
5320pub struct MissingValuesSchemaConfig {
5321    /// Enable missing value injection
5322    #[serde(default)]
5323    pub enabled: bool,
5324    /// Global missing rate (0.0 to 1.0)
5325    #[serde(default = "default_missing_rate")]
5326    pub rate: f64,
5327    /// Missing value strategy
5328    #[serde(default)]
5329    pub strategy: MissingValueStrategy,
5330    /// Field-specific rates (field name -> rate)
5331    #[serde(default)]
5332    pub field_rates: std::collections::HashMap<String, f64>,
5333    /// Fields that should never have missing values
5334    #[serde(default)]
5335    pub protected_fields: Vec<String>,
5336}
5337
/// Serde default for the global missing-value rate (1%).
fn default_missing_rate() -> f64 {
    0.01_f64
}
5341
5342impl Default for MissingValuesSchemaConfig {
5343    fn default() -> Self {
5344        Self {
5345            enabled: false,
5346            rate: default_missing_rate(),
5347            strategy: MissingValueStrategy::Mcar,
5348            field_rates: std::collections::HashMap::new(),
5349            protected_fields: vec![
5350                "document_id".to_string(),
5351                "company_code".to_string(),
5352                "posting_date".to_string(),
5353            ],
5354        }
5355    }
5356}
5357
5358/// Missing value strategy types.
5359#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5360#[serde(rename_all = "snake_case")]
5361pub enum MissingValueStrategy {
5362    /// Missing Completely At Random - equal probability for all values
5363    #[default]
5364    Mcar,
5365    /// Missing At Random - depends on other observed values
5366    Mar,
5367    /// Missing Not At Random - depends on the value itself
5368    Mnar,
5369    /// Systematic - entire field groups missing together
5370    Systematic,
5371}
5372
5373/// Typo injection configuration.
5374#[derive(Debug, Clone, Serialize, Deserialize)]
5375pub struct TypoSchemaConfig {
5376    /// Enable typo injection
5377    #[serde(default)]
5378    pub enabled: bool,
5379    /// Character error rate (per character, not per field)
5380    #[serde(default = "default_typo_rate")]
5381    pub char_error_rate: f64,
5382    /// Typo type weights
5383    #[serde(default)]
5384    pub type_weights: TypoTypeWeights,
5385    /// Fields that should never have typos
5386    #[serde(default)]
5387    pub protected_fields: Vec<String>,
5388}
5389
/// Serde default for the per-character typo rate (0.1%).
fn default_typo_rate() -> f64 {
    0.001_f64
}
5393
5394impl Default for TypoSchemaConfig {
5395    fn default() -> Self {
5396        Self {
5397            enabled: false,
5398            char_error_rate: default_typo_rate(),
5399            type_weights: TypoTypeWeights::default(),
5400            protected_fields: vec![
5401                "document_id".to_string(),
5402                "gl_account".to_string(),
5403                "company_code".to_string(),
5404            ],
5405        }
5406    }
5407}
5408
5409/// Weights for different typo types.
5410#[derive(Debug, Clone, Serialize, Deserialize)]
5411pub struct TypoTypeWeights {
5412    /// Keyboard-adjacent substitution (e.g., 'a' -> 's')
5413    #[serde(default = "default_substitution_weight")]
5414    pub substitution: f64,
5415    /// Adjacent character transposition (e.g., 'ab' -> 'ba')
5416    #[serde(default = "default_transposition_weight")]
5417    pub transposition: f64,
5418    /// Character insertion
5419    #[serde(default = "default_insertion_weight")]
5420    pub insertion: f64,
5421    /// Character deletion
5422    #[serde(default = "default_deletion_weight")]
5423    pub deletion: f64,
5424    /// OCR-style errors (e.g., '0' -> 'O')
5425    #[serde(default = "default_ocr_weight")]
5426    pub ocr_errors: f64,
5427    /// Homophone substitution (e.g., 'their' -> 'there')
5428    #[serde(default = "default_homophone_weight")]
5429    pub homophones: f64,
5430}
5431
/// Serde default weight for keyboard-adjacent substitutions.
fn default_substitution_weight() -> f64 {
    0.35_f64
}

/// Serde default weight for adjacent-character transpositions.
fn default_transposition_weight() -> f64 {
    0.25_f64
}

/// Serde default weight for character insertions.
fn default_insertion_weight() -> f64 {
    0.10_f64
}

/// Serde default weight for character deletions.
fn default_deletion_weight() -> f64 {
    0.15_f64
}

/// Serde default weight for OCR-style errors.
fn default_ocr_weight() -> f64 {
    0.10_f64
}

/// Serde default weight for homophone substitutions.
fn default_homophone_weight() -> f64 {
    0.05_f64
}
5450
5451impl Default for TypoTypeWeights {
5452    fn default() -> Self {
5453        Self {
5454            substitution: default_substitution_weight(),
5455            transposition: default_transposition_weight(),
5456            insertion: default_insertion_weight(),
5457            deletion: default_deletion_weight(),
5458            ocr_errors: default_ocr_weight(),
5459            homophones: default_homophone_weight(),
5460        }
5461    }
5462}
5463
5464/// Format variation configuration.
5465#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5466pub struct FormatVariationSchemaConfig {
5467    /// Enable format variations
5468    #[serde(default)]
5469    pub enabled: bool,
5470    /// Date format variation settings
5471    #[serde(default)]
5472    pub dates: DateFormatVariationConfig,
5473    /// Amount format variation settings
5474    #[serde(default)]
5475    pub amounts: AmountFormatVariationConfig,
5476    /// Identifier format variation settings
5477    #[serde(default)]
5478    pub identifiers: IdentifierFormatVariationConfig,
5479}
5480
5481/// Date format variation configuration.
5482#[derive(Debug, Clone, Serialize, Deserialize)]
5483pub struct DateFormatVariationConfig {
5484    /// Enable date format variations
5485    #[serde(default)]
5486    pub enabled: bool,
5487    /// Overall variation rate
5488    #[serde(default = "default_date_variation_rate")]
5489    pub rate: f64,
5490    /// Include ISO format (2024-01-15)
5491    #[serde(default = "default_true")]
5492    pub iso_format: bool,
5493    /// Include US format (01/15/2024)
5494    #[serde(default)]
5495    pub us_format: bool,
5496    /// Include EU format (15.01.2024)
5497    #[serde(default)]
5498    pub eu_format: bool,
5499    /// Include long format (January 15, 2024)
5500    #[serde(default)]
5501    pub long_format: bool,
5502}
5503
/// Serde default for the date format variation rate (5%).
fn default_date_variation_rate() -> f64 {
    0.05_f64
}
5507
5508impl Default for DateFormatVariationConfig {
5509    fn default() -> Self {
5510        Self {
5511            enabled: false,
5512            rate: default_date_variation_rate(),
5513            iso_format: true,
5514            us_format: false,
5515            eu_format: false,
5516            long_format: false,
5517        }
5518    }
5519}
5520
5521/// Amount format variation configuration.
5522#[derive(Debug, Clone, Serialize, Deserialize)]
5523pub struct AmountFormatVariationConfig {
5524    /// Enable amount format variations
5525    #[serde(default)]
5526    pub enabled: bool,
5527    /// Overall variation rate
5528    #[serde(default = "default_amount_variation_rate")]
5529    pub rate: f64,
5530    /// Include US comma format (1,234.56)
5531    #[serde(default)]
5532    pub us_comma_format: bool,
5533    /// Include EU format (1.234,56)
5534    #[serde(default)]
5535    pub eu_format: bool,
5536    /// Include currency prefix ($1,234.56)
5537    #[serde(default)]
5538    pub currency_prefix: bool,
5539    /// Include accounting format with parentheses for negatives
5540    #[serde(default)]
5541    pub accounting_format: bool,
5542}
5543
/// Serde default for the amount format variation rate (2%).
fn default_amount_variation_rate() -> f64 {
    0.02_f64
}
5547
5548impl Default for AmountFormatVariationConfig {
5549    fn default() -> Self {
5550        Self {
5551            enabled: false,
5552            rate: default_amount_variation_rate(),
5553            us_comma_format: false,
5554            eu_format: false,
5555            currency_prefix: false,
5556            accounting_format: false,
5557        }
5558    }
5559}
5560
5561/// Identifier format variation configuration.
5562#[derive(Debug, Clone, Serialize, Deserialize)]
5563pub struct IdentifierFormatVariationConfig {
5564    /// Enable identifier format variations
5565    #[serde(default)]
5566    pub enabled: bool,
5567    /// Overall variation rate
5568    #[serde(default = "default_identifier_variation_rate")]
5569    pub rate: f64,
5570    /// Case variations (uppercase, lowercase, mixed)
5571    #[serde(default)]
5572    pub case_variations: bool,
5573    /// Padding variations (leading zeros)
5574    #[serde(default)]
5575    pub padding_variations: bool,
5576    /// Separator variations (dash vs underscore)
5577    #[serde(default)]
5578    pub separator_variations: bool,
5579}
5580
/// Serde default for the identifier format variation rate (2%).
fn default_identifier_variation_rate() -> f64 {
    0.02_f64
}
5584
5585impl Default for IdentifierFormatVariationConfig {
5586    fn default() -> Self {
5587        Self {
5588            enabled: false,
5589            rate: default_identifier_variation_rate(),
5590            case_variations: false,
5591            padding_variations: false,
5592            separator_variations: false,
5593        }
5594    }
5595}
5596
5597/// Duplicate injection configuration.
5598#[derive(Debug, Clone, Serialize, Deserialize)]
5599pub struct DuplicateSchemaConfig {
5600    /// Enable duplicate injection
5601    #[serde(default)]
5602    pub enabled: bool,
5603    /// Overall duplicate rate
5604    #[serde(default = "default_duplicate_rate")]
5605    pub rate: f64,
5606    /// Exact duplicate proportion (out of duplicates)
5607    #[serde(default = "default_exact_duplicate_ratio")]
5608    pub exact_duplicate_ratio: f64,
5609    /// Near duplicate proportion (slight variations)
5610    #[serde(default = "default_near_duplicate_ratio")]
5611    pub near_duplicate_ratio: f64,
5612    /// Fuzzy duplicate proportion (typos in key fields)
5613    #[serde(default = "default_fuzzy_duplicate_ratio")]
5614    pub fuzzy_duplicate_ratio: f64,
5615    /// Maximum date offset for near/fuzzy duplicates (days)
5616    #[serde(default = "default_max_date_offset")]
5617    pub max_date_offset_days: u32,
5618    /// Maximum amount variance for near duplicates (fraction)
5619    #[serde(default = "default_max_amount_variance")]
5620    pub max_amount_variance: f64,
5621}
5622
/// Serde default for the overall duplicate rate (0.5%).
fn default_duplicate_rate() -> f64 {
    0.005_f64
}

/// Serde default share of duplicates that are exact copies.
fn default_exact_duplicate_ratio() -> f64 {
    0.4_f64
}

/// Serde default share of duplicates that are near duplicates.
fn default_near_duplicate_ratio() -> f64 {
    0.35_f64
}

/// Serde default share of duplicates that are fuzzy duplicates.
fn default_fuzzy_duplicate_ratio() -> f64 {
    0.25_f64
}

/// Serde default for the maximum date offset, in days.
fn default_max_date_offset() -> u32 {
    3_u32
}

/// Serde default for the maximum amount variance (fraction).
fn default_max_amount_variance() -> f64 {
    0.01_f64
}
5641
5642impl Default for DuplicateSchemaConfig {
5643    fn default() -> Self {
5644        Self {
5645            enabled: false,
5646            rate: default_duplicate_rate(),
5647            exact_duplicate_ratio: default_exact_duplicate_ratio(),
5648            near_duplicate_ratio: default_near_duplicate_ratio(),
5649            fuzzy_duplicate_ratio: default_fuzzy_duplicate_ratio(),
5650            max_date_offset_days: default_max_date_offset(),
5651            max_amount_variance: default_max_amount_variance(),
5652        }
5653    }
5654}
5655
5656/// Encoding issue configuration.
5657#[derive(Debug, Clone, Serialize, Deserialize)]
5658pub struct EncodingIssueSchemaConfig {
5659    /// Enable encoding issue injection
5660    #[serde(default)]
5661    pub enabled: bool,
5662    /// Overall encoding issue rate
5663    #[serde(default = "default_encoding_rate")]
5664    pub rate: f64,
5665    /// Include mojibake (UTF-8/Latin-1 confusion)
5666    #[serde(default)]
5667    pub mojibake: bool,
5668    /// Include HTML entity corruption
5669    #[serde(default)]
5670    pub html_entities: bool,
5671    /// Include BOM issues
5672    #[serde(default)]
5673    pub bom_issues: bool,
5674}
5675
/// Serde default for the encoding issue rate (0.1%).
fn default_encoding_rate() -> f64 {
    0.001_f64
}
5679
5680impl Default for EncodingIssueSchemaConfig {
5681    fn default() -> Self {
5682        Self {
5683            enabled: false,
5684            rate: default_encoding_rate(),
5685            mojibake: false,
5686            html_entities: false,
5687            bom_issues: false,
5688        }
5689    }
5690}
5691
5692/// Per-sink quality profiles for different output formats.
5693#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5694pub struct SinkQualityProfiles {
5695    /// CSV-specific quality settings
5696    #[serde(default)]
5697    pub csv: Option<SinkQualityOverride>,
5698    /// JSON-specific quality settings
5699    #[serde(default)]
5700    pub json: Option<SinkQualityOverride>,
5701    /// Parquet-specific quality settings
5702    #[serde(default)]
5703    pub parquet: Option<SinkQualityOverride>,
5704}
5705
5706/// Quality setting overrides for a specific sink type.
5707#[derive(Debug, Clone, Serialize, Deserialize)]
5708pub struct SinkQualityOverride {
5709    /// Override enabled state
5710    pub enabled: Option<bool>,
5711    /// Override missing value rate
5712    pub missing_rate: Option<f64>,
5713    /// Override typo rate
5714    pub typo_rate: Option<f64>,
5715    /// Override format variation rate
5716    pub format_variation_rate: Option<f64>,
5717    /// Override duplicate rate
5718    pub duplicate_rate: Option<f64>,
5719}
5720
5721// =============================================================================
5722// Accounting Standards Configuration
5723// =============================================================================
5724
5725/// Accounting standards framework configuration for generating standards-compliant data.
5726///
5727/// Supports US GAAP, IFRS, and French GAAP (PCG) frameworks with specific standards:
5728/// - ASC 606/IFRS 15/PCG: Revenue Recognition
5729/// - ASC 842/IFRS 16/PCG: Leases
5730/// - ASC 820/IFRS 13/PCG: Fair Value Measurement
5731/// - ASC 360/IAS 36/PCG: Impairment
5732#[derive(Debug, Clone, Serialize, Deserialize, Default)]
5733pub struct AccountingStandardsConfig {
5734    /// Enable accounting standards generation
5735    #[serde(default)]
5736    pub enabled: bool,
5737
5738    /// Accounting framework to use.
5739    /// When `None`, the country pack's `accounting.framework` is used as fallback;
5740    /// if that is also absent the orchestrator defaults to US GAAP.
5741    #[serde(default, skip_serializing_if = "Option::is_none")]
5742    pub framework: Option<AccountingFrameworkConfig>,
5743
5744    /// Revenue recognition configuration (ASC 606/IFRS 15)
5745    #[serde(default)]
5746    pub revenue_recognition: RevenueRecognitionConfig,
5747
5748    /// Lease accounting configuration (ASC 842/IFRS 16)
5749    #[serde(default)]
5750    pub leases: LeaseAccountingConfig,
5751
5752    /// Fair value measurement configuration (ASC 820/IFRS 13)
5753    #[serde(default)]
5754    pub fair_value: FairValueConfig,
5755
5756    /// Impairment testing configuration (ASC 360/IAS 36)
5757    #[serde(default)]
5758    pub impairment: ImpairmentConfig,
5759
5760    /// Business combination configuration (IFRS 3 / ASC 805)
5761    #[serde(default)]
5762    pub business_combinations: BusinessCombinationsConfig,
5763
5764    /// Expected Credit Loss configuration (IFRS 9 / ASC 326)
5765    #[serde(default)]
5766    pub expected_credit_loss: EclConfig,
5767
5768    /// Generate framework differences for dual reporting
5769    #[serde(default)]
5770    pub generate_differences: bool,
5771}
5772
5773/// Accounting framework selection.
5774#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
5775#[serde(rename_all = "snake_case")]
5776pub enum AccountingFrameworkConfig {
5777    /// US Generally Accepted Accounting Principles
5778    #[default]
5779    UsGaap,
5780    /// International Financial Reporting Standards
5781    Ifrs,
5782    /// Generate data for both frameworks with reconciliation
5783    DualReporting,
5784    /// French GAAP (Plan Comptable Général – PCG)
5785    FrenchGaap,
5786    /// German GAAP (Handelsgesetzbuch – HGB, §238-263)
5787    GermanGaap,
5788}
5789
5790/// Revenue recognition configuration (ASC 606/IFRS 15).
5791#[derive(Debug, Clone, Serialize, Deserialize)]
5792pub struct RevenueRecognitionConfig {
5793    /// Enable revenue recognition generation
5794    #[serde(default)]
5795    pub enabled: bool,
5796
5797    /// Generate customer contracts
5798    #[serde(default = "default_true")]
5799    pub generate_contracts: bool,
5800
5801    /// Average number of performance obligations per contract
5802    #[serde(default = "default_avg_obligations")]
5803    pub avg_obligations_per_contract: f64,
5804
5805    /// Rate of contracts with variable consideration
5806    #[serde(default = "default_variable_consideration_rate")]
5807    pub variable_consideration_rate: f64,
5808
5809    /// Rate of over-time revenue recognition (vs point-in-time)
5810    #[serde(default = "default_over_time_rate")]
5811    pub over_time_recognition_rate: f64,
5812
5813    /// Number of contracts to generate
5814    #[serde(default = "default_contract_count")]
5815    pub contract_count: usize,
5816}
5817
/// Serde default for the average performance obligations per contract.
fn default_avg_obligations() -> f64 {
    2.0_f64
}

/// Serde default for the share of contracts with variable consideration.
fn default_variable_consideration_rate() -> f64 {
    0.15_f64
}

/// Serde default for the share of revenue recognised over time.
fn default_over_time_rate() -> f64 {
    0.30_f64
}

/// Serde default for the number of contracts to generate.
fn default_contract_count() -> usize {
    100_usize
}
5833
5834impl Default for RevenueRecognitionConfig {
5835    fn default() -> Self {
5836        Self {
5837            enabled: false,
5838            generate_contracts: true,
5839            avg_obligations_per_contract: default_avg_obligations(),
5840            variable_consideration_rate: default_variable_consideration_rate(),
5841            over_time_recognition_rate: default_over_time_rate(),
5842            contract_count: default_contract_count(),
5843        }
5844    }
5845}
5846
5847/// Lease accounting configuration (ASC 842/IFRS 16).
5848#[derive(Debug, Clone, Serialize, Deserialize)]
5849pub struct LeaseAccountingConfig {
5850    /// Enable lease accounting generation
5851    #[serde(default)]
5852    pub enabled: bool,
5853
5854    /// Number of leases to generate
5855    #[serde(default = "default_lease_count")]
5856    pub lease_count: usize,
5857
5858    /// Percentage of finance leases (vs operating)
5859    #[serde(default = "default_finance_lease_pct")]
5860    pub finance_lease_percent: f64,
5861
5862    /// Average lease term in months
5863    #[serde(default = "default_avg_lease_term")]
5864    pub avg_lease_term_months: u32,
5865
5866    /// Generate amortization schedules
5867    #[serde(default = "default_true")]
5868    pub generate_amortization: bool,
5869
5870    /// Real estate lease percentage
5871    #[serde(default = "default_real_estate_pct")]
5872    pub real_estate_percent: f64,
5873}
5874
/// Serde default for the number of leases to generate.
fn default_lease_count() -> usize {
    50_usize
}

/// Serde default for the finance lease share (0.30).
fn default_finance_lease_pct() -> f64 {
    0.30_f64
}

/// Serde default for the average lease term, in months.
fn default_avg_lease_term() -> u32 {
    60_u32
}

/// Serde default for the real estate lease share (0.40).
fn default_real_estate_pct() -> f64 {
    0.40_f64
}
5890
5891impl Default for LeaseAccountingConfig {
5892    fn default() -> Self {
5893        Self {
5894            enabled: false,
5895            lease_count: default_lease_count(),
5896            finance_lease_percent: default_finance_lease_pct(),
5897            avg_lease_term_months: default_avg_lease_term(),
5898            generate_amortization: true,
5899            real_estate_percent: default_real_estate_pct(),
5900        }
5901    }
5902}
5903
5904/// Fair value measurement configuration (ASC 820/IFRS 13).
5905#[derive(Debug, Clone, Serialize, Deserialize)]
5906pub struct FairValueConfig {
5907    /// Enable fair value measurement generation
5908    #[serde(default)]
5909    pub enabled: bool,
5910
5911    /// Number of fair value measurements to generate
5912    #[serde(default = "default_fv_count")]
5913    pub measurement_count: usize,
5914
5915    /// Level 1 (quoted prices) percentage
5916    #[serde(default = "default_level1_pct")]
5917    pub level1_percent: f64,
5918
5919    /// Level 2 (observable inputs) percentage
5920    #[serde(default = "default_level2_pct")]
5921    pub level2_percent: f64,
5922
5923    /// Level 3 (unobservable inputs) percentage
5924    #[serde(default = "default_level3_pct")]
5925    pub level3_percent: f64,
5926
5927    /// Include sensitivity analysis for Level 3
5928    #[serde(default)]
5929    pub include_sensitivity_analysis: bool,
5930}
5931
/// Serde default for the number of fair value measurements.
fn default_fv_count() -> usize {
    25_usize
}

/// Serde default for the Level 1 share (0.40).
fn default_level1_pct() -> f64 {
    0.40_f64
}

/// Serde default for the Level 2 share (0.35).
fn default_level2_pct() -> f64 {
    0.35_f64
}

/// Serde default for the Level 3 share (0.25).
fn default_level3_pct() -> f64 {
    0.25_f64
}
5947
5948impl Default for FairValueConfig {
5949    fn default() -> Self {
5950        Self {
5951            enabled: false,
5952            measurement_count: default_fv_count(),
5953            level1_percent: default_level1_pct(),
5954            level2_percent: default_level2_pct(),
5955            level3_percent: default_level3_pct(),
5956            include_sensitivity_analysis: false,
5957        }
5958    }
5959}
5960
5961/// Impairment testing configuration (ASC 360/IAS 36).
5962#[derive(Debug, Clone, Serialize, Deserialize)]
5963pub struct ImpairmentConfig {
5964    /// Enable impairment testing generation
5965    #[serde(default)]
5966    pub enabled: bool,
5967
5968    /// Number of impairment tests to generate
5969    #[serde(default = "default_impairment_count")]
5970    pub test_count: usize,
5971
5972    /// Rate of tests resulting in impairment
5973    #[serde(default = "default_impairment_rate")]
5974    pub impairment_rate: f64,
5975
5976    /// Generate cash flow projections
5977    #[serde(default = "default_true")]
5978    pub generate_projections: bool,
5979
5980    /// Include goodwill impairment tests
5981    #[serde(default)]
5982    pub include_goodwill: bool,
5983}
5984
/// Serde default for the number of impairment tests.
fn default_impairment_count() -> usize {
    15_usize
}

/// Serde default for the share of tests that result in impairment (0.10).
fn default_impairment_rate() -> f64 {
    0.10_f64
}
5992
5993impl Default for ImpairmentConfig {
5994    fn default() -> Self {
5995        Self {
5996            enabled: false,
5997            test_count: default_impairment_count(),
5998            impairment_rate: default_impairment_rate(),
5999            generate_projections: true,
6000            include_goodwill: false,
6001        }
6002    }
6003}
6004
6005// =============================================================================
6006// Business Combinations Configuration (IFRS 3 / ASC 805)
6007// =============================================================================
6008
6009/// Configuration for generating business combination (acquisition) data.
6010#[derive(Debug, Clone, Serialize, Deserialize)]
6011pub struct BusinessCombinationsConfig {
6012    /// Enable business combination generation
6013    #[serde(default)]
6014    pub enabled: bool,
6015
6016    /// Number of acquisitions to generate per company (1-5)
6017    #[serde(default = "default_bc_acquisition_count")]
6018    pub acquisition_count: usize,
6019}
6020
/// Serde default for the number of acquisitions per company.
fn default_bc_acquisition_count() -> usize {
    2_usize
}
6024
6025impl Default for BusinessCombinationsConfig {
6026    fn default() -> Self {
6027        Self {
6028            enabled: false,
6029            acquisition_count: default_bc_acquisition_count(),
6030        }
6031    }
6032}
6033
6034// =============================================================================
6035// ECL Configuration (IFRS 9 / ASC 326)
6036// =============================================================================
6037
6038/// Configuration for Expected Credit Loss generation.
6039#[derive(Debug, Clone, Serialize, Deserialize)]
6040pub struct EclConfig {
6041    /// Enable ECL generation.
6042    #[serde(default)]
6043    pub enabled: bool,
6044
6045    /// Weight for base economic scenario (0–1).
6046    #[serde(default = "default_ecl_base_weight")]
6047    pub base_scenario_weight: f64,
6048
6049    /// Multiplier for base scenario (typically 1.0).
6050    #[serde(default = "default_ecl_base_multiplier")]
6051    pub base_scenario_multiplier: f64,
6052
6053    /// Weight for optimistic economic scenario (0–1).
6054    #[serde(default = "default_ecl_optimistic_weight")]
6055    pub optimistic_scenario_weight: f64,
6056
6057    /// Multiplier for optimistic scenario (< 1.0 means lower losses).
6058    #[serde(default = "default_ecl_optimistic_multiplier")]
6059    pub optimistic_scenario_multiplier: f64,
6060
6061    /// Weight for pessimistic economic scenario (0–1).
6062    #[serde(default = "default_ecl_pessimistic_weight")]
6063    pub pessimistic_scenario_weight: f64,
6064
6065    /// Multiplier for pessimistic scenario (> 1.0 means higher losses).
6066    #[serde(default = "default_ecl_pessimistic_multiplier")]
6067    pub pessimistic_scenario_multiplier: f64,
6068}
6069
/// Serde default weight of the base scenario.
fn default_ecl_base_weight() -> f64 {
    0.50_f64
}

/// Serde default loss multiplier of the base scenario.
fn default_ecl_base_multiplier() -> f64 {
    1.0_f64
}

/// Serde default weight of the optimistic scenario.
fn default_ecl_optimistic_weight() -> f64 {
    0.30_f64
}

/// Serde default loss multiplier of the optimistic scenario.
fn default_ecl_optimistic_multiplier() -> f64 {
    0.8_f64
}

/// Serde default weight of the pessimistic scenario.
fn default_ecl_pessimistic_weight() -> f64 {
    0.20_f64
}

/// Serde default loss multiplier of the pessimistic scenario.
fn default_ecl_pessimistic_multiplier() -> f64 {
    1.4_f64
}
6088
6089impl Default for EclConfig {
6090    fn default() -> Self {
6091        Self {
6092            enabled: false,
6093            base_scenario_weight: default_ecl_base_weight(),
6094            base_scenario_multiplier: default_ecl_base_multiplier(),
6095            optimistic_scenario_weight: default_ecl_optimistic_weight(),
6096            optimistic_scenario_multiplier: default_ecl_optimistic_multiplier(),
6097            pessimistic_scenario_weight: default_ecl_pessimistic_weight(),
6098            pessimistic_scenario_multiplier: default_ecl_pessimistic_multiplier(),
6099        }
6100    }
6101}
6102
6103// =============================================================================
6104// Audit Standards Configuration
6105// =============================================================================
6106
6107/// Audit standards framework configuration for generating standards-compliant audit data.
6108///
6109/// Supports ISA (International Standards on Auditing) and PCAOB standards:
6110/// - ISA 200-720: Complete coverage of audit standards
6111/// - ISA 520: Analytical Procedures
6112/// - ISA 505: External Confirmations
6113/// - ISA 700/705/706/701: Audit Reports
6114/// - PCAOB AS 2201: ICFR Auditing
6115#[derive(Debug, Clone, Serialize, Deserialize, Default)]
6116pub struct AuditStandardsConfig {
6117    /// Enable audit standards generation
6118    #[serde(default)]
6119    pub enabled: bool,
6120
6121    /// ISA compliance configuration
6122    #[serde(default)]
6123    pub isa_compliance: IsaComplianceConfig,
6124
6125    /// Analytical procedures configuration (ISA 520)
6126    #[serde(default)]
6127    pub analytical_procedures: AnalyticalProceduresConfig,
6128
6129    /// External confirmations configuration (ISA 505)
6130    #[serde(default)]
6131    pub confirmations: ConfirmationsConfig,
6132
6133    /// Audit opinion configuration (ISA 700/705/706/701)
6134    #[serde(default)]
6135    pub opinion: AuditOpinionConfig,
6136
6137    /// Generate complete audit trail with traceability
6138    #[serde(default)]
6139    pub generate_audit_trail: bool,
6140
6141    /// SOX 302/404 compliance configuration
6142    #[serde(default)]
6143    pub sox: SoxComplianceConfig,
6144
6145    /// PCAOB-specific configuration
6146    #[serde(default)]
6147    pub pcaob: PcaobConfig,
6148}
6149
6150/// ISA compliance level configuration.
6151#[derive(Debug, Clone, Serialize, Deserialize)]
6152pub struct IsaComplianceConfig {
6153    /// Enable ISA compliance tracking
6154    #[serde(default)]
6155    pub enabled: bool,
6156
6157    /// Compliance level: "basic", "standard", "comprehensive"
6158    #[serde(default = "default_compliance_level")]
6159    pub compliance_level: String,
6160
6161    /// Generate ISA requirement mappings
6162    #[serde(default = "default_true")]
6163    pub generate_isa_mappings: bool,
6164
6165    /// Generate ISA coverage summary
6166    #[serde(default = "default_true")]
6167    pub generate_coverage_summary: bool,
6168
6169    /// Include PCAOB standard mappings (for dual framework)
6170    #[serde(default)]
6171    pub include_pcaob: bool,
6172
6173    /// Framework to use: "isa", "pcaob", "dual"
6174    #[serde(default = "default_audit_framework")]
6175    pub framework: String,
6176}
6177
/// Serde default for the ISA compliance level.
fn default_compliance_level() -> String {
    String::from("standard")
}

/// Serde default for the audit framework.
fn default_audit_framework() -> String {
    String::from("isa")
}
6185
6186impl Default for IsaComplianceConfig {
6187    fn default() -> Self {
6188        Self {
6189            enabled: false,
6190            compliance_level: default_compliance_level(),
6191            generate_isa_mappings: true,
6192            generate_coverage_summary: true,
6193            include_pcaob: false,
6194            framework: default_audit_framework(),
6195        }
6196    }
6197}
6198
/// Analytical procedures configuration (ISA 520).
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticalProceduresConfig {
    /// Enable analytical procedures generation. Omitted => `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Number of procedures per account/area. Omitted => 3.
    #[serde(default = "default_procedures_per_account")]
    pub procedures_per_account: usize,

    /// Probability of variance exceeding threshold. Omitted => 0.20.
    /// No range check is applied in this struct.
    #[serde(default = "default_variance_probability")]
    pub variance_probability: f64,

    /// Include variance investigations
    #[serde(default = "default_true")]
    pub generate_investigations: bool,

    /// Include financial ratio analysis
    #[serde(default = "default_true")]
    pub include_ratio_analysis: bool,
}
6222
/// Serde default for `AnalyticalProceduresConfig::procedures_per_account`.
fn default_procedures_per_account() -> usize {
    const PROCEDURES_PER_ACCOUNT: usize = 3;
    PROCEDURES_PER_ACCOUNT
}
6226
/// Serde default for `AnalyticalProceduresConfig::variance_probability`.
fn default_variance_probability() -> f64 {
    const VARIANCE_PROBABILITY: f64 = 0.20;
    VARIANCE_PROBABILITY
}
6230
6231impl Default for AnalyticalProceduresConfig {
6232    fn default() -> Self {
6233        Self {
6234            enabled: false,
6235            procedures_per_account: default_procedures_per_account(),
6236            variance_probability: default_variance_probability(),
6237            generate_investigations: true,
6238            include_ratio_analysis: true,
6239        }
6240    }
6241}
6242
/// External confirmations configuration (ISA 505).
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it. The three rate fields are independent
/// `f64` values; this struct does not check their range or that they sum
/// to any particular total.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfirmationsConfig {
    /// Enable confirmation generation. Omitted => `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Number of confirmations to generate. Omitted => 50.
    #[serde(default = "default_confirmation_count")]
    pub confirmation_count: usize,

    /// Positive response rate. Omitted => 0.85.
    #[serde(default = "default_positive_response_rate")]
    pub positive_response_rate: f64,

    /// Exception rate (responses with differences). Omitted => 0.10.
    #[serde(default = "default_exception_rate_confirm")]
    pub exception_rate: f64,

    /// Non-response rate. Omitted => 0.05.
    #[serde(default = "default_non_response_rate")]
    pub non_response_rate: f64,

    /// Generate alternative procedures for non-responses
    #[serde(default = "default_true")]
    pub generate_alternative_procedures: bool,
}
6270
/// Serde default for `ConfirmationsConfig::confirmation_count`.
fn default_confirmation_count() -> usize {
    const CONFIRMATION_COUNT: usize = 50;
    CONFIRMATION_COUNT
}
6274
/// Serde default for `ConfirmationsConfig::positive_response_rate`.
fn default_positive_response_rate() -> f64 {
    const POSITIVE_RESPONSE_RATE: f64 = 0.85;
    POSITIVE_RESPONSE_RATE
}
6278
/// Serde default for `ConfirmationsConfig::exception_rate`.
fn default_exception_rate_confirm() -> f64 {
    const EXCEPTION_RATE: f64 = 0.10;
    EXCEPTION_RATE
}
6282
/// Serde default for `ConfirmationsConfig::non_response_rate`.
fn default_non_response_rate() -> f64 {
    const NON_RESPONSE_RATE: f64 = 0.05;
    NON_RESPONSE_RATE
}
6286
6287impl Default for ConfirmationsConfig {
6288    fn default() -> Self {
6289        Self {
6290            enabled: false,
6291            confirmation_count: default_confirmation_count(),
6292            positive_response_rate: default_positive_response_rate(),
6293            exception_rate: default_exception_rate_confirm(),
6294            non_response_rate: default_non_response_rate(),
6295            generate_alternative_procedures: true,
6296        }
6297    }
6298}
6299
/// Audit opinion configuration (ISA 700/705/706/701).
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditOpinionConfig {
    /// Enable audit opinion generation. Omitted => `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate Key Audit Matters (KAM) / Critical Audit Matters (CAM)
    #[serde(default = "default_true")]
    pub generate_kam: bool,

    /// Average number of KAMs/CAMs per opinion. Omitted => 3.
    #[serde(default = "default_kam_count")]
    pub average_kam_count: usize,

    /// Rate of modified opinions. Omitted => 0.05.
    #[serde(default = "default_modified_opinion_rate")]
    pub modified_opinion_rate: f64,

    /// Include emphasis of matter paragraphs. Omitted => `false`.
    #[serde(default)]
    pub include_emphasis_of_matter: bool,

    /// Include going concern conclusions
    #[serde(default = "default_true")]
    pub include_going_concern: bool,
}
6327
/// Serde default for `AuditOpinionConfig::average_kam_count`.
fn default_kam_count() -> usize {
    const AVERAGE_KAM_COUNT: usize = 3;
    AVERAGE_KAM_COUNT
}
6331
/// Serde default for `AuditOpinionConfig::modified_opinion_rate`.
fn default_modified_opinion_rate() -> f64 {
    const MODIFIED_OPINION_RATE: f64 = 0.05;
    MODIFIED_OPINION_RATE
}
6335
6336impl Default for AuditOpinionConfig {
6337    fn default() -> Self {
6338        Self {
6339            enabled: false,
6340            generate_kam: true,
6341            average_kam_count: default_kam_count(),
6342            modified_opinion_rate: default_modified_opinion_rate(),
6343            include_emphasis_of_matter: false,
6344            include_going_concern: true,
6345        }
6346    }
6347}
6348
/// SOX compliance configuration (Sections 302/404).
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoxComplianceConfig {
    /// Enable SOX compliance generation. Omitted => `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Generate Section 302 CEO/CFO certifications
    #[serde(default = "default_true")]
    pub generate_302_certifications: bool,

    /// Generate Section 404 ICFR assessments
    #[serde(default = "default_true")]
    pub generate_404_assessments: bool,

    /// Materiality threshold for SOX testing
    #[serde(default = "default_sox_materiality_threshold")]
    pub materiality_threshold: f64,

    /// Rate of material weaknesses. Omitted => 0.02.
    #[serde(default = "default_material_weakness_rate")]
    pub material_weakness_rate: f64,

    /// Rate of significant deficiencies. Omitted => 0.08.
    #[serde(default = "default_significant_deficiency_rate")]
    pub significant_deficiency_rate: f64,
}
6376
/// Serde default for `SoxComplianceConfig::material_weakness_rate`.
fn default_material_weakness_rate() -> f64 {
    const MATERIAL_WEAKNESS_RATE: f64 = 0.02;
    MATERIAL_WEAKNESS_RATE
}
6380
/// Serde default for `SoxComplianceConfig::significant_deficiency_rate`.
fn default_significant_deficiency_rate() -> f64 {
    const SIGNIFICANT_DEFICIENCY_RATE: f64 = 0.08;
    SIGNIFICANT_DEFICIENCY_RATE
}
6384
6385impl Default for SoxComplianceConfig {
6386    fn default() -> Self {
6387        Self {
6388            enabled: false,
6389            generate_302_certifications: true,
6390            generate_404_assessments: true,
6391            materiality_threshold: default_sox_materiality_threshold(),
6392            material_weakness_rate: default_material_weakness_rate(),
6393            significant_deficiency_rate: default_significant_deficiency_rate(),
6394        }
6395    }
6396}
6397
/// PCAOB-specific configuration.
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PcaobConfig {
    /// Enable PCAOB-specific elements. Omitted => `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Treat as PCAOB audit (vs ISA-only). Omitted => `false`.
    #[serde(default)]
    pub is_pcaob_audit: bool,

    /// Generate Critical Audit Matters (CAM)
    #[serde(default = "default_true")]
    pub generate_cam: bool,

    /// Include ICFR opinion (for integrated audits). Omitted => `false`.
    #[serde(default)]
    pub include_icfr_opinion: bool,

    /// Generate PCAOB-ISA standard mappings. Omitted => `false`.
    #[serde(default)]
    pub generate_standard_mappings: bool,
}
6421
6422impl Default for PcaobConfig {
6423    fn default() -> Self {
6424        Self {
6425            enabled: false,
6426            is_pcaob_audit: false,
6427            generate_cam: true,
6428            include_icfr_opinion: false,
6429            generate_standard_mappings: false,
6430        }
6431    }
6432}
6433
6434// =============================================================================
6435// Advanced Distribution Configuration
6436// =============================================================================
6437
/// Advanced distribution configuration for realistic data generation.
///
/// This section enables sophisticated distribution models including:
/// - Mixture models (multi-modal distributions)
/// - Cross-field correlations
/// - Conditional distributions
/// - Regime changes and economic cycles
/// - Statistical validation
///
/// `Default` is derived, so the default value is built from each field
/// type's own `Default` (`enabled` is `false`, `conditional` is empty,
/// `industry_profile` and `pareto` are `None`). Every field is optional
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AdvancedDistributionConfig {
    /// Enable advanced distribution features.
    #[serde(default)]
    pub enabled: bool,

    /// Mixture model configuration for amounts.
    #[serde(default)]
    pub amounts: MixtureDistributionSchemaConfig,

    /// Cross-field correlation configuration.
    #[serde(default)]
    pub correlations: CorrelationSchemaConfig,

    /// Conditional distribution configurations.
    #[serde(default)]
    pub conditional: Vec<ConditionalDistributionSchemaConfig>,

    /// Regime change configuration.
    #[serde(default)]
    pub regime_changes: RegimeChangeSchemaConfig,

    /// Industry-specific distribution profile.
    ///
    /// Accepts either the legacy bare-name form (`industry_profile: retail`) or
    /// the SP3 extended struct form with optional `priors` sub-section.
    #[serde(default)]
    pub industry_profile: Option<IndustryProfileField>,

    /// Statistical validation configuration.
    #[serde(default)]
    pub validation: StatisticalValidationSchemaConfig,

    /// v3.4.4+ — Pareto heavy-tailed distribution for monetary amounts.
    /// When set and `enabled`, overrides `amounts` mixture model for the
    /// non-fraud amount-sampling path (fraud patterns remain orthogonal).
    /// Useful for capex, strategic contracts, and any domain where a small
    /// number of very large values dominates the tail.
    #[serde(default)]
    pub pareto: Option<ParetoSchemaConfig>,
}
6487
/// Schema-level Pareto distribution configuration (v3.4.4+).
///
/// Thin wrapper around `datasynth_core::distributions::ParetoConfig` that
/// adds an `enabled` gate and serde-friendly field names.
///
/// `enabled` exists only at the schema level: `to_core_config` copies the
/// other four fields and does not forward it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParetoSchemaConfig {
    /// Enable Pareto sampling. When true, replaces the `amounts` mixture
    /// model for the non-fraud amount-sampling path.
    #[serde(default)]
    pub enabled: bool,

    /// Shape parameter (tail heaviness). Lower values → heavier tail.
    /// Typical range: 1.5-3.0. Default: 2.0.
    #[serde(default = "default_pareto_alpha")]
    pub alpha: f64,

    /// Scale / minimum value. All samples are >= x_min.
    /// Typical: 1000 (for capex) to 100,000 (for large contracts). Default: 100.
    #[serde(default = "default_pareto_x_min")]
    pub x_min: f64,

    /// Optional upper clamp. `None` = unbounded (recommended for realistic
    /// heavy tails).
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Default: 2.
    #[serde(default = "default_pareto_decimal_places")]
    pub decimal_places: u8,
}
6518
/// Serde default for `ParetoSchemaConfig::alpha` (shape parameter).
fn default_pareto_alpha() -> f64 {
    const PARETO_ALPHA: f64 = 2.0;
    PARETO_ALPHA
}
6522
/// Serde default for `ParetoSchemaConfig::x_min` (scale / minimum value).
fn default_pareto_x_min() -> f64 {
    const PARETO_X_MIN: f64 = 100.0;
    PARETO_X_MIN
}
6526
/// Serde default for `ParetoSchemaConfig::decimal_places`.
fn default_pareto_decimal_places() -> u8 {
    const PARETO_DECIMAL_PLACES: u8 = 2;
    PARETO_DECIMAL_PLACES
}
6530
6531impl Default for ParetoSchemaConfig {
6532    fn default() -> Self {
6533        Self {
6534            enabled: false,
6535            alpha: default_pareto_alpha(),
6536            x_min: default_pareto_x_min(),
6537            max_value: None,
6538            decimal_places: default_pareto_decimal_places(),
6539        }
6540    }
6541}
6542
6543impl ParetoSchemaConfig {
6544    /// Convert this schema config into a `datasynth_core::distributions::ParetoConfig`.
6545    pub fn to_core_config(&self) -> datasynth_core::distributions::ParetoConfig {
6546        datasynth_core::distributions::ParetoConfig {
6547            alpha: self.alpha,
6548            x_min: self.x_min,
6549            max_value: self.max_value,
6550            decimal_places: self.decimal_places,
6551        }
6552    }
6553}
6554
/// Industry profile types for pre-configured distribution settings.
///
/// Serialized in `snake_case`, so the accepted config values are `retail`,
/// `manufacturing`, `financial_services`, `healthcare` and `technology`.
/// Note that `slug()` returns `"health"` for `Healthcare`, which differs
/// from its serde name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IndustryProfileType {
    /// Retail industry profile (POS sales, inventory, seasonal)
    Retail,
    /// Manufacturing industry profile (raw materials, maintenance, capital)
    Manufacturing,
    /// Financial services profile (wire transfers, ACH, fee income)
    FinancialServices,
    /// Healthcare profile (claims, procedures, supplies)
    Healthcare,
    /// Technology profile (subscriptions, services, R&D)
    Technology,
}
6570
6571impl IndustryProfileType {
6572    /// Return the lowercase ASCII slug used for bundled-priors filenames.
6573    ///
6574    /// E.g. `IndustryProfileType::FinancialServices => "financial_services"`.
6575    pub fn slug(self) -> &'static str {
6576        match self {
6577            Self::Retail => "retail",
6578            Self::Manufacturing => "manufacturing",
6579            Self::FinancialServices => "financial_services",
6580            // Matches SP2's bundle naming (corpus uses "Health", not "Healthcare").
6581            Self::Healthcare => "health",
6582            Self::Technology => "technology",
6583        }
6584    }
6585}
6586
6587// ---------------------------------------------------------------------------
6588// SP3 — IndustryProfileField: backward-compatible wrapper
6589// ---------------------------------------------------------------------------
6590
/// The value of `distributions.industry_profile` in config YAML.
///
/// Accepts both the legacy bare-name form:
/// ```yaml
/// distributions:
///   industry_profile: retail
/// ```
/// and the new SP3 extended struct form with optional `priors` sub-section:
/// ```yaml
/// distributions:
///   industry_profile:
///     name: retail
///     priors:
///       enabled: true
///       source: bundled
/// ```
///
/// The enum is `#[serde(untagged)]`: there is no discriminator key, and
/// serde tries the variants in declaration order (`Name` first, then
/// `Full`), keeping the first one that deserializes.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum IndustryProfileField {
    /// Legacy form: `industry_profile: retail`.
    Name(IndustryProfileType),
    /// New form: `industry_profile: { name: retail, priors: { ... } }`.
    Full(IndustryProfileFull),
}
6615
6616impl IndustryProfileField {
6617    /// Return the bare `IndustryProfileType` regardless of which form was used.
6618    pub fn profile_type(&self) -> IndustryProfileType {
6619        match self {
6620            IndustryProfileField::Name(t) => *t,
6621            IndustryProfileField::Full(f) => f.name,
6622        }
6623    }
6624
6625    /// Return the optional `priors` sub-section, if present.
6626    pub fn priors(&self) -> Option<&IndustryPriorsConfig> {
6627        match self {
6628            IndustryProfileField::Name(_) => None,
6629            IndustryProfileField::Full(f) => f.priors.as_ref(),
6630        }
6631    }
6632}
6633
/// Extended industry profile struct used when `priors` is needed (SP3).
///
/// `name` is required; `priors` may be omitted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndustryProfileFull {
    /// The industry variant (same values as the bare-name legacy form).
    pub name: IndustryProfileType,
    /// Optional SP3 priors sub-section. Omitted => `None`, and a `None`
    /// value is left out again when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub priors: Option<IndustryPriorsConfig>,
}
6643
/// SP3 — configuration for industry-prior injection.
///
/// When `enabled = true`, the generator uses pre-baked statistical priors
/// for the given industry. `source` selects whether to use bundled priors or
/// load from a user-supplied file (requires `path`).
///
/// `Default` is derived: disabled, `source = bundled`, no `path`, velocity
/// calibration off. The `source = file` / `path` pairing is not enforced by
/// this struct.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct IndustryPriorsConfig {
    /// Enable prior injection. When false the rest of the struct is ignored.
    #[serde(default)]
    pub enabled: bool,

    /// Where to load the priors from.
    #[serde(default)]
    pub source: PriorsSource,

    /// Path to the priors file. Required when `source = file`.
    /// A `None` value is left out when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<std::path::PathBuf>,

    /// SP3.4 — enable online velocity-rule calibrator. Adds per-line overhead
    /// when `true`; default `false` keeps v5.12/v5.13-without-calibration behavior.
    #[serde(default)]
    pub velocity_calibration: bool,
}
6668
/// Source of industry priors.
///
/// Serialized in lowercase, so the accepted config values are `bundled`
/// and `file`.
#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum PriorsSource {
    /// Use the priors bundled with the binary (default).
    #[default]
    Bundled,
    /// Load priors from a user-supplied file (requires `path`).
    File,
}
6679
/// Mixture model distribution configuration.
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureDistributionSchemaConfig {
    /// Enable mixture model for amount generation.
    #[serde(default)]
    pub enabled: bool,

    /// Distribution type: "gaussian" or "log_normal" (the enum is
    /// serialized in `snake_case`). Omitted => log-normal.
    #[serde(default = "default_mixture_type")]
    pub distribution_type: MixtureDistributionType,

    /// Mixture components with weights. Omitted => empty, in which case
    /// the `to_*_config` converters return `None`.
    #[serde(default)]
    pub components: Vec<MixtureComponentConfig>,

    /// Minimum value constraint. Omitted => 0.01.
    #[serde(default = "default_min_amount")]
    pub min_value: f64,

    /// Maximum value constraint (optional).
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for rounding. Omitted => 2.
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
6707
6708fn default_mixture_type() -> MixtureDistributionType {
6709    MixtureDistributionType::LogNormal
6710}
6711
/// Serde default for `MixtureDistributionSchemaConfig::min_value`.
fn default_min_amount() -> f64 {
    const MIN_AMOUNT: f64 = 0.01;
    MIN_AMOUNT
}
6715
/// Serde default for the `decimal_places` field of the mixture and
/// conditional distribution configs.
fn default_decimal_places() -> u8 {
    const DECIMAL_PLACES: u8 = 2;
    DECIMAL_PLACES
}
6719
6720impl Default for MixtureDistributionSchemaConfig {
6721    fn default() -> Self {
6722        Self {
6723            enabled: false,
6724            distribution_type: MixtureDistributionType::LogNormal,
6725            components: Vec::new(),
6726            min_value: 0.01,
6727            max_value: None,
6728            decimal_places: 2,
6729        }
6730    }
6731}
6732
6733impl MixtureDistributionSchemaConfig {
6734    /// Convert this schema-level config into a `LogNormalMixtureConfig`
6735    /// suitable for `LogNormalMixtureSampler::new`. Returns `None` if there
6736    /// are no components (schema default is an empty list, which cannot
6737    /// drive a sampler).
6738    ///
6739    /// Callers should gate this with `self.enabled` before invoking.
6740    pub fn to_log_normal_config(
6741        &self,
6742    ) -> Option<datasynth_core::distributions::LogNormalMixtureConfig> {
6743        if self.components.is_empty() {
6744            return None;
6745        }
6746        Some(datasynth_core::distributions::LogNormalMixtureConfig {
6747            components: self
6748                .components
6749                .iter()
6750                .map(|c| match &c.label {
6751                    Some(lbl) => datasynth_core::distributions::LogNormalComponent::with_label(
6752                        c.weight,
6753                        c.mu,
6754                        c.sigma,
6755                        lbl.clone(),
6756                    ),
6757                    None => datasynth_core::distributions::LogNormalComponent::new(
6758                        c.weight, c.mu, c.sigma,
6759                    ),
6760                })
6761                .collect(),
6762            min_value: self.min_value,
6763            max_value: self.max_value,
6764            decimal_places: self.decimal_places,
6765        })
6766    }
6767
6768    /// Convert this schema-level config into a `GaussianMixtureConfig`.
6769    /// Returns `None` if there are no components.
6770    pub fn to_gaussian_config(
6771        &self,
6772    ) -> Option<datasynth_core::distributions::GaussianMixtureConfig> {
6773        if self.components.is_empty() {
6774            return None;
6775        }
6776        Some(datasynth_core::distributions::GaussianMixtureConfig {
6777            components: self
6778                .components
6779                .iter()
6780                .map(|c| {
6781                    datasynth_core::distributions::GaussianComponent::new(c.weight, c.mu, c.sigma)
6782                })
6783                .collect(),
6784            allow_negative: true,
6785            min_value: Some(self.min_value),
6786            max_value: self.max_value,
6787        })
6788    }
6789}
6790
/// Mixture distribution type.
///
/// Serialized in `snake_case`, so the accepted config values are
/// `gaussian` and `log_normal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MixtureDistributionType {
    /// Gaussian (normal) mixture
    Gaussian,
    /// Log-normal mixture (for positive amounts). This is the `Default`.
    #[default]
    LogNormal,
}
6801
/// Configuration for a single mixture component.
///
/// `weight`, `mu` and `sigma` are required; only `label` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MixtureComponentConfig {
    /// Weight of this component (must sum to 1.0 across all components).
    /// The sum is not checked by this struct.
    pub weight: f64,

    /// Location parameter (mean for Gaussian, mu for log-normal).
    pub mu: f64,

    /// Scale parameter (std dev for Gaussian, sigma for log-normal).
    pub sigma: f64,

    /// Optional label for this component (e.g., "routine", "significant", "major").
    #[serde(default)]
    pub label: Option<String>,
}
6818
/// Cross-field correlation configuration.
///
/// Every field is optional when deserializing: an omitted field takes the
/// serde default declared on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationSchemaConfig {
    /// Enable correlation modeling.
    #[serde(default)]
    pub enabled: bool,

    /// Copula type for dependency modeling. Omitted => Gaussian.
    #[serde(default)]
    pub copula_type: CopulaSchemaType,

    /// Field definitions for correlation. The position of a field in this
    /// list is its row/column index into `matrix`.
    #[serde(default)]
    pub fields: Vec<CorrelatedFieldConfig>,

    /// Correlation matrix, flattened row-major. Two layouts are accepted
    /// by `correlation_between` for n fields:
    /// - upper triangular without the diagonal: n*(n-1)/2 values;
    /// - full n×n matrix: n*n values.
    ///
    /// Any other length is treated as malformed.
    #[serde(default)]
    pub matrix: Vec<f64>,

    /// Expected correlations for validation.
    #[serde(default)]
    pub expected_correlations: Vec<ExpectedCorrelationConfig>,
}
6843
6844impl Default for CorrelationSchemaConfig {
6845    fn default() -> Self {
6846        Self {
6847            enabled: false,
6848            copula_type: CopulaSchemaType::Gaussian,
6849            fields: Vec::new(),
6850            matrix: Vec::new(),
6851            expected_correlations: Vec::new(),
6852        }
6853    }
6854}
6855
6856impl CorrelationSchemaConfig {
6857    /// v3.5.4+: extract the correlation for a specific field pair from
6858    /// either the upper-triangular flat matrix (n*(n-1)/2 values) or a
6859    /// full symmetric n×n matrix (n*n values). Returns `None` when the
6860    /// named fields aren't both present or the matrix shape doesn't
6861    /// match.
6862    pub fn correlation_between(&self, field_a: &str, field_b: &str) -> Option<f64> {
6863        let idx_a = self.fields.iter().position(|f| f.name == field_a)?;
6864        let idx_b = self.fields.iter().position(|f| f.name == field_b)?;
6865        if idx_a == idx_b {
6866            return Some(1.0);
6867        }
6868        let (i, j) = if idx_a < idx_b {
6869            (idx_a, idx_b)
6870        } else {
6871            (idx_b, idx_a)
6872        };
6873        let n = self.fields.len();
6874        // Full n×n symmetric matrix?
6875        if self.matrix.len() == n * n {
6876            return self.matrix.get(idx_a * n + idx_b).copied();
6877        }
6878        // Upper triangular flat (row-major, excluding diagonal)?
6879        let expected_tri = n * (n - 1) / 2;
6880        if self.matrix.len() == expected_tri {
6881            // Row i, col j where j > i: flat index is
6882            //   sum_{k=0..i}((n-1-k)) + (j - i - 1)
6883            // = i*(n-1) - i*(i-1)/2 + (j - i - 1)
6884            let flat = i * (n - 1) - i * (i.saturating_sub(1)) / 2 + (j - i - 1);
6885            return self.matrix.get(flat).copied();
6886        }
6887        None
6888    }
6889
6890    /// Convert this schema config to a core `CopulaConfig` when the
6891    /// declared field pair `(field_a, field_b)` has a valid correlation
6892    /// entry. Returns `None` when disabled, fields missing, or matrix
6893    /// malformed.
6894    pub fn to_core_config_for_pair(
6895        &self,
6896        field_a: &str,
6897        field_b: &str,
6898    ) -> Option<datasynth_core::distributions::CopulaConfig> {
6899        if !self.enabled {
6900            return None;
6901        }
6902        let rho = self.correlation_between(field_a, field_b)?;
6903        use datasynth_core::distributions::{CopulaConfig, CopulaType};
6904        let copula_type = match self.copula_type {
6905            CopulaSchemaType::Gaussian => CopulaType::Gaussian,
6906            CopulaSchemaType::Clayton => CopulaType::Clayton,
6907            CopulaSchemaType::Gumbel => CopulaType::Gumbel,
6908            CopulaSchemaType::Frank => CopulaType::Frank,
6909            CopulaSchemaType::StudentT => CopulaType::StudentT,
6910        };
6911        // Gaussian / StudentT interpret theta as correlation; others
6912        // as a shape parameter. Minimal v3.5.4 only wires Gaussian in
6913        // the runtime, but the converter is general so follow-ups can
6914        // light up the other copulas.
6915        let theta = rho.clamp(-0.999, 0.999);
6916        Some(CopulaConfig {
6917            copula_type,
6918            theta,
6919            degrees_of_freedom: 4.0,
6920        })
6921    }
6922}
6923
/// Copula type for dependency modeling.
///
/// Serialized in `snake_case`, so the accepted config values are
/// `gaussian`, `clayton`, `gumbel`, `frank` and `student_t`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CopulaSchemaType {
    /// Gaussian copula (symmetric, no tail dependence). This is the `Default`.
    #[default]
    Gaussian,
    /// Clayton copula (lower tail dependence)
    Clayton,
    /// Gumbel copula (upper tail dependence)
    Gumbel,
    /// Frank copula (symmetric, no tail dependence)
    Frank,
    /// Student-t copula (both tail dependencies)
    StudentT,
}
6940
/// Configuration for a correlated field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelatedFieldConfig {
    /// Field name. Required; this is the name matched by
    /// `CorrelationSchemaConfig::correlation_between`.
    pub name: String,

    /// Marginal distribution type. Omitted => normal with `mu = 0.0`,
    /// `sigma = 1.0`.
    #[serde(default)]
    pub distribution: MarginalDistributionConfig,
}
6951
/// Marginal distribution configuration.
///
/// Internally tagged: the variant is chosen by a `type` key whose value is
/// the `snake_case` variant name (`normal`, `log_normal`, `uniform`,
/// `discrete_uniform`), with the variant's parameters alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MarginalDistributionConfig {
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum value
        min: f64,
        /// Maximum value
        max: f64,
    },
    /// Discrete uniform distribution.
    DiscreteUniform {
        /// Minimum integer value
        min: i32,
        /// Maximum integer value
        max: i32,
    },
}
6985
6986impl Default for MarginalDistributionConfig {
6987    fn default() -> Self {
6988        Self::Normal {
6989            mu: 0.0,
6990            sigma: 1.0,
6991        }
6992    }
6993}
6994
/// Expected correlation for validation.
///
/// `field1`, `field2` and `expected_r` are required; `tolerance` may be
/// omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedCorrelationConfig {
    /// First field name.
    pub field1: String,
    /// Second field name.
    pub field2: String,
    /// Expected correlation coefficient.
    pub expected_r: f64,
    /// Acceptable tolerance. Omitted => 0.10.
    #[serde(default = "default_correlation_tolerance")]
    pub tolerance: f64,
}
7008
/// Serde default for `ExpectedCorrelationConfig::tolerance`.
fn default_correlation_tolerance() -> f64 {
    const CORRELATION_TOLERANCE: f64 = 0.10;
    CORRELATION_TOLERANCE
}
7012
/// Conditional distribution configuration.
///
/// `output_field` and `input_field` are required; every other field takes
/// its serde default when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalDistributionSchemaConfig {
    /// Output field name to generate.
    pub output_field: String,

    /// Input field name that conditions the distribution.
    pub input_field: String,

    /// Breakpoints defining distribution changes.
    #[serde(default)]
    pub breakpoints: Vec<ConditionalBreakpointConfig>,

    /// Default distribution when below all breakpoints.
    ///
    /// Note: `to_core_config` replaces a `Fixed { value: 0.0 }` default
    /// with the first breakpoint's distribution whenever at least one
    /// breakpoint is declared, so an explicit fixed zero cannot be
    /// combined with breakpoints.
    #[serde(default)]
    pub default_distribution: ConditionalDistributionParamsConfig,

    /// Minimum output value constraint.
    #[serde(default)]
    pub min_value: Option<f64>,

    /// Maximum output value constraint.
    #[serde(default)]
    pub max_value: Option<f64>,

    /// Decimal places for output rounding. Omitted => 2.
    #[serde(default = "default_decimal_places")]
    pub decimal_places: u8,
}
7042
/// Breakpoint for conditional distribution.
///
/// Both fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalBreakpointConfig {
    /// Input value threshold.
    pub threshold: f64,

    /// Distribution to use when input >= threshold.
    pub distribution: ConditionalDistributionParamsConfig,
}
7052
7053impl ConditionalDistributionSchemaConfig {
7054    /// Convert this schema config into a core
7055    /// `ConditionalDistributionConfig` suitable for
7056    /// `ConditionalSampler::new`. v3.5.3+.
7057    pub fn to_core_config(&self) -> datasynth_core::distributions::ConditionalDistributionConfig {
7058        use datasynth_core::distributions::{
7059            Breakpoint, ConditionalDistributionConfig, ConditionalDistributionParams,
7060        };
7061
7062        let default_distribution = convert_conditional_params(&self.default_distribution);
7063        let breakpoints: Vec<Breakpoint> = self
7064            .breakpoints
7065            .iter()
7066            .map(|bp| Breakpoint {
7067                threshold: bp.threshold,
7068                distribution: convert_conditional_params(&bp.distribution),
7069            })
7070            .collect();
7071
7072        // Use a sentinel default_distribution when the schema default is
7073        // its factory default (Fixed { value: 0.0 })  and we have
7074        // breakpoints — we don't want to clobber data for values below
7075        // the first breakpoint.
7076        let final_default = if breakpoints.is_empty() {
7077            default_distribution
7078        } else {
7079            match default_distribution {
7080                ConditionalDistributionParams::Fixed { value: 0.0 } => {
7081                    // Reuse the first breakpoint's distribution as the
7082                    // default to avoid surprising zeros.
7083                    breakpoints[0].distribution.clone()
7084                }
7085                other => other,
7086            }
7087        };
7088
7089        ConditionalDistributionConfig {
7090            output_field: self.output_field.clone(),
7091            input_field: self.input_field.clone(),
7092            breakpoints,
7093            default_distribution: final_default,
7094            min_value: self.min_value,
7095            max_value: self.max_value,
7096            decimal_places: self.decimal_places,
7097        }
7098    }
7099}
7100
7101fn convert_conditional_params(
7102    p: &ConditionalDistributionParamsConfig,
7103) -> datasynth_core::distributions::ConditionalDistributionParams {
7104    use datasynth_core::distributions::ConditionalDistributionParams as Core;
7105    match p {
7106        ConditionalDistributionParamsConfig::Fixed { value } => Core::Fixed { value: *value },
7107        ConditionalDistributionParamsConfig::Normal { mu, sigma } => Core::Normal {
7108            mu: *mu,
7109            sigma: *sigma,
7110        },
7111        ConditionalDistributionParamsConfig::LogNormal { mu, sigma } => Core::LogNormal {
7112            mu: *mu,
7113            sigma: *sigma,
7114        },
7115        ConditionalDistributionParamsConfig::Uniform { min, max } => Core::Uniform {
7116            min: *min,
7117            max: *max,
7118        },
7119        ConditionalDistributionParamsConfig::Beta {
7120            alpha,
7121            beta,
7122            min,
7123            max,
7124        } => Core::Beta {
7125            alpha: *alpha,
7126            beta: *beta,
7127            min: *min,
7128            max: *max,
7129        },
7130        ConditionalDistributionParamsConfig::Discrete { values, weights } => Core::Discrete {
7131            values: values.clone(),
7132            weights: weights.clone(),
7133        },
7134    }
7135}
7136
/// Distribution parameters for conditional distributions.
///
/// Serialized as an internally tagged enum: the `type` field selects the
/// variant by its snake_case name (`fixed`, `normal`, `log_normal`,
/// `uniform`, `beta`, `discrete`) and the variant's fields sit next to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ConditionalDistributionParamsConfig {
    /// Fixed value.
    Fixed {
        /// The fixed value
        value: f64,
    },
    /// Normal distribution.
    Normal {
        /// Mean
        mu: f64,
        /// Standard deviation
        sigma: f64,
    },
    /// Log-normal distribution.
    LogNormal {
        /// Location parameter
        mu: f64,
        /// Scale parameter
        sigma: f64,
    },
    /// Uniform distribution.
    Uniform {
        /// Minimum
        min: f64,
        /// Maximum
        max: f64,
    },
    /// Beta distribution (scaled).
    Beta {
        /// Alpha parameter
        alpha: f64,
        /// Beta parameter
        beta: f64,
        /// Minimum output value
        min: f64,
        /// Maximum output value
        max: f64,
    },
    /// Discrete values with weights.
    Discrete {
        /// Possible values
        values: Vec<f64>,
        /// Weights (should sum to 1.0)
        weights: Vec<f64>,
    },
}
7186
7187impl Default for ConditionalDistributionParamsConfig {
7188    fn default() -> Self {
7189        Self::Normal {
7190            mu: 0.0,
7191            sigma: 1.0,
7192        }
7193    }
7194}
7195
/// Regime change configuration.
///
/// Groups dated regime-change events, an optional economic cycle, and
/// parameter drifts. `apply_to` copies them onto a core `DriftConfig` and
/// does nothing unless `enabled` is true.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RegimeChangeSchemaConfig {
    /// Enable regime change modeling.
    #[serde(default)]
    pub enabled: bool,

    /// List of regime changes.
    #[serde(default)]
    pub changes: Vec<RegimeChangeEventConfig>,

    /// Economic cycle configuration. Applied only when present and its own
    /// `enabled` flag is set.
    #[serde(default)]
    pub economic_cycle: Option<EconomicCycleSchemaConfig>,

    /// Parameter drift configurations.
    #[serde(default)]
    pub parameter_drifts: Vec<ParameterDriftSchemaConfig>,
}
7215
/// A single regime change event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeChangeEventConfig {
    /// Date when the change occurs, as `YYYY-MM-DD` (ISO 8601 calendar date).
    ///
    /// `RegimeChangeSchemaConfig::apply_to` parses this with `%Y-%m-%d` and
    /// skips the event when parsing fails or the date precedes the
    /// generation start.
    pub date: String,

    /// Type of regime change.
    pub change_type: RegimeChangeTypeConfig,

    /// Description of the change.
    #[serde(default)]
    pub description: Option<String>,

    /// Effects of this regime change.
    #[serde(default)]
    pub effects: Vec<RegimeEffectConfig>,
}
7233
/// Type of regime change.
///
/// Serialized in snake_case (e.g. `price_increase`, `competitor_entry`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RegimeChangeTypeConfig {
    /// Acquisition - sudden volume and amount increase
    Acquisition,
    /// Divestiture - sudden volume and amount decrease
    Divestiture,
    /// Price increase - amounts increase
    PriceIncrease,
    /// Price decrease - amounts decrease
    PriceDecrease,
    /// New product launch - volume ramp-up
    ProductLaunch,
    /// Product discontinuation - volume ramp-down
    ProductDiscontinuation,
    /// Policy change - affects patterns
    PolicyChange,
    /// Competitor entry - market disruption
    CompetitorEntry,
    /// Custom effect
    Custom,
}
7257
/// Effect of a regime change on a specific field.
///
/// Both members are copied unchanged into the core `RegimeEffect` by
/// `RegimeChangeSchemaConfig::apply_to`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegimeEffectConfig {
    /// Field being affected.
    pub field: String,

    /// Multiplier to apply (1.0 = no change, 1.5 = 50% increase).
    pub multiplier: f64,
}
7267
/// Economic cycle configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EconomicCycleSchemaConfig {
    /// Enable economic cycle modeling.
    #[serde(default)]
    pub enabled: bool,

    /// Cycle period in months (e.g., 48 for 4-year business cycle).
    /// Defaults to 48.
    #[serde(default = "default_cycle_period")]
    pub period_months: u32,

    /// Amplitude of cycle effect (0.0-1.0). Defaults to 0.15.
    #[serde(default = "default_cycle_amplitude")]
    pub amplitude: f64,

    /// Phase offset in months.
    #[serde(default)]
    pub phase_offset: u32,

    /// Recession periods (start_month, duration_months).
    ///
    /// `RegimeChangeSchemaConfig::apply_to` expands each entry into the
    /// individual month indices it covers.
    #[serde(default)]
    pub recessions: Vec<RecessionPeriodConfig>,
}
7291
/// Serde default for `EconomicCycleSchemaConfig::period_months`: 48 months.
fn default_cycle_period() -> u32 {
    const PERIOD_MONTHS: u32 = 48;
    PERIOD_MONTHS
}

/// Serde default for `EconomicCycleSchemaConfig::amplitude`: 0.15.
fn default_cycle_amplitude() -> f64 {
    const AMPLITUDE: f64 = 0.15;
    AMPLITUDE
}
7299
7300impl Default for EconomicCycleSchemaConfig {
7301    fn default() -> Self {
7302        Self {
7303            enabled: false,
7304            period_months: 48,
7305            amplitude: 0.15,
7306            phase_offset: 0,
7307            recessions: Vec::new(),
7308        }
7309    }
7310}
7311
/// Recession period configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecessionPeriodConfig {
    /// Start month (0-indexed from generation start).
    pub start_month: u32,

    /// Duration in months.
    pub duration_months: u32,

    /// Severity (0.0-1.0, affects volume reduction). Defaults to 0.20.
    ///
    /// `RegimeChangeSchemaConfig::apply_to` turns this into `1.0 - severity`
    /// before handing it to the core config.
    #[serde(default = "default_recession_severity")]
    pub severity: f64,
}
7325
7326impl RegimeChangeSchemaConfig {
7327    /// Populate the regime-change, economic-cycle, and parameter-drift
7328    /// slots on a `DriftConfig` from this schema config. v3.5.2+.
7329    ///
7330    /// `generation_start` must match `config.global.start_date` so that
7331    /// absolute regime-change dates can be mapped to 0-indexed periods.
7332    /// Unparseable / out-of-range dates are silently skipped to keep
7333    /// runtime robust against user typos.
7334    pub fn apply_to(
7335        &self,
7336        drift: &mut datasynth_core::distributions::DriftConfig,
7337        generation_start: chrono::NaiveDate,
7338    ) {
7339        if !self.enabled {
7340            return;
7341        }
7342
7343        // Enable drift if any regime-change feature wants it.
7344        drift.enabled = true;
7345
7346        // Regime-change events (absolute dates → period offsets).
7347        for event in &self.changes {
7348            let period = match chrono::NaiveDate::parse_from_str(&event.date, "%Y-%m-%d") {
7349                Ok(d) => {
7350                    let days = (d - generation_start).num_days();
7351                    if days < 0 {
7352                        continue;
7353                    }
7354                    // Approximate month by dividing by 30.4 so we don't
7355                    // need chrono::Months arithmetic.
7356                    (days as f64 / 30.4).round() as u32
7357                }
7358                Err(_) => continue,
7359            };
7360            let change_type = convert_regime_change_type(event.change_type);
7361            let core_effects = event
7362                .effects
7363                .iter()
7364                .map(|e| datasynth_core::distributions::RegimeEffect {
7365                    field: e.field.clone(),
7366                    multiplier: e.multiplier,
7367                })
7368                .collect();
7369            drift
7370                .regime_changes
7371                .push(datasynth_core::distributions::RegimeChange {
7372                    period,
7373                    change_type,
7374                    description: event.description.clone(),
7375                    effects: core_effects,
7376                    transition_periods: 0,
7377                });
7378        }
7379
7380        // Economic cycle.
7381        if let Some(ec) = &self.economic_cycle {
7382            if ec.enabled {
7383                let recession_periods: Vec<u32> = ec
7384                    .recessions
7385                    .iter()
7386                    .flat_map(|r| r.start_month..r.start_month + r.duration_months)
7387                    .collect();
7388                // Use the most-severe recession as the severity driver;
7389                // fall back to default when none declared.
7390                let severity = ec
7391                    .recessions
7392                    .iter()
7393                    .map(|r| 1.0 - r.severity)
7394                    .fold(0.75f64, f64::min);
7395                drift.economic_cycle = datasynth_core::distributions::EconomicCycleConfig {
7396                    enabled: true,
7397                    cycle_length: ec.period_months,
7398                    amplitude: ec.amplitude,
7399                    phase_offset: ec.phase_offset,
7400                    recession_periods,
7401                    recession_severity: severity,
7402                };
7403                drift.drift_type = datasynth_core::distributions::DriftType::Mixed;
7404            }
7405        }
7406
7407        // Parameter drifts.
7408        for pd in &self.parameter_drifts {
7409            let drift_type = match pd.drift_type {
7410                ParameterDriftTypeConfig::Linear => {
7411                    datasynth_core::distributions::ParameterDriftType::Linear
7412                }
7413                ParameterDriftTypeConfig::Exponential => {
7414                    datasynth_core::distributions::ParameterDriftType::Exponential
7415                }
7416                ParameterDriftTypeConfig::Logistic => {
7417                    datasynth_core::distributions::ParameterDriftType::Logistic
7418                }
7419                ParameterDriftTypeConfig::Step => {
7420                    datasynth_core::distributions::ParameterDriftType::Step
7421                }
7422            };
7423            drift
7424                .parameter_drifts
7425                .push(datasynth_core::distributions::ParameterDrift {
7426                    parameter: pd.parameter.clone(),
7427                    drift_type,
7428                    initial_value: pd.start_value,
7429                    target_or_rate: pd.end_value,
7430                    start_period: pd.start_period,
7431                    end_period: pd.end_period,
7432                    steepness: 1.0,
7433                });
7434        }
7435    }
7436}
7437
7438fn convert_regime_change_type(
7439    t: RegimeChangeTypeConfig,
7440) -> datasynth_core::distributions::RegimeChangeType {
7441    use datasynth_core::distributions::RegimeChangeType as Core;
7442    match t {
7443        RegimeChangeTypeConfig::Acquisition => Core::Acquisition,
7444        RegimeChangeTypeConfig::Divestiture => Core::Divestiture,
7445        RegimeChangeTypeConfig::PriceIncrease => Core::PriceIncrease,
7446        RegimeChangeTypeConfig::PriceDecrease => Core::PriceDecrease,
7447        RegimeChangeTypeConfig::ProductLaunch => Core::ProductLaunch,
7448        RegimeChangeTypeConfig::ProductDiscontinuation => Core::ProductDiscontinuation,
7449        RegimeChangeTypeConfig::PolicyChange => Core::PolicyChange,
7450        RegimeChangeTypeConfig::CompetitorEntry => Core::CompetitorEntry,
7451        RegimeChangeTypeConfig::Custom => Core::Custom,
7452    }
7453}
7454
/// Serde default for `RecessionPeriodConfig::severity`: 0.20.
fn default_recession_severity() -> f64 {
    const SEVERITY: f64 = 0.20;
    SEVERITY
}
7458
/// Parameter drift configuration.
///
/// `RegimeChangeSchemaConfig::apply_to` copies each entry into a core
/// `ParameterDrift`: `start_value` becomes `initial_value` and `end_value`
/// becomes `target_or_rate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterDriftSchemaConfig {
    /// Parameter being drifted.
    pub parameter: String,

    /// Drift type.
    pub drift_type: ParameterDriftTypeConfig,

    /// Start value.
    pub start_value: f64,

    /// End value.
    pub end_value: f64,

    /// Start period (month, 0-indexed). Defaults to 0.
    #[serde(default)]
    pub start_period: u32,

    /// End period (month, optional - defaults to end of generation).
    #[serde(default)]
    pub end_period: Option<u32>,
}
7482
/// Parameter drift type.
///
/// Serialized in snake_case (`linear`, `exponential`, `logistic`, `step`);
/// `Linear` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ParameterDriftTypeConfig {
    /// Linear interpolation
    #[default]
    Linear,
    /// Exponential growth/decay
    Exponential,
    /// S-curve (logistic)
    Logistic,
    /// Step function
    Step,
}
7497
/// Statistical validation configuration.
///
/// Every field is optional in the input; the derived `Default` is disabled
/// with no tests and default reporting settings.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StatisticalValidationSchemaConfig {
    /// Enable statistical validation.
    #[serde(default)]
    pub enabled: bool,

    /// Statistical tests to run.
    #[serde(default)]
    pub tests: Vec<StatisticalTestConfig>,

    /// Validation reporting configuration.
    #[serde(default)]
    pub reporting: ValidationReportingConfig,
}
7513
/// Statistical test configuration.
///
/// Serialized as an internally tagged enum: the `type` field selects the
/// variant by its snake_case name (`benford_first_digit`,
/// `distribution_fit`, `correlation_check`, `chi_squared`,
/// `anderson_darling`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StatisticalTestConfig {
    /// Benford's Law first digit test.
    BenfordFirstDigit {
        /// Threshold MAD for failure. Defaults to 0.015.
        #[serde(default = "default_benford_threshold")]
        threshold_mad: f64,
        /// Warning MAD threshold. Defaults to 0.010.
        #[serde(default = "default_benford_warning")]
        warning_mad: f64,
    },
    /// Distribution fit test.
    DistributionFit {
        /// Target distribution to test.
        target: TargetDistributionConfig,
        /// K-S test significance level. Defaults to 0.05.
        #[serde(default = "default_ks_significance")]
        ks_significance: f64,
        /// Test method (`kolmogorov_smirnov`, `anderson_darling`, or
        /// `chi_squared`). Defaults to `kolmogorov_smirnov`.
        #[serde(default)]
        method: DistributionFitMethod,
    },
    /// Correlation check.
    CorrelationCheck {
        /// Expected correlations to validate.
        expected_correlations: Vec<ExpectedCorrelationConfig>,
    },
    /// Chi-squared test.
    ChiSquared {
        /// Number of bins. Defaults to 10.
        #[serde(default = "default_chi_squared_bins")]
        bins: usize,
        /// Significance level. Defaults to 0.05.
        #[serde(default = "default_chi_squared_significance")]
        significance: f64,
    },
    /// Anderson-Darling test.
    AndersonDarling {
        /// Target distribution.
        target: TargetDistributionConfig,
        /// Significance level. Defaults to 0.05.
        #[serde(default = "default_ad_significance")]
        significance: f64,
    },
}
7561
/// Serde default for `BenfordFirstDigit::threshold_mad`: 0.015.
fn default_benford_threshold() -> f64 {
    const FAILURE_MAD: f64 = 0.015;
    FAILURE_MAD
}

/// Serde default for `BenfordFirstDigit::warning_mad`: 0.010.
fn default_benford_warning() -> f64 {
    const WARNING_MAD: f64 = 0.010;
    WARNING_MAD
}

/// Serde default for `DistributionFit::ks_significance`: 0.05.
fn default_ks_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}

/// Serde default for `ChiSquared::bins`: 10.
fn default_chi_squared_bins() -> usize {
    const BINS: usize = 10;
    BINS
}

/// Serde default for `ChiSquared::significance`: 0.05.
fn default_chi_squared_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}

/// Serde default for `AndersonDarling::significance`: 0.05.
fn default_ad_significance() -> f64 {
    const SIGNIFICANCE: f64 = 0.05;
    SIGNIFICANCE
}
7585
/// Target distribution for fit tests.
///
/// Serialized in snake_case (`normal`, `log_normal`, `exponential`,
/// `uniform`); `LogNormal` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TargetDistributionConfig {
    /// Normal distribution
    Normal,
    /// Log-normal distribution
    #[default]
    LogNormal,
    /// Exponential distribution
    Exponential,
    /// Uniform distribution
    Uniform,
}
7600
7601/// Distribution fit test method.
7602#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7603#[serde(rename_all = "snake_case")]
7604pub enum DistributionFitMethod {
7605    /// Kolmogorov-Smirnov test
7606    #[default]
7607    KolmogorovSmirnov,
7608    /// Anderson-Darling test
7609    AndersonDarling,
7610    /// Chi-squared test
7611    ChiSquared,
7612}
7613
/// Validation reporting configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationReportingConfig {
    /// Output validation report to file.
    #[serde(default)]
    pub output_report: bool,

    /// Report format. Defaults to `ValidationReportFormat::default()`.
    #[serde(default)]
    pub format: ValidationReportFormat,

    /// Fail generation if validation fails.
    #[serde(default)]
    pub fail_on_error: bool,

    /// Include detailed statistics in report.
    #[serde(default = "default_true")]
    pub include_details: bool,
}
7633
7634impl Default for ValidationReportingConfig {
7635    fn default() -> Self {
7636        Self {
7637            output_report: false,
7638            format: ValidationReportFormat::Json,
7639            fail_on_error: false,
7640            include_details: true,
7641        }
7642    }
7643}
7644
/// Validation report format.
///
/// Serialized in snake_case (`json`, `yaml`, `html`); `Json` is the
/// `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ValidationReportFormat {
    /// JSON format
    #[default]
    Json,
    /// YAML format
    Yaml,
    /// HTML report
    Html,
}
7657
7658// =============================================================================
7659// Temporal Patterns Configuration
7660// =============================================================================
7661
/// Temporal patterns configuration for business days, period-end dynamics, and processing lags.
///
/// This section enables sophisticated temporal modeling including:
/// - Business day calculations and settlement dates
/// - Regional holiday calendars
/// - Period-end decay curves (non-flat volume spikes)
/// - Processing lag modeling (event-to-posting delays)
///
/// Every sub-section is optional in the input; an omitted one is filled
/// with that type's `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TemporalPatternsConfig {
    /// Enable temporal patterns features.
    #[serde(default)]
    pub enabled: bool,

    /// Business day calculation configuration.
    #[serde(default)]
    pub business_days: BusinessDaySchemaConfig,

    /// Regional calendar configuration.
    #[serde(default)]
    pub calendars: CalendarSchemaConfig,

    /// Period-end dynamics configuration.
    #[serde(default)]
    pub period_end: PeriodEndSchemaConfig,

    /// Processing lag configuration.
    #[serde(default)]
    pub processing_lags: ProcessingLagSchemaConfig,

    /// Fiscal calendar configuration (custom year start, 4-4-5, 13-period).
    #[serde(default)]
    pub fiscal_calendar: FiscalCalendarSchemaConfig,

    /// Intra-day patterns configuration (morning spike, lunch dip, EOD rush).
    #[serde(default)]
    pub intraday: IntraDaySchemaConfig,

    /// Timezone handling configuration.
    #[serde(default)]
    pub timezones: TimezoneSchemaConfig,
}
7703
/// Business day calculation configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BusinessDaySchemaConfig {
    /// Enable business day calculations.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Half-day policy: "full_day", "half_day", "non_business_day".
    /// Defaults to "half_day".
    #[serde(default = "default_half_day_policy")]
    pub half_day_policy: String,

    /// Settlement rules configuration.
    #[serde(default)]
    pub settlement_rules: SettlementRulesSchemaConfig,

    /// Month-end convention: "modified_following", "preceding", "following", "end_of_month".
    /// Defaults to "modified_following".
    #[serde(default = "default_month_end_convention")]
    pub month_end_convention: String,

    /// Weekend days (e.g., ["saturday", "sunday"] or ["friday", "saturday"] for Middle East).
    #[serde(default)]
    pub weekend_days: Option<Vec<String>>,
}
7727
/// Serde default for `BusinessDaySchemaConfig::half_day_policy`.
fn default_half_day_policy() -> String {
    String::from("half_day")
}

/// Serde default for `BusinessDaySchemaConfig::month_end_convention`.
fn default_month_end_convention() -> String {
    String::from("modified_following")
}
7735
7736impl Default for BusinessDaySchemaConfig {
7737    fn default() -> Self {
7738        Self {
7739            enabled: true,
7740            half_day_policy: "half_day".to_string(),
7741            settlement_rules: SettlementRulesSchemaConfig::default(),
7742            month_end_convention: "modified_following".to_string(),
7743            weekend_days: None,
7744        }
7745    }
7746}
7747
/// Settlement rules configuration.
///
/// Day counts are settlement offsets in days (T+N); every field falls back
/// to the default noted on it when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SettlementRulesSchemaConfig {
    /// Equity settlement days (T+N). Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub equity_days: i32,

    /// Government bonds settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub government_bonds_days: i32,

    /// FX spot settlement days. Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub fx_spot_days: i32,

    /// Corporate bonds settlement days. Defaults to 2.
    #[serde(default = "default_settlement_2")]
    pub corporate_bonds_days: i32,

    /// Wire transfer cutoff time (HH:MM format). Defaults to "14:00".
    #[serde(default = "default_wire_cutoff")]
    pub wire_cutoff_time: String,

    /// International wire settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub wire_international_days: i32,

    /// ACH settlement days. Defaults to 1.
    #[serde(default = "default_settlement_1")]
    pub ach_days: i32,
}
7779
/// Serde default for the T+1 settlement fields (government bonds,
/// international wires, ACH).
fn default_settlement_1() -> i32 {
    const T_PLUS_ONE: i32 = 1;
    T_PLUS_ONE
}

/// Serde default for the T+2 settlement fields (equities, FX spot,
/// corporate bonds).
fn default_settlement_2() -> i32 {
    const T_PLUS_TWO: i32 = 2;
    T_PLUS_TWO
}

/// Serde default for `SettlementRulesSchemaConfig::wire_cutoff_time`.
fn default_wire_cutoff() -> String {
    String::from("14:00")
}
7791
7792impl Default for SettlementRulesSchemaConfig {
7793    fn default() -> Self {
7794        Self {
7795            equity_days: 2,
7796            government_bonds_days: 1,
7797            fx_spot_days: 2,
7798            corporate_bonds_days: 2,
7799            wire_cutoff_time: "14:00".to_string(),
7800            wire_international_days: 1,
7801            ach_days: 1,
7802        }
7803    }
7804}
7805
/// Regional calendar configuration.
///
/// Both lists default to empty when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CalendarSchemaConfig {
    /// List of regions to include (e.g., ["US", "DE", "BR", "SG", "KR"]).
    #[serde(default)]
    pub regions: Vec<String>,

    /// Custom holidays (in addition to regional calendars).
    #[serde(default)]
    pub custom_holidays: Vec<CustomHolidaySchemaConfig>,
}
7817
/// Custom holiday configuration.
///
/// The holiday is identified by month and day only; no year is stored.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomHolidaySchemaConfig {
    /// Holiday name.
    pub name: String,
    /// Month (1-12).
    pub month: u8,
    /// Day of month.
    pub day: u8,
    /// Activity multiplier (0.0-1.0, default 0.05).
    #[serde(default = "default_holiday_multiplier")]
    pub activity_multiplier: f64,
}
7831
/// Serde default for `CustomHolidaySchemaConfig::activity_multiplier`: 0.05.
fn default_holiday_multiplier() -> f64 {
    const MULTIPLIER: f64 = 0.05;
    MULTIPLIER
}
7835
/// Period-end dynamics configuration.
///
/// All fields are optional and deserialize to `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PeriodEndSchemaConfig {
    /// Model type: "flat", "exponential", "extended_crunch", "daily_profile".
    #[serde(default)]
    pub model: Option<String>,

    /// Month-end configuration.
    #[serde(default)]
    pub month_end: Option<PeriodEndModelSchemaConfig>,

    /// Quarter-end configuration.
    #[serde(default)]
    pub quarter_end: Option<PeriodEndModelSchemaConfig>,

    /// Year-end configuration.
    #[serde(default)]
    pub year_end: Option<PeriodEndModelSchemaConfig>,
}
7855
/// Period-end model configuration.
///
/// Used for the month-end, quarter-end and year-end slots of
/// `PeriodEndSchemaConfig`. All fields are optional and deserialize to
/// `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PeriodEndModelSchemaConfig {
    /// Inherit configuration from another period (e.g., "month_end").
    #[serde(default)]
    pub inherit_from: Option<String>,

    /// Additional multiplier on top of inherited/base model.
    #[serde(default)]
    pub additional_multiplier: Option<f64>,

    /// Days before period end to start acceleration (negative, e.g., -10).
    #[serde(default)]
    pub start_day: Option<i32>,

    /// Base multiplier at start of acceleration.
    #[serde(default)]
    pub base_multiplier: Option<f64>,

    /// Peak multiplier on last day.
    #[serde(default)]
    pub peak_multiplier: Option<f64>,

    /// Decay rate for exponential model (0.1-0.5 typical).
    #[serde(default)]
    pub decay_rate: Option<f64>,

    /// Sustained high days for crunch model.
    #[serde(default)]
    pub sustained_high_days: Option<i32>,
}
7887
/// Processing lag configuration.
///
/// Each lag slot is optional and deserializes to `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessingLagSchemaConfig {
    /// Enable processing lag calculations.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Sales order lag configuration (log-normal mu, sigma).
    #[serde(default)]
    pub sales_order_lag: Option<LagDistributionSchemaConfig>,

    /// Purchase order lag configuration.
    #[serde(default)]
    pub purchase_order_lag: Option<LagDistributionSchemaConfig>,

    /// Goods receipt lag configuration.
    #[serde(default)]
    pub goods_receipt_lag: Option<LagDistributionSchemaConfig>,

    /// Invoice receipt lag configuration.
    #[serde(default)]
    pub invoice_receipt_lag: Option<LagDistributionSchemaConfig>,

    /// Invoice issue lag configuration.
    #[serde(default)]
    pub invoice_issue_lag: Option<LagDistributionSchemaConfig>,

    /// Payment lag configuration.
    #[serde(default)]
    pub payment_lag: Option<LagDistributionSchemaConfig>,

    /// Journal entry lag configuration.
    #[serde(default)]
    pub journal_entry_lag: Option<LagDistributionSchemaConfig>,

    /// Cross-day posting configuration.
    #[serde(default)]
    pub cross_day_posting: Option<CrossDayPostingSchemaConfig>,
}
7927
7928impl Default for ProcessingLagSchemaConfig {
7929    fn default() -> Self {
7930        Self {
7931            enabled: true,
7932            sales_order_lag: None,
7933            purchase_order_lag: None,
7934            goods_receipt_lag: None,
7935            invoice_receipt_lag: None,
7936            invoice_issue_lag: None,
7937            payment_lag: None,
7938            journal_entry_lag: None,
7939            cross_day_posting: None,
7940        }
7941    }
7942}
7943
/// Lag distribution configuration (log-normal parameters).
///
/// `mu` and `sigma` are required; the hour bounds are optional and
/// deserialize to `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LagDistributionSchemaConfig {
    /// Log-scale mean (mu for log-normal).
    pub mu: f64,
    /// Log-scale standard deviation (sigma for log-normal).
    pub sigma: f64,
    /// Minimum lag in hours.
    #[serde(default)]
    pub min_hours: Option<f64>,
    /// Maximum lag in hours.
    #[serde(default)]
    pub max_hours: Option<f64>,
}
7958
/// Cross-day posting configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossDayPostingSchemaConfig {
    /// Enable cross-day posting logic.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Probability of next-day posting by hour (map of hour -> probability).
    /// E.g., { 17: 0.7, 19: 0.9, 21: 0.99 }
    ///
    /// When this field is omitted from the input it deserializes to an
    /// empty map, whereas `CrossDayPostingSchemaConfig::default()` fills in
    /// a built-in table for hours 17-22.
    /// NOTE(review): confirm the empty-map result for a partially specified
    /// section is intended.
    #[serde(default)]
    pub probability_by_hour: std::collections::HashMap<u8, f64>,
}
7971
7972impl Default for CrossDayPostingSchemaConfig {
7973    fn default() -> Self {
7974        let mut probability_by_hour = std::collections::HashMap::new();
7975        probability_by_hour.insert(17, 0.3);
7976        probability_by_hour.insert(18, 0.6);
7977        probability_by_hour.insert(19, 0.8);
7978        probability_by_hour.insert(20, 0.9);
7979        probability_by_hour.insert(21, 0.95);
7980        probability_by_hour.insert(22, 0.99);
7981
7982        Self {
7983            enabled: true,
7984            probability_by_hour,
7985        }
7986    }
7987}
7988
7989// =============================================================================
7990// Fiscal Calendar Configuration (P2)
7991// =============================================================================
7992
7993/// Fiscal calendar configuration.
7994///
7995/// Supports calendar year, custom year start, 4-4-5 retail calendar,
7996/// and 13-period calendars.
7997#[derive(Debug, Clone, Serialize, Deserialize, Default)]
7998pub struct FiscalCalendarSchemaConfig {
7999    /// Enable non-standard fiscal calendar.
8000    #[serde(default)]
8001    pub enabled: bool,
8002
8003    /// Fiscal calendar type: "calendar_year", "custom", "four_four_five", "thirteen_period".
8004    #[serde(default = "default_fiscal_calendar_type")]
8005    pub calendar_type: String,
8006
8007    /// Month the fiscal year starts (1-12). Used for custom year start.
8008    #[serde(default)]
8009    pub year_start_month: Option<u8>,
8010
8011    /// Day the fiscal year starts (1-31). Used for custom year start.
8012    #[serde(default)]
8013    pub year_start_day: Option<u8>,
8014
8015    /// 4-4-5 calendar configuration (if calendar_type is "four_four_five").
8016    #[serde(default)]
8017    pub four_four_five: Option<FourFourFiveSchemaConfig>,
8018}
8019
/// Serde default for `FiscalCalendarSchemaConfig::calendar_type`.
fn default_fiscal_calendar_type() -> String {
    String::from("calendar_year")
}
8023
/// 4-4-5 retail calendar configuration.
///
/// Every field falls back to the default noted on it when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourFourFiveSchemaConfig {
    /// Week pattern: "four_four_five", "four_five_four", "five_four_four".
    /// Defaults to "four_four_five".
    #[serde(default = "default_week_pattern")]
    pub pattern: String,

    /// Anchor type: "first_sunday", "last_saturday", "nearest_saturday".
    /// Defaults to "last_saturday".
    #[serde(default = "default_anchor_type")]
    pub anchor_type: String,

    /// Anchor month (1-12). Defaults to 1 (January).
    #[serde(default = "default_anchor_month")]
    pub anchor_month: u8,

    /// Where to place leap week: "q4_period3" or "q1_period1".
    /// Defaults to "q4_period3".
    #[serde(default = "default_leap_week_placement")]
    pub leap_week_placement: String,
}
8043
/// Serde fallback for `FourFourFiveSchemaConfig::pattern`.
fn default_week_pattern() -> String {
    String::from("four_four_five")
}

/// Serde fallback for `FourFourFiveSchemaConfig::anchor_type`.
fn default_anchor_type() -> String {
    String::from("last_saturday")
}

/// Serde fallback for `FourFourFiveSchemaConfig::anchor_month`.
fn default_anchor_month() -> u8 {
    // Month number 1, i.e. January.
    1
}

/// Serde fallback for `FourFourFiveSchemaConfig::leap_week_placement`.
fn default_leap_week_placement() -> String {
    String::from("q4_period3")
}
8059
8060impl Default for FourFourFiveSchemaConfig {
8061    fn default() -> Self {
8062        Self {
8063            pattern: "four_four_five".to_string(),
8064            anchor_type: "last_saturday".to_string(),
8065            anchor_month: 1,
8066            leap_week_placement: "q4_period3".to_string(),
8067        }
8068    }
8069}
8070
8071// =============================================================================
8072// Intra-Day Patterns Configuration (P2)
8073// =============================================================================
8074
8075/// Intra-day patterns configuration.
8076///
8077/// Defines time-of-day segments with different activity multipliers
8078/// for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
8079#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8080pub struct IntraDaySchemaConfig {
8081    /// Enable intra-day patterns.
8082    #[serde(default)]
8083    pub enabled: bool,
8084
8085    /// Custom intra-day segments.
8086    #[serde(default)]
8087    pub segments: Vec<IntraDaySegmentSchemaConfig>,
8088}
8089
8090/// Intra-day segment configuration.
8091#[derive(Debug, Clone, Serialize, Deserialize)]
8092pub struct IntraDaySegmentSchemaConfig {
8093    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
8094    pub name: String,
8095
8096    /// Start time (HH:MM format).
8097    pub start: String,
8098
8099    /// End time (HH:MM format).
8100    pub end: String,
8101
8102    /// Activity multiplier (1.0 = normal).
8103    #[serde(default = "default_multiplier")]
8104    pub multiplier: f64,
8105
8106    /// Posting type: "human", "system", "both".
8107    #[serde(default = "default_posting_type")]
8108    pub posting_type: String,
8109}
8110
/// Serde fallback for `IntraDaySegmentSchemaConfig::multiplier` (normal activity).
fn default_multiplier() -> f64 {
    1.0
}

/// Serde fallback for `IntraDaySegmentSchemaConfig::posting_type`.
fn default_posting_type() -> String {
    String::from("both")
}
8118
8119// =============================================================================
8120// Timezone Configuration
8121// =============================================================================
8122
8123/// Timezone handling configuration for multi-region entities.
8124#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8125pub struct TimezoneSchemaConfig {
8126    /// Enable timezone handling.
8127    #[serde(default)]
8128    pub enabled: bool,
8129
8130    /// Default timezone (IANA format, e.g., "America/New_York").
8131    #[serde(default = "default_timezone")]
8132    pub default_timezone: String,
8133
8134    /// Consolidation timezone for group reporting (IANA format).
8135    #[serde(default = "default_consolidation_timezone")]
8136    pub consolidation_timezone: String,
8137
8138    /// Entity-to-timezone mappings.
8139    /// Supports patterns like "EU_*" -> "Europe/London".
8140    #[serde(default)]
8141    pub entity_mappings: Vec<EntityTimezoneMapping>,
8142}
8143
8144fn default_timezone() -> String {
8145    "America/New_York".to_string()
8146}
8147
8148fn default_consolidation_timezone() -> String {
8149    "UTC".to_string()
8150}
8151
8152/// Mapping from entity pattern to timezone.
8153#[derive(Debug, Clone, Serialize, Deserialize)]
8154pub struct EntityTimezoneMapping {
8155    /// Entity code pattern (e.g., "EU_*", "*_APAC", "1000").
8156    pub pattern: String,
8157
8158    /// Timezone (IANA format, e.g., "Europe/London").
8159    pub timezone: String,
8160}
8161
8162// =============================================================================
8163// Vendor Network Configuration
8164// =============================================================================
8165
8166/// Configuration for multi-tier vendor network generation.
8167#[derive(Debug, Clone, Serialize, Deserialize)]
8168pub struct VendorNetworkSchemaConfig {
8169    /// Enable vendor network generation.
8170    #[serde(default)]
8171    pub enabled: bool,
8172
8173    /// Maximum depth of supply chain tiers (1-3).
8174    #[serde(default = "default_vendor_tier_depth")]
8175    pub depth: u8,
8176
8177    /// Tier 1 vendor count configuration.
8178    #[serde(default)]
8179    pub tier1: TierCountSchemaConfig,
8180
8181    /// Tier 2 vendors per Tier 1 parent.
8182    #[serde(default)]
8183    pub tier2_per_parent: TierCountSchemaConfig,
8184
8185    /// Tier 3 vendors per Tier 2 parent.
8186    #[serde(default)]
8187    pub tier3_per_parent: TierCountSchemaConfig,
8188
8189    /// Vendor cluster distribution.
8190    #[serde(default)]
8191    pub clusters: VendorClusterSchemaConfig,
8192
8193    /// Concentration limits.
8194    #[serde(default)]
8195    pub dependencies: DependencySchemaConfig,
8196}
8197
8198fn default_vendor_tier_depth() -> u8 {
8199    3
8200}
8201
8202impl Default for VendorNetworkSchemaConfig {
8203    fn default() -> Self {
8204        Self {
8205            enabled: false,
8206            depth: 3,
8207            tier1: TierCountSchemaConfig { min: 50, max: 100 },
8208            tier2_per_parent: TierCountSchemaConfig { min: 4, max: 10 },
8209            tier3_per_parent: TierCountSchemaConfig { min: 2, max: 5 },
8210            clusters: VendorClusterSchemaConfig::default(),
8211            dependencies: DependencySchemaConfig::default(),
8212        }
8213    }
8214}
8215
8216/// Tier count configuration.
8217#[derive(Debug, Clone, Serialize, Deserialize)]
8218pub struct TierCountSchemaConfig {
8219    /// Minimum count.
8220    #[serde(default = "default_tier_min")]
8221    pub min: usize,
8222
8223    /// Maximum count.
8224    #[serde(default = "default_tier_max")]
8225    pub max: usize,
8226}
8227
/// Serde fallback for `TierCountSchemaConfig::min`.
fn default_tier_min() -> usize {
    5
}

/// Serde fallback for `TierCountSchemaConfig::max`.
fn default_tier_max() -> usize {
    20
}
8235
8236impl Default for TierCountSchemaConfig {
8237    fn default() -> Self {
8238        Self {
8239            min: default_tier_min(),
8240            max: default_tier_max(),
8241        }
8242    }
8243}
8244
8245/// Vendor cluster distribution configuration.
8246#[derive(Debug, Clone, Serialize, Deserialize)]
8247pub struct VendorClusterSchemaConfig {
8248    /// Reliable strategic vendors percentage (default: 0.20).
8249    #[serde(default = "default_reliable_strategic")]
8250    pub reliable_strategic: f64,
8251
8252    /// Standard operational vendors percentage (default: 0.50).
8253    #[serde(default = "default_standard_operational")]
8254    pub standard_operational: f64,
8255
8256    /// Transactional vendors percentage (default: 0.25).
8257    #[serde(default = "default_transactional")]
8258    pub transactional: f64,
8259
8260    /// Problematic vendors percentage (default: 0.05).
8261    #[serde(default = "default_problematic")]
8262    pub problematic: f64,
8263}
8264
/// Serde fallback for `VendorClusterSchemaConfig::reliable_strategic`.
fn default_reliable_strategic() -> f64 {
    0.20
}

/// Serde fallback for `VendorClusterSchemaConfig::standard_operational`.
fn default_standard_operational() -> f64 {
    0.50
}

/// Serde fallback for `VendorClusterSchemaConfig::transactional`.
fn default_transactional() -> f64 {
    0.25
}

/// Serde fallback for `VendorClusterSchemaConfig::problematic`.
fn default_problematic() -> f64 {
    0.05
}
8280
8281impl Default for VendorClusterSchemaConfig {
8282    fn default() -> Self {
8283        Self {
8284            reliable_strategic: 0.20,
8285            standard_operational: 0.50,
8286            transactional: 0.25,
8287            problematic: 0.05,
8288        }
8289    }
8290}
8291
8292/// Dependency and concentration limits configuration.
8293#[derive(Debug, Clone, Serialize, Deserialize)]
8294pub struct DependencySchemaConfig {
8295    /// Maximum concentration for a single vendor (default: 0.15).
8296    #[serde(default = "default_max_single_vendor")]
8297    pub max_single_vendor_concentration: f64,
8298
8299    /// Maximum concentration for top 5 vendors (default: 0.45).
8300    #[serde(default = "default_max_top5")]
8301    pub top_5_concentration: f64,
8302
8303    /// Percentage of single-source vendors (default: 0.05).
8304    #[serde(default = "default_single_source_percent")]
8305    pub single_source_percent: f64,
8306}
8307
/// Serde fallback for `DependencySchemaConfig::max_single_vendor_concentration`.
fn default_max_single_vendor() -> f64 {
    0.15
}

/// Serde fallback for `DependencySchemaConfig::top_5_concentration`.
fn default_max_top5() -> f64 {
    0.45
}

/// Serde fallback for `DependencySchemaConfig::single_source_percent`.
fn default_single_source_percent() -> f64 {
    0.05
}
8319
8320impl Default for DependencySchemaConfig {
8321    fn default() -> Self {
8322        Self {
8323            max_single_vendor_concentration: 0.15,
8324            top_5_concentration: 0.45,
8325            single_source_percent: 0.05,
8326        }
8327    }
8328}
8329
8330// =============================================================================
8331// Customer Segmentation Configuration
8332// =============================================================================
8333
8334/// Configuration for customer segmentation generation.
8335#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8336pub struct CustomerSegmentationSchemaConfig {
8337    /// Enable customer segmentation generation.
8338    #[serde(default)]
8339    pub enabled: bool,
8340
8341    /// Value segment distribution.
8342    #[serde(default)]
8343    pub value_segments: ValueSegmentsSchemaConfig,
8344
8345    /// Lifecycle stage configuration.
8346    #[serde(default)]
8347    pub lifecycle: LifecycleSchemaConfig,
8348
8349    /// Network (referrals, hierarchies) configuration.
8350    #[serde(default)]
8351    pub networks: CustomerNetworksSchemaConfig,
8352}
8353
8354/// Customer value segments distribution configuration.
8355#[derive(Debug, Clone, Serialize, Deserialize)]
8356pub struct ValueSegmentsSchemaConfig {
8357    /// Enterprise segment configuration.
8358    #[serde(default)]
8359    pub enterprise: SegmentDetailSchemaConfig,
8360
8361    /// Mid-market segment configuration.
8362    #[serde(default)]
8363    pub mid_market: SegmentDetailSchemaConfig,
8364
8365    /// SMB segment configuration.
8366    #[serde(default)]
8367    pub smb: SegmentDetailSchemaConfig,
8368
8369    /// Consumer segment configuration.
8370    #[serde(default)]
8371    pub consumer: SegmentDetailSchemaConfig,
8372}
8373
8374impl Default for ValueSegmentsSchemaConfig {
8375    fn default() -> Self {
8376        Self {
8377            enterprise: SegmentDetailSchemaConfig {
8378                revenue_share: 0.40,
8379                customer_share: 0.05,
8380                avg_order_value_range: "50000+".to_string(),
8381            },
8382            mid_market: SegmentDetailSchemaConfig {
8383                revenue_share: 0.35,
8384                customer_share: 0.20,
8385                avg_order_value_range: "5000-50000".to_string(),
8386            },
8387            smb: SegmentDetailSchemaConfig {
8388                revenue_share: 0.20,
8389                customer_share: 0.50,
8390                avg_order_value_range: "500-5000".to_string(),
8391            },
8392            consumer: SegmentDetailSchemaConfig {
8393                revenue_share: 0.05,
8394                customer_share: 0.25,
8395                avg_order_value_range: "50-500".to_string(),
8396            },
8397        }
8398    }
8399}
8400
8401/// Individual segment detail configuration.
8402#[derive(Debug, Clone, Serialize, Deserialize)]
8403pub struct SegmentDetailSchemaConfig {
8404    /// Revenue share for this segment.
8405    #[serde(default)]
8406    pub revenue_share: f64,
8407
8408    /// Customer share for this segment.
8409    #[serde(default)]
8410    pub customer_share: f64,
8411
8412    /// Average order value range (e.g., "5000-50000" or "50000+").
8413    #[serde(default)]
8414    pub avg_order_value_range: String,
8415}
8416
8417impl Default for SegmentDetailSchemaConfig {
8418    fn default() -> Self {
8419        Self {
8420            revenue_share: 0.25,
8421            customer_share: 0.25,
8422            avg_order_value_range: "1000-10000".to_string(),
8423        }
8424    }
8425}
8426
8427/// Customer lifecycle stage configuration.
8428#[derive(Debug, Clone, Serialize, Deserialize)]
8429pub struct LifecycleSchemaConfig {
8430    /// Prospect stage rate.
8431    #[serde(default)]
8432    pub prospect_rate: f64,
8433
8434    /// New customer stage rate.
8435    #[serde(default = "default_new_rate")]
8436    pub new_rate: f64,
8437
8438    /// Growth stage rate.
8439    #[serde(default = "default_growth_rate")]
8440    pub growth_rate: f64,
8441
8442    /// Mature stage rate.
8443    #[serde(default = "default_mature_rate")]
8444    pub mature_rate: f64,
8445
8446    /// At-risk stage rate.
8447    #[serde(default = "default_at_risk_rate")]
8448    pub at_risk_rate: f64,
8449
8450    /// Churned stage rate.
8451    #[serde(default = "default_churned_rate")]
8452    pub churned_rate: f64,
8453
8454    /// Won-back stage rate (churned customers reacquired).
8455    #[serde(default)]
8456    pub won_back_rate: f64,
8457}
8458
/// Serde fallback for `LifecycleSchemaConfig::new_rate`.
fn default_new_rate() -> f64 {
    0.10
}

/// Serde fallback for `LifecycleSchemaConfig::growth_rate`.
fn default_growth_rate() -> f64 {
    0.15
}

/// Serde fallback for `LifecycleSchemaConfig::mature_rate`.
fn default_mature_rate() -> f64 {
    0.60
}

/// Serde fallback for `LifecycleSchemaConfig::at_risk_rate`.
fn default_at_risk_rate() -> f64 {
    0.10
}

/// Serde fallback for `LifecycleSchemaConfig::churned_rate`.
fn default_churned_rate() -> f64 {
    0.05
}
8478
8479impl Default for LifecycleSchemaConfig {
8480    fn default() -> Self {
8481        Self {
8482            prospect_rate: 0.0,
8483            new_rate: 0.10,
8484            growth_rate: 0.15,
8485            mature_rate: 0.60,
8486            at_risk_rate: 0.10,
8487            churned_rate: 0.05,
8488            won_back_rate: 0.0,
8489        }
8490    }
8491}
8492
8493/// Customer networks configuration (referrals, hierarchies).
8494#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8495pub struct CustomerNetworksSchemaConfig {
8496    /// Referral network configuration.
8497    #[serde(default)]
8498    pub referrals: ReferralSchemaConfig,
8499
8500    /// Corporate hierarchy configuration.
8501    #[serde(default)]
8502    pub corporate_hierarchies: HierarchySchemaConfig,
8503}
8504
8505/// Referral network configuration.
8506#[derive(Debug, Clone, Serialize, Deserialize)]
8507pub struct ReferralSchemaConfig {
8508    /// Enable referral generation.
8509    #[serde(default = "default_true")]
8510    pub enabled: bool,
8511
8512    /// Rate of customers acquired via referral.
8513    #[serde(default = "default_referral_rate")]
8514    pub referral_rate: f64,
8515}
8516
/// Serde fallback for `ReferralSchemaConfig::referral_rate`.
fn default_referral_rate() -> f64 {
    0.15
}
8520
8521impl Default for ReferralSchemaConfig {
8522    fn default() -> Self {
8523        Self {
8524            enabled: true,
8525            referral_rate: 0.15,
8526        }
8527    }
8528}
8529
8530/// Corporate hierarchy configuration.
8531#[derive(Debug, Clone, Serialize, Deserialize)]
8532pub struct HierarchySchemaConfig {
8533    /// Enable corporate hierarchy generation.
8534    #[serde(default = "default_true")]
8535    pub enabled: bool,
8536
8537    /// Rate of customers in hierarchies.
8538    #[serde(default = "default_hierarchy_rate")]
8539    pub probability: f64,
8540}
8541
/// Serde fallback for `HierarchySchemaConfig::probability`.
fn default_hierarchy_rate() -> f64 {
    0.30
}
8545
8546impl Default for HierarchySchemaConfig {
8547    fn default() -> Self {
8548        Self {
8549            enabled: true,
8550            probability: 0.30,
8551        }
8552    }
8553}
8554
8555// =============================================================================
8556// Relationship Strength Configuration
8557// =============================================================================
8558
8559/// Configuration for relationship strength calculation.
8560#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8561pub struct RelationshipStrengthSchemaConfig {
8562    /// Enable relationship strength calculation.
8563    #[serde(default)]
8564    pub enabled: bool,
8565
8566    /// Calculation weights.
8567    #[serde(default)]
8568    pub calculation: StrengthCalculationSchemaConfig,
8569
8570    /// Strength thresholds for classification.
8571    #[serde(default)]
8572    pub thresholds: StrengthThresholdsSchemaConfig,
8573}
8574
8575/// Strength calculation weights configuration.
8576#[derive(Debug, Clone, Serialize, Deserialize)]
8577pub struct StrengthCalculationSchemaConfig {
8578    /// Weight for transaction volume (default: 0.30).
8579    #[serde(default = "default_volume_weight")]
8580    pub transaction_volume_weight: f64,
8581
8582    /// Weight for transaction count (default: 0.25).
8583    #[serde(default = "default_count_weight")]
8584    pub transaction_count_weight: f64,
8585
8586    /// Weight for relationship duration (default: 0.20).
8587    #[serde(default = "default_duration_weight")]
8588    pub relationship_duration_weight: f64,
8589
8590    /// Weight for recency (default: 0.15).
8591    #[serde(default = "default_recency_weight")]
8592    pub recency_weight: f64,
8593
8594    /// Weight for mutual connections (default: 0.10).
8595    #[serde(default = "default_mutual_weight")]
8596    pub mutual_connections_weight: f64,
8597
8598    /// Recency half-life in days (default: 90).
8599    #[serde(default = "default_recency_half_life")]
8600    pub recency_half_life_days: u32,
8601}
8602
/// Serde fallback for `StrengthCalculationSchemaConfig::transaction_volume_weight`.
fn default_volume_weight() -> f64 {
    0.30
}

/// Serde fallback for `StrengthCalculationSchemaConfig::transaction_count_weight`.
fn default_count_weight() -> f64 {
    0.25
}

/// Serde fallback for `StrengthCalculationSchemaConfig::relationship_duration_weight`.
fn default_duration_weight() -> f64 {
    0.20
}

/// Serde fallback for `StrengthCalculationSchemaConfig::recency_weight`.
fn default_recency_weight() -> f64 {
    0.15
}

/// Serde fallback for `StrengthCalculationSchemaConfig::mutual_connections_weight`.
fn default_mutual_weight() -> f64 {
    0.10
}

/// Serde fallback for `StrengthCalculationSchemaConfig::recency_half_life_days`.
fn default_recency_half_life() -> u32 {
    90
}
8626
8627impl Default for StrengthCalculationSchemaConfig {
8628    fn default() -> Self {
8629        Self {
8630            transaction_volume_weight: 0.30,
8631            transaction_count_weight: 0.25,
8632            relationship_duration_weight: 0.20,
8633            recency_weight: 0.15,
8634            mutual_connections_weight: 0.10,
8635            recency_half_life_days: 90,
8636        }
8637    }
8638}
8639
8640/// Strength thresholds for relationship classification.
8641#[derive(Debug, Clone, Serialize, Deserialize)]
8642pub struct StrengthThresholdsSchemaConfig {
8643    /// Threshold for strong relationships (default: 0.7).
8644    #[serde(default = "default_strong_threshold")]
8645    pub strong: f64,
8646
8647    /// Threshold for moderate relationships (default: 0.4).
8648    #[serde(default = "default_moderate_threshold")]
8649    pub moderate: f64,
8650
8651    /// Threshold for weak relationships (default: 0.1).
8652    #[serde(default = "default_weak_threshold")]
8653    pub weak: f64,
8654}
8655
/// Serde fallback for `StrengthThresholdsSchemaConfig::strong`.
fn default_strong_threshold() -> f64 {
    0.7
}

/// Serde fallback for `StrengthThresholdsSchemaConfig::moderate`.
fn default_moderate_threshold() -> f64 {
    0.4
}

/// Serde fallback for `StrengthThresholdsSchemaConfig::weak`.
fn default_weak_threshold() -> f64 {
    0.1
}
8667
8668impl Default for StrengthThresholdsSchemaConfig {
8669    fn default() -> Self {
8670        Self {
8671            strong: 0.7,
8672            moderate: 0.4,
8673            weak: 0.1,
8674        }
8675    }
8676}
8677
8678// =============================================================================
8679// Cross-Process Links Configuration
8680// =============================================================================
8681
8682/// Configuration for cross-process linkages.
8683#[derive(Debug, Clone, Serialize, Deserialize)]
8684pub struct CrossProcessLinksSchemaConfig {
8685    /// Enable cross-process link generation.
8686    #[serde(default)]
8687    pub enabled: bool,
8688
8689    /// Enable inventory links between P2P and O2C.
8690    #[serde(default = "default_true")]
8691    pub inventory_p2p_o2c: bool,
8692
8693    /// Enable payment to bank reconciliation links.
8694    #[serde(default = "default_true")]
8695    pub payment_bank_reconciliation: bool,
8696
8697    /// Enable intercompany bilateral matching.
8698    #[serde(default = "default_true")]
8699    pub intercompany_bilateral: bool,
8700
8701    /// Percentage of GR/Deliveries to link via inventory (0.0 - 1.0).
8702    #[serde(default = "default_inventory_link_rate")]
8703    pub inventory_link_rate: f64,
8704}
8705
/// Serde fallback for `CrossProcessLinksSchemaConfig::inventory_link_rate`.
fn default_inventory_link_rate() -> f64 {
    0.30
}
8709
8710impl Default for CrossProcessLinksSchemaConfig {
8711    fn default() -> Self {
8712        Self {
8713            enabled: false,
8714            inventory_p2p_o2c: true,
8715            payment_bank_reconciliation: true,
8716            intercompany_bilateral: true,
8717            inventory_link_rate: 0.30,
8718        }
8719    }
8720}
8721
8722// =============================================================================
8723// Organizational Events Configuration
8724// =============================================================================
8725
8726/// Configuration for organizational events (acquisitions, divestitures, etc.).
8727#[derive(Debug, Clone, Serialize, Deserialize, Default)]
8728pub struct OrganizationalEventsSchemaConfig {
8729    /// Enable organizational events.
8730    #[serde(default)]
8731    pub enabled: bool,
8732
8733    /// Effect blending mode (multiplicative, additive, maximum, minimum).
8734    #[serde(default)]
8735    pub effect_blending: EffectBlendingModeConfig,
8736
8737    /// Organizational events (acquisitions, divestitures, reorganizations, etc.).
8738    #[serde(default)]
8739    pub events: Vec<OrganizationalEventSchemaConfig>,
8740
8741    /// Process evolution events.
8742    #[serde(default)]
8743    pub process_evolution: Vec<ProcessEvolutionSchemaConfig>,
8744
8745    /// Technology transition events.
8746    #[serde(default)]
8747    pub technology_transitions: Vec<TechnologyTransitionSchemaConfig>,
8748}
8749
8750/// Effect blending mode for combining multiple event effects.
8751#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
8752#[serde(rename_all = "snake_case")]
8753pub enum EffectBlendingModeConfig {
8754    /// Multiply effects together.
8755    #[default]
8756    Multiplicative,
8757    /// Add effects together.
8758    Additive,
8759    /// Take the maximum effect.
8760    Maximum,
8761    /// Take the minimum effect.
8762    Minimum,
8763}
8764
8765/// Configuration for a single organizational event.
8766#[derive(Debug, Clone, Serialize, Deserialize)]
8767pub struct OrganizationalEventSchemaConfig {
8768    /// Event ID.
8769    pub id: String,
8770
8771    /// Event type and configuration.
8772    pub event_type: OrganizationalEventTypeSchemaConfig,
8773
8774    /// Effective date.
8775    pub effective_date: String,
8776
8777    /// Transition duration in months.
8778    #[serde(default = "default_org_transition_months")]
8779    pub transition_months: u32,
8780
8781    /// Description.
8782    #[serde(default)]
8783    pub description: Option<String>,
8784}
8785
/// Serde fallback for `OrganizationalEventSchemaConfig::transition_months`.
fn default_org_transition_months() -> u32 {
    6
}
8789
8790/// Organizational event type configuration.
8791#[derive(Debug, Clone, Serialize, Deserialize)]
8792#[serde(tag = "type", rename_all = "snake_case")]
8793pub enum OrganizationalEventTypeSchemaConfig {
8794    /// Acquisition event.
8795    Acquisition {
8796        /// Acquired entity code.
8797        acquired_entity: String,
8798        /// Volume increase multiplier.
8799        #[serde(default = "default_acquisition_volume")]
8800        volume_increase: f64,
8801        /// Integration error rate.
8802        #[serde(default = "default_acquisition_error")]
8803        integration_error_rate: f64,
8804        /// Parallel posting days.
8805        #[serde(default = "default_parallel_days")]
8806        parallel_posting_days: u32,
8807    },
8808    /// Divestiture event.
8809    Divestiture {
8810        /// Divested entity code.
8811        divested_entity: String,
8812        /// Volume reduction factor.
8813        #[serde(default = "default_divestiture_volume")]
8814        volume_reduction: f64,
8815        /// Remove entity from generation.
8816        #[serde(default = "default_true_val")]
8817        remove_entity: bool,
8818    },
8819    /// Reorganization event.
8820    Reorganization {
8821        /// Cost center remapping.
8822        #[serde(default)]
8823        cost_center_remapping: std::collections::HashMap<String, String>,
8824        /// Transition error rate.
8825        #[serde(default = "default_reorg_error")]
8826        transition_error_rate: f64,
8827    },
8828    /// Leadership change event.
8829    LeadershipChange {
8830        /// Role that changed.
8831        role: String,
8832        /// Policy changes.
8833        #[serde(default)]
8834        policy_changes: Vec<String>,
8835    },
8836    /// Workforce reduction event.
8837    WorkforceReduction {
8838        /// Reduction percentage.
8839        #[serde(default = "default_workforce_reduction")]
8840        reduction_percent: f64,
8841        /// Error rate increase.
8842        #[serde(default = "default_workforce_error")]
8843        error_rate_increase: f64,
8844    },
8845    /// Merger event.
8846    Merger {
8847        /// Merged entity code.
8848        merged_entity: String,
8849        /// Volume increase multiplier.
8850        #[serde(default = "default_merger_volume")]
8851        volume_increase: f64,
8852    },
8853}
8854
/// Serde fallback for `Acquisition::volume_increase`.
fn default_acquisition_volume() -> f64 {
    1.35
}

/// Serde fallback for `Acquisition::integration_error_rate`.
fn default_acquisition_error() -> f64 {
    0.05
}

/// Serde fallback for `Acquisition::parallel_posting_days`.
fn default_parallel_days() -> u32 {
    30
}

/// Serde fallback for `Divestiture::volume_reduction`.
fn default_divestiture_volume() -> f64 {
    0.70
}

/// Serde fallback for `Divestiture::remove_entity`.
fn default_true_val() -> bool {
    true
}

/// Serde fallback for `Reorganization::transition_error_rate`.
fn default_reorg_error() -> f64 {
    0.04
}

/// Serde fallback for `WorkforceReduction::reduction_percent`.
fn default_workforce_reduction() -> f64 {
    0.10
}

/// Serde fallback for `WorkforceReduction::error_rate_increase`.
fn default_workforce_error() -> f64 {
    0.05
}

/// Serde fallback for `Merger::volume_increase`.
fn default_merger_volume() -> f64 {
    1.80
}
8890
8891/// Configuration for a process evolution event.
8892#[derive(Debug, Clone, Serialize, Deserialize)]
8893pub struct ProcessEvolutionSchemaConfig {
8894    /// Event ID.
8895    pub id: String,
8896
8897    /// Event type.
8898    pub event_type: ProcessEvolutionTypeSchemaConfig,
8899
8900    /// Effective date.
8901    pub effective_date: String,
8902
8903    /// Description.
8904    #[serde(default)]
8905    pub description: Option<String>,
8906}
8907
8908/// Process evolution type configuration.
8909#[derive(Debug, Clone, Serialize, Deserialize)]
8910#[serde(tag = "type", rename_all = "snake_case")]
8911pub enum ProcessEvolutionTypeSchemaConfig {
8912    /// Process automation.
8913    ProcessAutomation {
8914        /// Process name.
8915        process_name: String,
8916        /// Manual rate before.
8917        #[serde(default = "default_manual_before")]
8918        manual_rate_before: f64,
8919        /// Manual rate after.
8920        #[serde(default = "default_manual_after")]
8921        manual_rate_after: f64,
8922    },
8923    /// Approval workflow change.
8924    ApprovalWorkflowChange {
8925        /// Description.
8926        description: String,
8927    },
8928    /// Control enhancement.
8929    ControlEnhancement {
8930        /// Control ID.
8931        control_id: String,
8932        /// Error reduction.
8933        #[serde(default = "default_error_reduction")]
8934        error_reduction: f64,
8935    },
8936}
8937
/// Serde fallback for `ProcessAutomation::manual_rate_before`.
fn default_manual_before() -> f64 {
    0.80
}

/// Serde fallback for `ProcessAutomation::manual_rate_after`.
fn default_manual_after() -> f64 {
    0.15
}

/// Serde fallback for `ControlEnhancement::error_reduction`.
fn default_error_reduction() -> f64 {
    0.02
}
8949
8950/// Configuration for a technology transition event.
8951#[derive(Debug, Clone, Serialize, Deserialize)]
8952pub struct TechnologyTransitionSchemaConfig {
8953    /// Event ID.
8954    pub id: String,
8955
8956    /// Event type.
8957    pub event_type: TechnologyTransitionTypeSchemaConfig,
8958
8959    /// Description.
8960    #[serde(default)]
8961    pub description: Option<String>,
8962}
8963
8964/// Technology transition type configuration.
8965#[derive(Debug, Clone, Serialize, Deserialize)]
8966#[serde(tag = "type", rename_all = "snake_case")]
8967pub enum TechnologyTransitionTypeSchemaConfig {
8968    /// ERP migration.
8969    ErpMigration {
8970        /// Source system.
8971        source_system: String,
8972        /// Target system.
8973        target_system: String,
8974        /// Cutover date.
8975        cutover_date: String,
8976        /// Stabilization end date.
8977        stabilization_end: String,
8978        /// Duplicate rate during migration.
8979        #[serde(default = "default_erp_duplicate_rate")]
8980        duplicate_rate: f64,
8981        /// Format mismatch rate.
8982        #[serde(default = "default_format_mismatch")]
8983        format_mismatch_rate: f64,
8984    },
8985    /// Module implementation.
8986    ModuleImplementation {
8987        /// Module name.
8988        module_name: String,
8989        /// Go-live date.
8990        go_live_date: String,
8991    },
8992}
8993
/// Serde fallback for `ErpMigration::duplicate_rate`.
fn default_erp_duplicate_rate() -> f64 {
    0.02
}

/// Serde fallback for `ErpMigration::format_mismatch_rate`.
fn default_format_mismatch() -> f64 {
    0.03
}
9001
9002// =============================================================================
9003// Behavioral Drift Configuration
9004// =============================================================================
9005
9006/// Configuration for behavioral drift (vendor, customer, employee behavior).
9007///
9008/// **Deprecated (v4.1.2):** this schema section is currently
9009/// validated-but-inert — no runtime code consumes its fields. Users
9010/// who want behavioral drift-style effects should reach for
9011/// `distributions.regime_changes` (v3.5.2+), which drives the
9012/// `DriftController` via the parameter-drift path. The schema type
9013/// remains for backward-compatible YAML loading; it will be removed
9014/// in a future major version once `regime_changes` gains per-entity
9015/// (vendor / customer / employee) targeting.
9016#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9017pub struct BehavioralDriftSchemaConfig {
9018    /// Enable behavioral drift.
9019    #[serde(default)]
9020    pub enabled: bool,
9021
9022    /// Vendor behavior drift.
9023    #[serde(default)]
9024    pub vendor_behavior: VendorBehaviorSchemaConfig,
9025
9026    /// Customer behavior drift.
9027    #[serde(default)]
9028    pub customer_behavior: CustomerBehaviorSchemaConfig,
9029
9030    /// Employee behavior drift.
9031    #[serde(default)]
9032    pub employee_behavior: EmployeeBehaviorSchemaConfig,
9033
9034    /// Collective behavior drift.
9035    #[serde(default)]
9036    pub collective: CollectiveBehaviorSchemaConfig,
9037}
9038
9039/// Vendor behavior drift configuration.
9040#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9041pub struct VendorBehaviorSchemaConfig {
9042    /// Payment terms drift.
9043    #[serde(default)]
9044    pub payment_terms_drift: PaymentTermsDriftSchemaConfig,
9045
9046    /// Quality drift.
9047    #[serde(default)]
9048    pub quality_drift: QualityDriftSchemaConfig,
9049}
9050
9051/// Payment terms drift configuration.
9052#[derive(Debug, Clone, Serialize, Deserialize)]
9053pub struct PaymentTermsDriftSchemaConfig {
9054    /// Extension rate per year (days).
9055    #[serde(default = "default_extension_rate")]
9056    pub extension_rate_per_year: f64,
9057
9058    /// Economic sensitivity.
9059    #[serde(default = "default_economic_sensitivity")]
9060    pub economic_sensitivity: f64,
9061}
9062
/// Serde fallback for `PaymentTermsDriftSchemaConfig::extension_rate_per_year`.
fn default_extension_rate() -> f64 {
    2.5
}

/// Serde fallback for `PaymentTermsDriftSchemaConfig::economic_sensitivity`.
fn default_economic_sensitivity() -> f64 {
    1.0
}
9070
9071impl Default for PaymentTermsDriftSchemaConfig {
9072    fn default() -> Self {
9073        Self {
9074            extension_rate_per_year: 2.5,
9075            economic_sensitivity: 1.0,
9076        }
9077    }
9078}
9079
9080/// Quality drift configuration.
9081#[derive(Debug, Clone, Serialize, Deserialize)]
9082pub struct QualityDriftSchemaConfig {
9083    /// New vendor improvement rate (per year).
9084    #[serde(default = "default_improvement_rate")]
9085    pub new_vendor_improvement_rate: f64,
9086
9087    /// Complacency decline rate (per year after first year).
9088    #[serde(default = "default_decline_rate")]
9089    pub complacency_decline_rate: f64,
9090}
9091
/// Serde fallback for `new_vendor_improvement_rate`.
fn default_improvement_rate() -> f64 {
    0.02_f64
}

/// Serde fallback for `complacency_decline_rate`.
fn default_decline_rate() -> f64 {
    0.01_f64
}
9099
9100impl Default for QualityDriftSchemaConfig {
9101    fn default() -> Self {
9102        Self {
9103            new_vendor_improvement_rate: 0.02,
9104            complacency_decline_rate: 0.01,
9105        }
9106    }
9107}
9108
9109/// Customer behavior drift configuration.
9110#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9111pub struct CustomerBehaviorSchemaConfig {
9112    /// Payment drift.
9113    #[serde(default)]
9114    pub payment_drift: CustomerPaymentDriftSchemaConfig,
9115
9116    /// Order drift.
9117    #[serde(default)]
9118    pub order_drift: OrderDriftSchemaConfig,
9119}
9120
9121/// Customer payment drift configuration.
9122#[derive(Debug, Clone, Serialize, Deserialize)]
9123pub struct CustomerPaymentDriftSchemaConfig {
9124    /// Days extension during downturn (min, max).
9125    #[serde(default = "default_downturn_extension")]
9126    pub downturn_days_extension: (u32, u32),
9127
9128    /// Bad debt increase during downturn.
9129    #[serde(default = "default_bad_debt_increase")]
9130    pub downturn_bad_debt_increase: f64,
9131}
9132
/// Serde fallback for `downturn_days_extension`, as `(min, max)`.
fn default_downturn_extension() -> (u32, u32) {
    (5_u32, 15_u32)
}

/// Serde fallback for `downturn_bad_debt_increase`.
fn default_bad_debt_increase() -> f64 {
    0.02_f64
}
9140
9141impl Default for CustomerPaymentDriftSchemaConfig {
9142    fn default() -> Self {
9143        Self {
9144            downturn_days_extension: (5, 15),
9145            downturn_bad_debt_increase: 0.02,
9146        }
9147    }
9148}
9149
9150/// Order drift configuration.
9151#[derive(Debug, Clone, Serialize, Deserialize)]
9152pub struct OrderDriftSchemaConfig {
9153    /// Digital shift rate (per year).
9154    #[serde(default = "default_digital_shift")]
9155    pub digital_shift_rate: f64,
9156}
9157
/// Serde fallback for `digital_shift_rate`.
fn default_digital_shift() -> f64 {
    0.05_f64
}
9161
9162impl Default for OrderDriftSchemaConfig {
9163    fn default() -> Self {
9164        Self {
9165            digital_shift_rate: 0.05,
9166        }
9167    }
9168}
9169
9170/// Employee behavior drift configuration.
9171#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9172pub struct EmployeeBehaviorSchemaConfig {
9173    /// Approval drift.
9174    #[serde(default)]
9175    pub approval_drift: ApprovalDriftSchemaConfig,
9176
9177    /// Error drift.
9178    #[serde(default)]
9179    pub error_drift: ErrorDriftSchemaConfig,
9180}
9181
9182/// Approval drift configuration.
9183#[derive(Debug, Clone, Serialize, Deserialize)]
9184pub struct ApprovalDriftSchemaConfig {
9185    /// EOM intensity increase per year.
9186    #[serde(default = "default_eom_intensity")]
9187    pub eom_intensity_increase_per_year: f64,
9188
9189    /// Rubber stamp volume threshold.
9190    #[serde(default = "default_rubber_stamp")]
9191    pub rubber_stamp_volume_threshold: u32,
9192}
9193
/// Serde fallback for `eom_intensity_increase_per_year`.
fn default_eom_intensity() -> f64 {
    0.05_f64
}

/// Serde fallback for `rubber_stamp_volume_threshold`.
fn default_rubber_stamp() -> u32 {
    50_u32
}
9201
9202impl Default for ApprovalDriftSchemaConfig {
9203    fn default() -> Self {
9204        Self {
9205            eom_intensity_increase_per_year: 0.05,
9206            rubber_stamp_volume_threshold: 50,
9207        }
9208    }
9209}
9210
9211/// Error drift configuration.
9212#[derive(Debug, Clone, Serialize, Deserialize)]
9213pub struct ErrorDriftSchemaConfig {
9214    /// New employee error rate.
9215    #[serde(default = "default_new_error")]
9216    pub new_employee_error_rate: f64,
9217
9218    /// Learning curve months.
9219    #[serde(default = "default_learning_months")]
9220    pub learning_curve_months: u32,
9221}
9222
/// Serde fallback for `new_employee_error_rate`.
fn default_new_error() -> f64 {
    0.08_f64
}

/// Serde fallback for `learning_curve_months`.
fn default_learning_months() -> u32 {
    6_u32
}
9230
9231impl Default for ErrorDriftSchemaConfig {
9232    fn default() -> Self {
9233        Self {
9234            new_employee_error_rate: 0.08,
9235            learning_curve_months: 6,
9236        }
9237    }
9238}
9239
9240/// Collective behavior drift configuration.
9241#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9242pub struct CollectiveBehaviorSchemaConfig {
9243    /// Automation adoption configuration.
9244    #[serde(default)]
9245    pub automation_adoption: AutomationAdoptionSchemaConfig,
9246}
9247
9248/// Automation adoption configuration.
9249#[derive(Debug, Clone, Serialize, Deserialize)]
9250pub struct AutomationAdoptionSchemaConfig {
9251    /// Enable S-curve adoption model.
9252    #[serde(default)]
9253    pub s_curve_enabled: bool,
9254
9255    /// Adoption midpoint in months.
9256    #[serde(default = "default_midpoint")]
9257    pub adoption_midpoint_months: u32,
9258
9259    /// Steepness of adoption curve.
9260    #[serde(default = "default_steepness")]
9261    pub steepness: f64,
9262}
9263
/// Serde fallback for `adoption_midpoint_months`.
fn default_midpoint() -> u32 {
    24_u32
}

/// Serde fallback for `steepness`.
fn default_steepness() -> f64 {
    0.15_f64
}
9271
9272impl Default for AutomationAdoptionSchemaConfig {
9273    fn default() -> Self {
9274        Self {
9275            s_curve_enabled: false,
9276            adoption_midpoint_months: 24,
9277            steepness: 0.15,
9278        }
9279    }
9280}
9281
9282// =============================================================================
9283// Market Drift Configuration
9284// =============================================================================
9285
9286/// Configuration for market drift (economic cycles, commodities, price shocks).
9287///
9288/// **Deprecated (v4.1.2):** validated-but-inert. Use
9289/// `distributions.regime_changes.economic_cycle` +
9290/// `distributions.regime_changes.parameter_drifts` for the
9291/// equivalent runtime behaviour (shipped in v3.5.2). The schema
9292/// type remains for backward-compatible YAML loading; will be
9293/// removed in v5.0.
9294#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9295pub struct MarketDriftSchemaConfig {
9296    /// Enable market drift.
9297    #[serde(default)]
9298    pub enabled: bool,
9299
9300    /// Economic cycle configuration.
9301    #[serde(default)]
9302    pub economic_cycle: MarketEconomicCycleSchemaConfig,
9303
9304    /// Industry-specific cycles.
9305    #[serde(default)]
9306    pub industry_cycles: std::collections::HashMap<String, IndustryCycleSchemaConfig>,
9307
9308    /// Commodity drift configuration.
9309    #[serde(default)]
9310    pub commodities: CommoditiesSchemaConfig,
9311}
9312
9313/// Market economic cycle configuration.
9314#[derive(Debug, Clone, Serialize, Deserialize)]
9315pub struct MarketEconomicCycleSchemaConfig {
9316    /// Enable economic cycle.
9317    #[serde(default)]
9318    pub enabled: bool,
9319
9320    /// Cycle type.
9321    #[serde(default)]
9322    pub cycle_type: CycleTypeSchemaConfig,
9323
9324    /// Cycle period in months.
9325    #[serde(default = "default_market_cycle_period")]
9326    pub period_months: u32,
9327
9328    /// Amplitude.
9329    #[serde(default = "default_market_amplitude")]
9330    pub amplitude: f64,
9331
9332    /// Recession configuration.
9333    #[serde(default)]
9334    pub recession: RecessionSchemaConfig,
9335}
9336
/// Serde fallback for `MarketEconomicCycleSchemaConfig::period_months`.
fn default_market_cycle_period() -> u32 {
    48_u32
}

/// Serde fallback for `MarketEconomicCycleSchemaConfig::amplitude`.
fn default_market_amplitude() -> f64 {
    0.15_f64
}
9344
9345impl Default for MarketEconomicCycleSchemaConfig {
9346    fn default() -> Self {
9347        Self {
9348            enabled: false,
9349            cycle_type: CycleTypeSchemaConfig::Sinusoidal,
9350            period_months: 48,
9351            amplitude: 0.15,
9352            recession: RecessionSchemaConfig::default(),
9353        }
9354    }
9355}
9356
9357/// Cycle type configuration.
9358#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
9359#[serde(rename_all = "snake_case")]
9360pub enum CycleTypeSchemaConfig {
9361    /// Sinusoidal cycle.
9362    #[default]
9363    Sinusoidal,
9364    /// Asymmetric cycle.
9365    Asymmetric,
9366    /// Mean-reverting cycle.
9367    MeanReverting,
9368}
9369
9370/// Recession configuration.
9371#[derive(Debug, Clone, Serialize, Deserialize)]
9372pub struct RecessionSchemaConfig {
9373    /// Enable recession simulation.
9374    #[serde(default)]
9375    pub enabled: bool,
9376
9377    /// Probability per year.
9378    #[serde(default = "default_recession_prob")]
9379    pub probability_per_year: f64,
9380
9381    /// Severity.
9382    #[serde(default)]
9383    pub severity: RecessionSeveritySchemaConfig,
9384
9385    /// Specific recession periods.
9386    #[serde(default)]
9387    pub recession_periods: Vec<RecessionPeriodSchemaConfig>,
9388}
9389
/// Serde fallback for `RecessionSchemaConfig::probability_per_year`.
fn default_recession_prob() -> f64 {
    0.10_f64
}
9393
9394impl Default for RecessionSchemaConfig {
9395    fn default() -> Self {
9396        Self {
9397            enabled: false,
9398            probability_per_year: 0.10,
9399            severity: RecessionSeveritySchemaConfig::Moderate,
9400            recession_periods: Vec::new(),
9401        }
9402    }
9403}
9404
9405/// Recession severity configuration.
9406#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
9407#[serde(rename_all = "snake_case")]
9408pub enum RecessionSeveritySchemaConfig {
9409    /// Mild recession.
9410    Mild,
9411    /// Moderate recession.
9412    #[default]
9413    Moderate,
9414    /// Severe recession.
9415    Severe,
9416}
9417
9418/// Recession period configuration.
9419#[derive(Debug, Clone, Serialize, Deserialize)]
9420pub struct RecessionPeriodSchemaConfig {
9421    /// Start month.
9422    pub start_month: u32,
9423    /// Duration in months.
9424    pub duration_months: u32,
9425}
9426
9427/// Industry cycle configuration.
9428#[derive(Debug, Clone, Serialize, Deserialize)]
9429pub struct IndustryCycleSchemaConfig {
9430    /// Period in months.
9431    #[serde(default = "default_industry_period")]
9432    pub period_months: u32,
9433
9434    /// Amplitude.
9435    #[serde(default = "default_industry_amp")]
9436    pub amplitude: f64,
9437}
9438
/// Serde fallback for `IndustryCycleSchemaConfig::period_months`.
fn default_industry_period() -> u32 {
    36_u32
}

/// Serde fallback for `IndustryCycleSchemaConfig::amplitude`.
fn default_industry_amp() -> f64 {
    0.20_f64
}
9446
9447/// Commodities drift configuration.
9448#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9449pub struct CommoditiesSchemaConfig {
9450    /// Enable commodity drift.
9451    #[serde(default)]
9452    pub enabled: bool,
9453
9454    /// Commodity items.
9455    #[serde(default)]
9456    pub items: Vec<CommodityItemSchemaConfig>,
9457}
9458
9459/// Commodity item configuration.
9460#[derive(Debug, Clone, Serialize, Deserialize)]
9461pub struct CommodityItemSchemaConfig {
9462    /// Commodity name.
9463    pub name: String,
9464
9465    /// Volatility.
9466    #[serde(default = "default_volatility")]
9467    pub volatility: f64,
9468
9469    /// COGS pass-through.
9470    #[serde(default)]
9471    pub cogs_pass_through: f64,
9472
9473    /// Overhead pass-through.
9474    #[serde(default)]
9475    pub overhead_pass_through: f64,
9476}
9477
/// Serde fallback for `CommodityItemSchemaConfig::volatility`.
fn default_volatility() -> f64 {
    0.20_f64
}
9481
9482// =============================================================================
9483// Drift Labeling Configuration
9484// =============================================================================
9485
9486/// Configuration for drift ground truth labeling.
9487///
9488/// **Deprecated (v4.1.2):** validated-but-inert. The v3.3.0
9489/// analytics-metadata phase (`DriftEventGenerator` +
9490/// `AnalyticsMetadataSnapshot.drift_events`) produces drift labels
9491/// at runtime — configure it via `analytics_metadata.drift_events`
9492/// instead. The schema type remains for backward-compatible YAML
9493/// loading; will be removed in v5.0.
9494#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9495pub struct DriftLabelingSchemaConfig {
9496    /// Enable drift labeling.
9497    #[serde(default)]
9498    pub enabled: bool,
9499
9500    /// Statistical drift labeling.
9501    #[serde(default)]
9502    pub statistical: StatisticalDriftLabelingSchemaConfig,
9503
9504    /// Categorical drift labeling.
9505    #[serde(default)]
9506    pub categorical: CategoricalDriftLabelingSchemaConfig,
9507
9508    /// Temporal drift labeling.
9509    #[serde(default)]
9510    pub temporal: TemporalDriftLabelingSchemaConfig,
9511
9512    /// Regulatory calendar preset.
9513    #[serde(default)]
9514    pub regulatory_calendar_preset: Option<String>,
9515}
9516
9517/// Statistical drift labeling configuration.
9518#[derive(Debug, Clone, Serialize, Deserialize)]
9519pub struct StatisticalDriftLabelingSchemaConfig {
9520    /// Enable statistical drift labeling.
9521    #[serde(default = "default_true_val")]
9522    pub enabled: bool,
9523
9524    /// Minimum magnitude threshold.
9525    #[serde(default = "default_min_magnitude")]
9526    pub min_magnitude_threshold: f64,
9527}
9528
/// Serde fallback for `min_magnitude_threshold`.
fn default_min_magnitude() -> f64 {
    0.05_f64
}
9532
9533impl Default for StatisticalDriftLabelingSchemaConfig {
9534    fn default() -> Self {
9535        Self {
9536            enabled: true,
9537            min_magnitude_threshold: 0.05,
9538        }
9539    }
9540}
9541
9542/// Categorical drift labeling configuration.
9543#[derive(Debug, Clone, Serialize, Deserialize)]
9544pub struct CategoricalDriftLabelingSchemaConfig {
9545    /// Enable categorical drift labeling.
9546    #[serde(default = "default_true_val")]
9547    pub enabled: bool,
9548}
9549
9550impl Default for CategoricalDriftLabelingSchemaConfig {
9551    fn default() -> Self {
9552        Self { enabled: true }
9553    }
9554}
9555
9556/// Temporal drift labeling configuration.
9557#[derive(Debug, Clone, Serialize, Deserialize)]
9558pub struct TemporalDriftLabelingSchemaConfig {
9559    /// Enable temporal drift labeling.
9560    #[serde(default = "default_true_val")]
9561    pub enabled: bool,
9562}
9563
9564impl Default for TemporalDriftLabelingSchemaConfig {
9565    fn default() -> Self {
9566        Self { enabled: true }
9567    }
9568}
9569
9570// =============================================================================
9571// Enhanced Anomaly Injection Configuration
9572// =============================================================================
9573
9574/// Enhanced anomaly injection configuration.
9575///
9576/// Provides comprehensive anomaly injection capabilities including:
9577/// - Multi-stage fraud schemes (embezzlement, revenue manipulation, kickbacks)
9578/// - Correlated anomaly injection (co-occurrence patterns, error cascades)
9579/// - Near-miss generation for false positive reduction
9580/// - Detection difficulty classification
9581/// - Context-aware injection based on entity behavior
9582#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9583pub struct EnhancedAnomalyConfig {
9584    /// Enable enhanced anomaly injection.
9585    #[serde(default)]
9586    pub enabled: bool,
9587
9588    /// Base anomaly rates.
9589    #[serde(default)]
9590    pub rates: AnomalyRateConfig,
9591
9592    /// Multi-stage fraud scheme configuration.
9593    #[serde(default)]
9594    pub multi_stage_schemes: MultiStageSchemeConfig,
9595
9596    /// Correlated anomaly injection configuration.
9597    #[serde(default)]
9598    pub correlated_injection: CorrelatedInjectionConfig,
9599
9600    /// Near-miss generation configuration.
9601    #[serde(default)]
9602    pub near_miss: NearMissConfig,
9603
9604    /// Detection difficulty classification configuration.
9605    #[serde(default)]
9606    pub difficulty_classification: DifficultyClassificationConfig,
9607
9608    /// Context-aware injection configuration.
9609    #[serde(default)]
9610    pub context_aware: ContextAwareConfig,
9611
9612    /// Enhanced labeling configuration.
9613    #[serde(default)]
9614    pub labeling: EnhancedLabelingConfig,
9615
9616    /// SOTA-12 (#140, FINDINGS §13): post-process tagger that tags the top
9617    /// `rate × n_jes` JEs whose `(source, gl_account)` is rare under the
9618    /// per-source empirical PMF as `RelationalAnomalyType::SourceConditional-
9619    /// Rarity`. `None` = disabled (default); typical value `0.01` matches the
9620    /// audit-packet hot-list size. Runs AFTER per-entry strategies — additive,
9621    /// doesn't replace them.
9622    ///
9623    /// **Phase 1 deprecation note:** this key remains the source of truth for
9624    /// back-compat. If `concentration.source_conditional_rarity.rate` is also
9625    /// set, that value wins (it's an opt-in to the unified DSL).
9626    #[serde(default)]
9627    pub source_conditional_rarity_rate: Option<f64>,
9628}
9629
9630// ---------------------------------------------------------------------------
9631// ConcentrationConfig — central post-process pass pipeline (#143, Phase 1).
9632//
9633// Design reference:
9634//   docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md
9635//
9636// Phase 1 fields: SourceConditionalRarityPass (wrapping shipped SOTA-12) +
9637// TradingPartnerPoolPass (closes SOTA-11.1 / #142).
9638// Phase 2 will add: account_pair_substitution (closes SOTA-8.1 / #141).
9639// ---------------------------------------------------------------------------
9640
9641/// Top-level configuration for the post-generation concentration pipeline.
9642///
9643/// Each sub-field is `Option<_>`; presence enables the corresponding pass.
9644/// `enabled = false` (default) disables the pipeline regardless of sub-fields,
9645/// matching the parent proposal's "opt-in" guidance.
9646#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9647pub struct ConcentrationConfig {
9648    /// Master switch. `false` (default) → pipeline is no-op.
9649    #[serde(default)]
9650    pub enabled: bool,
9651
9652    /// Phase 1: source-conditional rarity tagger (wraps shipped SOTA-12).
9653    /// If also `anomaly_injection.source_conditional_rarity_rate` is set, this
9654    /// field wins.
9655    #[serde(default)]
9656    pub source_conditional_rarity: Option<SourceConditionalRarityPassConfig>,
9657
9658    /// Phase 1: trading-partner pool resizing (closes SOTA-11.1 / #142).
9659    #[serde(default)]
9660    pub trading_partner_pool: Option<TradingPartnerPoolPassConfig>,
9661
9662    /// Phase 2: account-pair substitution against a corpus-derived PMF
9663    /// (closes SOTA-8.1 / #141). Defers to Phase 2 when wired.
9664    #[serde(default)]
9665    pub account_pair_substitution: Option<AccountPairSubstitutionPassConfig>,
9666
9667    /// Phase 1.5: blank-source post-process (closes SOTA-7 / #132). Nulls
9668    /// `sap_source_code` on a configurable fraction of JEs to match the
9669    /// corpus's ~21% blank-source rate. Runs LAST in the pipeline so
9670    /// earlier passes (`SourceConditionalRarityPass`,
9671    /// `AccountPairSubstitutionPass`) see full source coverage.
9672    #[serde(default)]
9673    pub source_blanking: Option<SourceBlankingPassConfig>,
9674
9675    /// v5.30 B2 (#154) — heavy-tail consolidation outlier emission.
9676    /// Reshapes a small fraction of JEs (~0.001 typical) into
9677    /// multi-100-line postings touching bridge / suspense / clearing
9678    /// accounts. Lifts the synthetic relational_score p99/max
9679    /// percentiles toward the corpus's heavy tail without distorting
9680    /// the median. Honors `anomaly_injection.consolidation_outlier_rate`
9681    /// as a back-compat alias — if both are set, this DSL field wins.
9682    #[serde(default)]
9683    pub consolidation_outlier: Option<ConsolidationOutlierPassConfig>,
9684}
9685
9686/// Per-pass config for SourceConditionalRarityPass.
9687#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9688pub struct SourceConditionalRarityPassConfig {
9689    /// Fraction of input JEs to tag (typically `0.01`).
9690    pub rate: f64,
9691    /// Optional min surprise floor (Σ -log P(account|source)). Default `5.0`.
9692    #[serde(default)]
9693    pub min_surprise: Option<f64>,
9694    /// Per-source line-count floor (sources below have unreliable PMFs).
9695    /// Default `5`.
9696    #[serde(default)]
9697    pub min_per_source_lines: Option<u32>,
9698}
9699
9700/// Per-pass config for TradingPartnerPoolPass.
9701#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9702pub struct TradingPartnerPoolPassConfig {
9703    /// Target distinct trading-partner pool size. `0` is clamped to `1` at
9704    /// runtime. Typical corpus value `~12`; synthetic default `~40`.
9705    pub target_size: usize,
9706}
9707
9708/// Per-pass config for SourceBlankingPass (Phase 1.5 / SOTA-7).
9709#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9710pub struct SourceBlankingPassConfig {
9711    /// Fraction of JEs whose `sap_source_code` should be nulled. Typical
9712    /// corpus-matching value `0.21`. Clamped to `[0.0, 1.0]` at runtime.
9713    pub rate: f64,
9714}
9715
9716/// Per-pass config for ConsolidationOutlierPass (v5.30 B2 / #154).
9717///
9718/// Amounts are stored as `f64` here (schema layer) and converted to
9719/// `rust_decimal::Decimal` in the pass constructor. The synthetic
9720/// bridge-line amounts are log-uniformly distributed and the pp99
9721/// metric reads scale rather than exact value, so the f64 → Decimal
9722/// rounding is irrelevant for the heavy-tail signal we're trying to
9723/// emit. Keeping `rust_decimal` out of `datasynth-config`'s direct
9724/// dependency graph avoids a downstream crate-pull.
9725#[derive(Debug, Clone, Serialize, Deserialize)]
9726pub struct ConsolidationOutlierPassConfig {
9727    /// Fraction of JEs to reshape into multi-line bridge-account
9728    /// postings. Typical baseline `0.001` (one in a thousand).
9729    /// Clamped to `[0.0, 1.0]` at runtime.
9730    pub rate: f64,
9731    /// Minimum number of extra lines to append (always rounded up to
9732    /// an even number — lines are added in balanced DR/CR pairs).
9733    /// Default `50`.
9734    #[serde(default = "default_consolidation_outlier_min_lines")]
9735    pub min_extra_lines: usize,
9736    /// Maximum number of extra lines to append. Default `200`.
9737    #[serde(default = "default_consolidation_outlier_max_lines")]
9738    pub max_extra_lines: usize,
9739    /// Bridge / suspense / clearing accounts the appended lines use.
9740    /// Empty (default) → use the pass's built-in default list.
9741    #[serde(default)]
9742    pub bridge_accounts: Vec<String>,
9743    /// Minimum bridge-line amount (log-uniform draw). Default `100.0`.
9744    #[serde(default = "default_consolidation_outlier_min_amount")]
9745    pub line_amount_min: f64,
9746    /// Maximum bridge-line amount (log-uniform draw). Default `50_000.0`.
9747    #[serde(default = "default_consolidation_outlier_max_amount")]
9748    pub line_amount_max: f64,
9749}
9750
9751impl Default for ConsolidationOutlierPassConfig {
9752    fn default() -> Self {
9753        Self {
9754            rate: 0.0,
9755            min_extra_lines: default_consolidation_outlier_min_lines(),
9756            max_extra_lines: default_consolidation_outlier_max_lines(),
9757            bridge_accounts: Vec::new(),
9758            line_amount_min: default_consolidation_outlier_min_amount(),
9759            line_amount_max: default_consolidation_outlier_max_amount(),
9760        }
9761    }
9762}
9763
/// Serde fallback for `ConsolidationOutlierPassConfig::min_extra_lines`.
fn default_consolidation_outlier_min_lines() -> usize {
    50_usize
}
/// Serde fallback for `ConsolidationOutlierPassConfig::max_extra_lines`.
fn default_consolidation_outlier_max_lines() -> usize {
    200_usize
}
/// Serde fallback for `ConsolidationOutlierPassConfig::line_amount_min`.
fn default_consolidation_outlier_min_amount() -> f64 {
    100.0_f64
}
/// Serde fallback for `ConsolidationOutlierPassConfig::line_amount_max`.
fn default_consolidation_outlier_max_amount() -> f64 {
    50_000.0_f64
}
9776
9777/// Per-pass config for AccountPairSubstitutionPass (Phase 2).
9778#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9779pub struct AccountPairSubstitutionPassConfig {
9780    /// Path to a per-source pair-PMF JSON (produced by
9781    /// `corpus_vs_synth_gap.py --emit-pair-pmf`). Aggregate-only; never
9782    /// contains row content or client identifiers.
9783    pub pmf_path: String,
9784    /// JEs whose dominant (debit, credit) pair has corpus probability ≥ this
9785    /// threshold are left alone (they're already plausible). Default `0.005`.
9786    #[serde(default)]
9787    pub rarity_threshold: Option<f64>,
9788    /// When substituting, draw from the top-K corpus pairs (weighted by
9789    /// probability). Default `10`.
9790    #[serde(default)]
9791    pub top_k: Option<usize>,
9792}
9793
9794/// Base anomaly rate configuration.
9795#[derive(Debug, Clone, Serialize, Deserialize)]
9796pub struct AnomalyRateConfig {
9797    /// Total anomaly rate (0.0 to 1.0).
9798    #[serde(default = "default_total_anomaly_rate")]
9799    pub total_rate: f64,
9800
9801    /// Fraud anomaly rate.
9802    #[serde(default = "default_fraud_anomaly_rate")]
9803    pub fraud_rate: f64,
9804
9805    /// Error anomaly rate.
9806    #[serde(default = "default_error_anomaly_rate")]
9807    pub error_rate: f64,
9808
9809    /// Process issue rate.
9810    #[serde(default = "default_process_anomaly_rate")]
9811    pub process_rate: f64,
9812
9813    /// v5.30 B2 (#154) — heavy-tail outlier JE rate. Fraction of
9814    /// emitted JEs that get re-shaped into multi-100-line postings
9815    /// touching bridge accounts. Models real consolidation entries,
9816    /// period-end accruals, and manual reclasses. Default `0.0`
9817    /// preserves v5.29 byte-identical output; opt in (e.g. `0.001`)
9818    /// to lift synth p99/max relational_score percentiles toward the
9819    /// reference shard's heavy tail (~20× vs synth's default ~12×).
9820    #[serde(
9821        default = "default_consolidation_outlier_rate",
9822        alias = "consolidationOutlierRate"
9823    )]
9824    pub consolidation_outlier_rate: f64,
9825}
9826
/// Serde fallback for `AnomalyRateConfig::total_rate`.
fn default_total_anomaly_rate() -> f64 {
    0.03_f64
}
/// Serde fallback for `AnomalyRateConfig::fraud_rate`.
fn default_fraud_anomaly_rate() -> f64 {
    0.01_f64
}
/// Serde fallback for `AnomalyRateConfig::error_rate`.
fn default_error_anomaly_rate() -> f64 {
    0.015_f64
}
/// Serde fallback for `AnomalyRateConfig::process_rate`.
fn default_process_anomaly_rate() -> f64 {
    0.005_f64
}
/// Serde fallback for `AnomalyRateConfig::consolidation_outlier_rate`.
fn default_consolidation_outlier_rate() -> f64 {
    // v5.30 B2 (#154): a small baseline that nudges the synthetic heavy
    // tail toward the reference shard's p99 / max relational_score (~20×)
    // without overpowering downstream metrics. At this rate roughly one
    // JE in a thousand becomes a multi-100-line bridge-account posting,
    // matching the observed corpus frequency of period-close, manual
    // reclass and consolidation entries.
    0.001_f64
}
9848
9849impl Default for AnomalyRateConfig {
9850    fn default() -> Self {
9851        Self {
9852            total_rate: default_total_anomaly_rate(),
9853            fraud_rate: default_fraud_anomaly_rate(),
9854            error_rate: default_error_anomaly_rate(),
9855            process_rate: default_process_anomaly_rate(),
9856            consolidation_outlier_rate: default_consolidation_outlier_rate(),
9857        }
9858    }
9859}
9860
9861/// Multi-stage fraud scheme configuration.
9862#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9863pub struct MultiStageSchemeConfig {
9864    /// Enable multi-stage fraud schemes.
9865    #[serde(default)]
9866    pub enabled: bool,
9867
9868    /// Embezzlement scheme configuration.
9869    #[serde(default)]
9870    pub embezzlement: EmbezzlementSchemeConfig,
9871
9872    /// Revenue manipulation scheme configuration.
9873    #[serde(default)]
9874    pub revenue_manipulation: RevenueManipulationSchemeConfig,
9875
9876    /// Vendor kickback scheme configuration.
9877    #[serde(default)]
9878    pub kickback: KickbackSchemeConfig,
9879}
9880
9881/// Embezzlement scheme configuration.
9882#[derive(Debug, Clone, Serialize, Deserialize)]
9883pub struct EmbezzlementSchemeConfig {
9884    /// Probability of starting an embezzlement scheme per perpetrator per year.
9885    #[serde(default = "default_embezzlement_probability")]
9886    pub probability: f64,
9887
9888    /// Testing stage configuration.
9889    #[serde(default)]
9890    pub testing_stage: SchemeStageConfig,
9891
9892    /// Escalation stage configuration.
9893    #[serde(default)]
9894    pub escalation_stage: SchemeStageConfig,
9895
9896    /// Acceleration stage configuration.
9897    #[serde(default)]
9898    pub acceleration_stage: SchemeStageConfig,
9899
9900    /// Desperation stage configuration.
9901    #[serde(default)]
9902    pub desperation_stage: SchemeStageConfig,
9903}
9904
/// Serde fallback for `EmbezzlementSchemeConfig::probability`.
fn default_embezzlement_probability() -> f64 {
    0.02_f64
}
9908
9909impl Default for EmbezzlementSchemeConfig {
9910    fn default() -> Self {
9911        Self {
9912            probability: default_embezzlement_probability(),
9913            testing_stage: SchemeStageConfig {
9914                duration_months: 2,
9915                amount_min: 100.0,
9916                amount_max: 500.0,
9917                transaction_count_min: 2,
9918                transaction_count_max: 5,
9919                difficulty: "hard".to_string(),
9920            },
9921            escalation_stage: SchemeStageConfig {
9922                duration_months: 6,
9923                amount_min: 500.0,
9924                amount_max: 2000.0,
9925                transaction_count_min: 3,
9926                transaction_count_max: 8,
9927                difficulty: "moderate".to_string(),
9928            },
9929            acceleration_stage: SchemeStageConfig {
9930                duration_months: 3,
9931                amount_min: 2000.0,
9932                amount_max: 10000.0,
9933                transaction_count_min: 5,
9934                transaction_count_max: 12,
9935                difficulty: "easy".to_string(),
9936            },
9937            desperation_stage: SchemeStageConfig {
9938                duration_months: 1,
9939                amount_min: 10000.0,
9940                amount_max: 50000.0,
9941                transaction_count_min: 3,
9942                transaction_count_max: 6,
9943                difficulty: "trivial".to_string(),
9944            },
9945        }
9946    }
9947}
9948
9949/// Revenue manipulation scheme configuration.
9950#[derive(Debug, Clone, Serialize, Deserialize)]
9951pub struct RevenueManipulationSchemeConfig {
9952    /// Probability of starting a revenue manipulation scheme per period.
9953    #[serde(default = "default_revenue_manipulation_probability")]
9954    pub probability: f64,
9955
9956    /// Early revenue recognition inflation target (Q4).
9957    #[serde(default = "default_early_recognition_target")]
9958    pub early_recognition_target: f64,
9959
9960    /// Expense deferral inflation target (Q1).
9961    #[serde(default = "default_expense_deferral_target")]
9962    pub expense_deferral_target: f64,
9963
9964    /// Reserve release inflation target (Q2).
9965    #[serde(default = "default_reserve_release_target")]
9966    pub reserve_release_target: f64,
9967
9968    /// Channel stuffing inflation target (Q4).
9969    #[serde(default = "default_channel_stuffing_target")]
9970    pub channel_stuffing_target: f64,
9971}
9972
/// Serde fallback for `RevenueManipulationSchemeConfig::probability`.
fn default_revenue_manipulation_probability() -> f64 {
    0.01_f64
}
/// Serde fallback for `early_recognition_target`.
fn default_early_recognition_target() -> f64 {
    0.02_f64
}
/// Serde fallback for `expense_deferral_target`.
fn default_expense_deferral_target() -> f64 {
    0.03_f64
}
/// Serde fallback for `reserve_release_target`.
fn default_reserve_release_target() -> f64 {
    0.02_f64
}
/// Serde fallback for `channel_stuffing_target`.
fn default_channel_stuffing_target() -> f64 {
    0.05_f64
}
9988
9989impl Default for RevenueManipulationSchemeConfig {
9990    fn default() -> Self {
9991        Self {
9992            probability: default_revenue_manipulation_probability(),
9993            early_recognition_target: default_early_recognition_target(),
9994            expense_deferral_target: default_expense_deferral_target(),
9995            reserve_release_target: default_reserve_release_target(),
9996            channel_stuffing_target: default_channel_stuffing_target(),
9997        }
9998    }
9999}
10000
10001/// Vendor kickback scheme configuration.
10002#[derive(Debug, Clone, Serialize, Deserialize)]
10003pub struct KickbackSchemeConfig {
10004    /// Probability of starting a kickback scheme.
10005    #[serde(default = "default_kickback_probability")]
10006    pub probability: f64,
10007
10008    /// Minimum price inflation percentage.
10009    #[serde(default = "default_kickback_inflation_min")]
10010    pub inflation_min: f64,
10011
10012    /// Maximum price inflation percentage.
10013    #[serde(default = "default_kickback_inflation_max")]
10014    pub inflation_max: f64,
10015
10016    /// Kickback percentage (of inflation).
10017    #[serde(default = "default_kickback_percent")]
10018    pub kickback_percent: f64,
10019
10020    /// Setup duration in months.
10021    #[serde(default = "default_kickback_setup_months")]
10022    pub setup_months: u32,
10023
10024    /// Main operation duration in months.
10025    #[serde(default = "default_kickback_operation_months")]
10026    pub operation_months: u32,
10027}
10028
/// Fallback for `KickbackSchemeConfig::probability`.
fn default_kickback_probability() -> f64 {
    0.01_f64
}

/// Fallback for `KickbackSchemeConfig::inflation_min`.
fn default_kickback_inflation_min() -> f64 {
    0.10_f64
}

/// Fallback for `KickbackSchemeConfig::inflation_max`.
fn default_kickback_inflation_max() -> f64 {
    0.25_f64
}

/// Fallback for `KickbackSchemeConfig::kickback_percent`.
fn default_kickback_percent() -> f64 {
    0.50_f64
}

/// Fallback for `KickbackSchemeConfig::setup_months`.
fn default_kickback_setup_months() -> u32 {
    3_u32
}

/// Fallback for `KickbackSchemeConfig::operation_months`.
fn default_kickback_operation_months() -> u32 {
    12_u32
}
10047
10048impl Default for KickbackSchemeConfig {
10049    fn default() -> Self {
10050        Self {
10051            probability: default_kickback_probability(),
10052            inflation_min: default_kickback_inflation_min(),
10053            inflation_max: default_kickback_inflation_max(),
10054            kickback_percent: default_kickback_percent(),
10055            setup_months: default_kickback_setup_months(),
10056            operation_months: default_kickback_operation_months(),
10057        }
10058    }
10059}
10060
10061/// Individual scheme stage configuration.
10062#[derive(Debug, Clone, Serialize, Deserialize)]
10063pub struct SchemeStageConfig {
10064    /// Duration in months.
10065    pub duration_months: u32,
10066
10067    /// Minimum transaction amount.
10068    pub amount_min: f64,
10069
10070    /// Maximum transaction amount.
10071    pub amount_max: f64,
10072
10073    /// Minimum number of transactions.
10074    pub transaction_count_min: u32,
10075
10076    /// Maximum number of transactions.
10077    pub transaction_count_max: u32,
10078
10079    /// Detection difficulty level (trivial, easy, moderate, hard, expert).
10080    pub difficulty: String,
10081}
10082
10083impl Default for SchemeStageConfig {
10084    fn default() -> Self {
10085        Self {
10086            duration_months: 3,
10087            amount_min: 100.0,
10088            amount_max: 1000.0,
10089            transaction_count_min: 2,
10090            transaction_count_max: 10,
10091            difficulty: "moderate".to_string(),
10092        }
10093    }
10094}
10095
10096/// Correlated anomaly injection configuration.
10097#[derive(Debug, Clone, Serialize, Deserialize)]
10098pub struct CorrelatedInjectionConfig {
10099    /// Enable correlated anomaly injection.
10100    #[serde(default)]
10101    pub enabled: bool,
10102
10103    /// Enable fraud concealment co-occurrence patterns.
10104    #[serde(default = "default_true_val")]
10105    pub fraud_concealment: bool,
10106
10107    /// Enable error cascade patterns.
10108    #[serde(default = "default_true_val")]
10109    pub error_cascade: bool,
10110
10111    /// Enable temporal clustering (period-end spikes).
10112    #[serde(default = "default_true_val")]
10113    pub temporal_clustering: bool,
10114
10115    /// Temporal clustering configuration.
10116    #[serde(default)]
10117    pub temporal_clustering_config: TemporalClusteringConfig,
10118
10119    /// Co-occurrence patterns.
10120    #[serde(default)]
10121    pub co_occurrence_patterns: Vec<CoOccurrencePatternConfig>,
10122}
10123
10124impl Default for CorrelatedInjectionConfig {
10125    fn default() -> Self {
10126        Self {
10127            enabled: false,
10128            fraud_concealment: true,
10129            error_cascade: true,
10130            temporal_clustering: true,
10131            temporal_clustering_config: TemporalClusteringConfig::default(),
10132            co_occurrence_patterns: Vec::new(),
10133        }
10134    }
10135}
10136
10137/// Temporal clustering configuration.
10138#[derive(Debug, Clone, Serialize, Deserialize)]
10139pub struct TemporalClusteringConfig {
10140    /// Period-end error multiplier.
10141    #[serde(default = "default_period_end_multiplier")]
10142    pub period_end_multiplier: f64,
10143
10144    /// Number of business days before period end to apply multiplier.
10145    #[serde(default = "default_period_end_days")]
10146    pub period_end_days: u32,
10147
10148    /// Quarter-end additional multiplier.
10149    #[serde(default = "default_quarter_end_multiplier")]
10150    pub quarter_end_multiplier: f64,
10151
10152    /// Year-end additional multiplier.
10153    #[serde(default = "default_year_end_multiplier")]
10154    pub year_end_multiplier: f64,
10155}
10156
/// Fallback for `TemporalClusteringConfig::period_end_multiplier`.
fn default_period_end_multiplier() -> f64 {
    2.5_f64
}

/// Fallback for `TemporalClusteringConfig::period_end_days`.
fn default_period_end_days() -> u32 {
    5_u32
}

/// Fallback for `TemporalClusteringConfig::quarter_end_multiplier`.
fn default_quarter_end_multiplier() -> f64 {
    1.5_f64
}

/// Fallback for `TemporalClusteringConfig::year_end_multiplier`.
fn default_year_end_multiplier() -> f64 {
    2.0_f64
}
10169
10170impl Default for TemporalClusteringConfig {
10171    fn default() -> Self {
10172        Self {
10173            period_end_multiplier: default_period_end_multiplier(),
10174            period_end_days: default_period_end_days(),
10175            quarter_end_multiplier: default_quarter_end_multiplier(),
10176            year_end_multiplier: default_year_end_multiplier(),
10177        }
10178    }
10179}
10180
10181/// Co-occurrence pattern configuration.
10182#[derive(Debug, Clone, Serialize, Deserialize)]
10183pub struct CoOccurrencePatternConfig {
10184    /// Pattern name.
10185    pub name: String,
10186
10187    /// Primary anomaly type that triggers the pattern.
10188    pub primary_type: String,
10189
10190    /// Correlated anomalies.
10191    pub correlated: Vec<CorrelatedAnomalyConfig>,
10192}
10193
10194/// Correlated anomaly configuration.
10195#[derive(Debug, Clone, Serialize, Deserialize)]
10196pub struct CorrelatedAnomalyConfig {
10197    /// Anomaly type.
10198    pub anomaly_type: String,
10199
10200    /// Probability of occurrence (0.0 to 1.0).
10201    pub probability: f64,
10202
10203    /// Minimum lag in days.
10204    pub lag_days_min: i32,
10205
10206    /// Maximum lag in days.
10207    pub lag_days_max: i32,
10208}
10209
10210/// Near-miss generation configuration.
10211#[derive(Debug, Clone, Serialize, Deserialize)]
10212pub struct NearMissConfig {
10213    /// Enable near-miss generation.
10214    #[serde(default)]
10215    pub enabled: bool,
10216
10217    /// Proportion of "anomalies" that are actually near-misses (0.0 to 1.0).
10218    #[serde(default = "default_near_miss_proportion")]
10219    pub proportion: f64,
10220
10221    /// Enable near-duplicate pattern.
10222    #[serde(default = "default_true_val")]
10223    pub near_duplicate: bool,
10224
10225    /// Near-duplicate date difference range in days.
10226    #[serde(default)]
10227    pub near_duplicate_days: NearDuplicateDaysConfig,
10228
10229    /// Enable threshold proximity pattern.
10230    #[serde(default = "default_true_val")]
10231    pub threshold_proximity: bool,
10232
10233    /// Threshold proximity range (e.g., 0.90-0.99 of threshold).
10234    #[serde(default)]
10235    pub threshold_proximity_range: ThresholdProximityRangeConfig,
10236
10237    /// Enable unusual but legitimate patterns.
10238    #[serde(default = "default_true_val")]
10239    pub unusual_legitimate: bool,
10240
10241    /// Types of unusual legitimate patterns to generate.
10242    #[serde(default = "default_unusual_legitimate_types")]
10243    pub unusual_legitimate_types: Vec<String>,
10244
10245    /// Enable corrected error patterns.
10246    #[serde(default = "default_true_val")]
10247    pub corrected_errors: bool,
10248
10249    /// Corrected error correction lag range in days.
10250    #[serde(default)]
10251    pub corrected_error_lag: CorrectedErrorLagConfig,
10252}
10253
/// Fallback for `NearMissConfig::proportion`.
fn default_near_miss_proportion() -> f64 {
    0.30_f64
}

/// Fallback for `NearMissConfig::unusual_legitimate_types`.
fn default_unusual_legitimate_types() -> Vec<String> {
    let kinds = [
        "year_end_bonus",
        "contract_prepayment",
        "insurance_claim",
        "settlement_payment",
    ];
    kinds.iter().map(|kind| kind.to_string()).collect()
}
10266
10267impl Default for NearMissConfig {
10268    fn default() -> Self {
10269        Self {
10270            enabled: false,
10271            proportion: default_near_miss_proportion(),
10272            near_duplicate: true,
10273            near_duplicate_days: NearDuplicateDaysConfig::default(),
10274            threshold_proximity: true,
10275            threshold_proximity_range: ThresholdProximityRangeConfig::default(),
10276            unusual_legitimate: true,
10277            unusual_legitimate_types: default_unusual_legitimate_types(),
10278            corrected_errors: true,
10279            corrected_error_lag: CorrectedErrorLagConfig::default(),
10280        }
10281    }
10282}
10283
10284/// Near-duplicate days configuration.
10285#[derive(Debug, Clone, Serialize, Deserialize)]
10286pub struct NearDuplicateDaysConfig {
10287    /// Minimum days apart.
10288    #[serde(default = "default_near_duplicate_min")]
10289    pub min: u32,
10290
10291    /// Maximum days apart.
10292    #[serde(default = "default_near_duplicate_max")]
10293    pub max: u32,
10294}
10295
/// Fallback for `NearDuplicateDaysConfig::min`.
fn default_near_duplicate_min() -> u32 {
    1_u32
}

/// Fallback for `NearDuplicateDaysConfig::max`.
fn default_near_duplicate_max() -> u32 {
    3_u32
}
10302
10303impl Default for NearDuplicateDaysConfig {
10304    fn default() -> Self {
10305        Self {
10306            min: default_near_duplicate_min(),
10307            max: default_near_duplicate_max(),
10308        }
10309    }
10310}
10311
10312/// Threshold proximity range configuration.
10313#[derive(Debug, Clone, Serialize, Deserialize)]
10314pub struct ThresholdProximityRangeConfig {
10315    /// Minimum proximity (e.g., 0.90 = 90% of threshold).
10316    #[serde(default = "default_threshold_proximity_min")]
10317    pub min: f64,
10318
10319    /// Maximum proximity (e.g., 0.99 = 99% of threshold).
10320    #[serde(default = "default_threshold_proximity_max")]
10321    pub max: f64,
10322}
10323
/// Fallback for `ThresholdProximityRangeConfig::min`.
fn default_threshold_proximity_min() -> f64 {
    0.90_f64
}

/// Fallback for `ThresholdProximityRangeConfig::max`.
fn default_threshold_proximity_max() -> f64 {
    0.99_f64
}
10330
10331impl Default for ThresholdProximityRangeConfig {
10332    fn default() -> Self {
10333        Self {
10334            min: default_threshold_proximity_min(),
10335            max: default_threshold_proximity_max(),
10336        }
10337    }
10338}
10339
10340/// Corrected error lag configuration.
10341#[derive(Debug, Clone, Serialize, Deserialize)]
10342pub struct CorrectedErrorLagConfig {
10343    /// Minimum correction lag in days.
10344    #[serde(default = "default_corrected_error_lag_min")]
10345    pub min: u32,
10346
10347    /// Maximum correction lag in days.
10348    #[serde(default = "default_corrected_error_lag_max")]
10349    pub max: u32,
10350}
10351
/// Fallback for `CorrectedErrorLagConfig::min`.
fn default_corrected_error_lag_min() -> u32 {
    1_u32
}

/// Fallback for `CorrectedErrorLagConfig::max`.
fn default_corrected_error_lag_max() -> u32 {
    5_u32
}
10358
10359impl Default for CorrectedErrorLagConfig {
10360    fn default() -> Self {
10361        Self {
10362            min: default_corrected_error_lag_min(),
10363            max: default_corrected_error_lag_max(),
10364        }
10365    }
10366}
10367
10368/// Detection difficulty classification configuration.
10369#[derive(Debug, Clone, Serialize, Deserialize)]
10370pub struct DifficultyClassificationConfig {
10371    /// Enable detection difficulty classification.
10372    #[serde(default)]
10373    pub enabled: bool,
10374
10375    /// Target distribution of difficulty levels.
10376    #[serde(default)]
10377    pub target_distribution: DifficultyDistributionConfig,
10378}
10379
10380impl Default for DifficultyClassificationConfig {
10381    fn default() -> Self {
10382        Self {
10383            enabled: true,
10384            target_distribution: DifficultyDistributionConfig::default(),
10385        }
10386    }
10387}
10388
10389/// Target distribution of detection difficulty levels.
10390#[derive(Debug, Clone, Serialize, Deserialize)]
10391pub struct DifficultyDistributionConfig {
10392    /// Proportion of trivial anomalies (expected 99% detection).
10393    #[serde(default = "default_difficulty_trivial")]
10394    pub trivial: f64,
10395
10396    /// Proportion of easy anomalies (expected 90% detection).
10397    #[serde(default = "default_difficulty_easy")]
10398    pub easy: f64,
10399
10400    /// Proportion of moderate anomalies (expected 70% detection).
10401    #[serde(default = "default_difficulty_moderate")]
10402    pub moderate: f64,
10403
10404    /// Proportion of hard anomalies (expected 40% detection).
10405    #[serde(default = "default_difficulty_hard")]
10406    pub hard: f64,
10407
10408    /// Proportion of expert anomalies (expected 15% detection).
10409    #[serde(default = "default_difficulty_expert")]
10410    pub expert: f64,
10411}
10412
/// Fallback for `DifficultyDistributionConfig::trivial`.
fn default_difficulty_trivial() -> f64 {
    0.15_f64
}

/// Fallback for `DifficultyDistributionConfig::easy`.
fn default_difficulty_easy() -> f64 {
    0.25_f64
}

/// Fallback for `DifficultyDistributionConfig::moderate`.
fn default_difficulty_moderate() -> f64 {
    0.30_f64
}

/// Fallback for `DifficultyDistributionConfig::hard`.
fn default_difficulty_hard() -> f64 {
    0.20_f64
}

/// Fallback for `DifficultyDistributionConfig::expert`.
fn default_difficulty_expert() -> f64 {
    0.10_f64
}
10428
10429impl Default for DifficultyDistributionConfig {
10430    fn default() -> Self {
10431        Self {
10432            trivial: default_difficulty_trivial(),
10433            easy: default_difficulty_easy(),
10434            moderate: default_difficulty_moderate(),
10435            hard: default_difficulty_hard(),
10436            expert: default_difficulty_expert(),
10437        }
10438    }
10439}
10440
10441/// Context-aware injection configuration.
10442#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10443pub struct ContextAwareConfig {
10444    /// Enable context-aware injection.
10445    #[serde(default)]
10446    pub enabled: bool,
10447
10448    /// Vendor-specific anomaly rules.
10449    #[serde(default)]
10450    pub vendor_rules: VendorAnomalyRulesConfig,
10451
10452    /// Employee-specific anomaly rules.
10453    #[serde(default)]
10454    pub employee_rules: EmployeeAnomalyRulesConfig,
10455
10456    /// Account-specific anomaly rules.
10457    #[serde(default)]
10458    pub account_rules: AccountAnomalyRulesConfig,
10459
10460    /// Behavioral baseline configuration.
10461    #[serde(default)]
10462    pub behavioral_baseline: BehavioralBaselineConfig,
10463}
10464
10465/// Vendor-specific anomaly rules configuration.
10466#[derive(Debug, Clone, Serialize, Deserialize)]
10467pub struct VendorAnomalyRulesConfig {
10468    /// Error rate multiplier for new vendors (< threshold days).
10469    #[serde(default = "default_new_vendor_multiplier")]
10470    pub new_vendor_error_multiplier: f64,
10471
10472    /// Days threshold for "new" vendor classification.
10473    #[serde(default = "default_new_vendor_threshold")]
10474    pub new_vendor_threshold_days: u32,
10475
10476    /// Error rate multiplier for international vendors.
10477    #[serde(default = "default_international_multiplier")]
10478    pub international_error_multiplier: f64,
10479
10480    /// Strategic vendor anomaly types (may differ from general vendors).
10481    #[serde(default = "default_strategic_vendor_types")]
10482    pub strategic_vendor_anomaly_types: Vec<String>,
10483}
10484
/// Fallback for `VendorAnomalyRulesConfig::new_vendor_error_multiplier`.
fn default_new_vendor_multiplier() -> f64 {
    2.5_f64
}

/// Fallback for `VendorAnomalyRulesConfig::new_vendor_threshold_days`.
fn default_new_vendor_threshold() -> u32 {
    90_u32
}

/// Fallback for `VendorAnomalyRulesConfig::international_error_multiplier`.
fn default_international_multiplier() -> f64 {
    1.5_f64
}

/// Fallback for `VendorAnomalyRulesConfig::strategic_vendor_anomaly_types`.
fn default_strategic_vendor_types() -> Vec<String> {
    let kinds = ["pricing_dispute", "contract_violation"];
    kinds.iter().map(|kind| kind.to_string()).collect()
}
10500
10501impl Default for VendorAnomalyRulesConfig {
10502    fn default() -> Self {
10503        Self {
10504            new_vendor_error_multiplier: default_new_vendor_multiplier(),
10505            new_vendor_threshold_days: default_new_vendor_threshold(),
10506            international_error_multiplier: default_international_multiplier(),
10507            strategic_vendor_anomaly_types: default_strategic_vendor_types(),
10508        }
10509    }
10510}
10511
10512/// Employee-specific anomaly rules configuration.
10513#[derive(Debug, Clone, Serialize, Deserialize)]
10514pub struct EmployeeAnomalyRulesConfig {
10515    /// Error rate for new employees (< threshold days).
10516    #[serde(default = "default_new_employee_rate")]
10517    pub new_employee_error_rate: f64,
10518
10519    /// Days threshold for "new" employee classification.
10520    #[serde(default = "default_new_employee_threshold")]
10521    pub new_employee_threshold_days: u32,
10522
10523    /// Transaction volume threshold for fatigue errors.
10524    #[serde(default = "default_volume_fatigue_threshold")]
10525    pub volume_fatigue_threshold: u32,
10526
10527    /// Error rate multiplier when primary approver is absent.
10528    #[serde(default = "default_coverage_multiplier")]
10529    pub coverage_error_multiplier: f64,
10530}
10531
/// Fallback for `EmployeeAnomalyRulesConfig::new_employee_error_rate`.
fn default_new_employee_rate() -> f64 {
    0.05_f64
}

/// Fallback for `EmployeeAnomalyRulesConfig::new_employee_threshold_days`.
fn default_new_employee_threshold() -> u32 {
    180_u32
}

/// Fallback for `EmployeeAnomalyRulesConfig::volume_fatigue_threshold`.
fn default_volume_fatigue_threshold() -> u32 {
    50_u32
}

/// Fallback for `EmployeeAnomalyRulesConfig::coverage_error_multiplier`.
fn default_coverage_multiplier() -> f64 {
    1.8_f64
}
10544
10545impl Default for EmployeeAnomalyRulesConfig {
10546    fn default() -> Self {
10547        Self {
10548            new_employee_error_rate: default_new_employee_rate(),
10549            new_employee_threshold_days: default_new_employee_threshold(),
10550            volume_fatigue_threshold: default_volume_fatigue_threshold(),
10551            coverage_error_multiplier: default_coverage_multiplier(),
10552        }
10553    }
10554}
10555
10556/// Account-specific anomaly rules configuration.
10557#[derive(Debug, Clone, Serialize, Deserialize)]
10558pub struct AccountAnomalyRulesConfig {
10559    /// Error rate multiplier for high-risk accounts.
10560    #[serde(default = "default_high_risk_multiplier")]
10561    pub high_risk_account_multiplier: f64,
10562
10563    /// Account codes considered high-risk.
10564    #[serde(default = "default_high_risk_accounts")]
10565    pub high_risk_accounts: Vec<String>,
10566
10567    /// Error rate multiplier for suspense accounts.
10568    #[serde(default = "default_suspense_multiplier")]
10569    pub suspense_account_multiplier: f64,
10570
10571    /// Account codes considered suspense accounts.
10572    #[serde(default = "default_suspense_accounts")]
10573    pub suspense_accounts: Vec<String>,
10574
10575    /// Error rate multiplier for intercompany accounts.
10576    #[serde(default = "default_intercompany_multiplier")]
10577    pub intercompany_account_multiplier: f64,
10578}
10579
/// Fallback for `AccountAnomalyRulesConfig::high_risk_account_multiplier`.
fn default_high_risk_multiplier() -> f64 {
    2.0_f64
}

/// Fallback for `AccountAnomalyRulesConfig::high_risk_accounts`.
fn default_high_risk_accounts() -> Vec<String> {
    let codes = [
        "1100", // AR Control
        "2000", // AP Control
        "3000", // Cash
    ];
    codes.iter().map(|code| code.to_string()).collect()
}

/// Fallback for `AccountAnomalyRulesConfig::suspense_account_multiplier`.
fn default_suspense_multiplier() -> f64 {
    3.0_f64
}

/// Fallback for `AccountAnomalyRulesConfig::suspense_accounts`.
fn default_suspense_accounts() -> Vec<String> {
    let codes = ["9999", "9998"];
    codes.iter().map(|code| code.to_string()).collect()
}

/// Fallback for `AccountAnomalyRulesConfig::intercompany_account_multiplier`.
fn default_intercompany_multiplier() -> f64 {
    1.5_f64
}
10599
10600impl Default for AccountAnomalyRulesConfig {
10601    fn default() -> Self {
10602        Self {
10603            high_risk_account_multiplier: default_high_risk_multiplier(),
10604            high_risk_accounts: default_high_risk_accounts(),
10605            suspense_account_multiplier: default_suspense_multiplier(),
10606            suspense_accounts: default_suspense_accounts(),
10607            intercompany_account_multiplier: default_intercompany_multiplier(),
10608        }
10609    }
10610}
10611
10612/// Behavioral baseline configuration.
10613#[derive(Debug, Clone, Serialize, Deserialize)]
10614pub struct BehavioralBaselineConfig {
10615    /// Enable behavioral baseline tracking.
10616    #[serde(default)]
10617    pub enabled: bool,
10618
10619    /// Number of days to build baseline from.
10620    #[serde(default = "default_baseline_period")]
10621    pub baseline_period_days: u32,
10622
10623    /// Standard deviation threshold for amount anomalies.
10624    #[serde(default = "default_deviation_threshold")]
10625    pub deviation_threshold_std: f64,
10626
10627    /// Standard deviation threshold for frequency anomalies.
10628    #[serde(default = "default_frequency_deviation")]
10629    pub frequency_deviation_threshold: f64,
10630}
10631
/// Fallback for `BehavioralBaselineConfig::baseline_period_days`.
fn default_baseline_period() -> u32 {
    90_u32
}

/// Fallback for `BehavioralBaselineConfig::deviation_threshold_std`.
fn default_deviation_threshold() -> f64 {
    3.0_f64
}

/// Fallback for `BehavioralBaselineConfig::frequency_deviation_threshold`.
fn default_frequency_deviation() -> f64 {
    2.0_f64
}
10641
10642impl Default for BehavioralBaselineConfig {
10643    fn default() -> Self {
10644        Self {
10645            enabled: false,
10646            baseline_period_days: default_baseline_period(),
10647            deviation_threshold_std: default_deviation_threshold(),
10648            frequency_deviation_threshold: default_frequency_deviation(),
10649        }
10650    }
10651}
10652
10653/// Enhanced labeling configuration.
10654#[derive(Debug, Clone, Serialize, Deserialize)]
10655pub struct EnhancedLabelingConfig {
10656    /// Enable severity scoring.
10657    #[serde(default = "default_true_val")]
10658    pub severity_scoring: bool,
10659
10660    /// Enable difficulty classification.
10661    #[serde(default = "default_true_val")]
10662    pub difficulty_classification: bool,
10663
10664    /// Materiality thresholds for severity classification.
10665    #[serde(default)]
10666    pub materiality_thresholds: MaterialityThresholdsConfig,
10667}
10668
10669impl Default for EnhancedLabelingConfig {
10670    fn default() -> Self {
10671        Self {
10672            severity_scoring: true,
10673            difficulty_classification: true,
10674            materiality_thresholds: MaterialityThresholdsConfig::default(),
10675        }
10676    }
10677}
10678
10679/// Materiality thresholds configuration.
10680#[derive(Debug, Clone, Serialize, Deserialize)]
10681pub struct MaterialityThresholdsConfig {
10682    /// Threshold for trivial impact (as percentage of total).
10683    #[serde(default = "default_materiality_trivial")]
10684    pub trivial: f64,
10685
10686    /// Threshold for immaterial impact.
10687    #[serde(default = "default_materiality_immaterial")]
10688    pub immaterial: f64,
10689
10690    /// Threshold for material impact.
10691    #[serde(default = "default_materiality_material")]
10692    pub material: f64,
10693
10694    /// Threshold for highly material impact.
10695    #[serde(default = "default_materiality_highly_material")]
10696    pub highly_material: f64,
10697}
10698
/// Fallback for `MaterialityThresholdsConfig::trivial`.
fn default_materiality_trivial() -> f64 {
    0.001_f64
}

/// Fallback for `MaterialityThresholdsConfig::immaterial`.
fn default_materiality_immaterial() -> f64 {
    0.01_f64
}

/// Fallback for `MaterialityThresholdsConfig::material`.
fn default_materiality_material() -> f64 {
    0.05_f64
}

/// Fallback for `MaterialityThresholdsConfig::highly_material`.
fn default_materiality_highly_material() -> f64 {
    0.10_f64
}
10711
10712impl Default for MaterialityThresholdsConfig {
10713    fn default() -> Self {
10714        Self {
10715            trivial: default_materiality_trivial(),
10716            immaterial: default_materiality_immaterial(),
10717            material: default_materiality_material(),
10718            highly_material: default_materiality_highly_material(),
10719        }
10720    }
10721}
10722
10723// =============================================================================
10724// Industry-Specific Configuration
10725// =============================================================================
10726
10727/// Industry-specific transaction and anomaly generation configuration.
10728///
10729/// This configuration enables generation of industry-authentic:
10730/// - Transaction types with appropriate terminology
10731/// - Master data (BOM, routings, clinical codes, etc.)
10732/// - Industry-specific anomaly patterns
10733/// - Regulatory framework compliance
10734#[derive(Debug, Clone, Serialize, Deserialize, Default)]
10735pub struct IndustrySpecificConfig {
10736    /// Enable industry-specific generation.
10737    #[serde(default)]
10738    pub enabled: bool,
10739
10740    /// Manufacturing industry settings.
10741    #[serde(default)]
10742    pub manufacturing: ManufacturingConfig,
10743
10744    /// Retail industry settings.
10745    #[serde(default)]
10746    pub retail: RetailConfig,
10747
10748    /// Healthcare industry settings.
10749    #[serde(default)]
10750    pub healthcare: HealthcareConfig,
10751
10752    /// Technology industry settings.
10753    #[serde(default)]
10754    pub technology: TechnologyConfig,
10755
10756    /// Financial services industry settings.
10757    #[serde(default)]
10758    pub financial_services: FinancialServicesConfig,
10759
10760    /// Professional services industry settings.
10761    #[serde(default)]
10762    pub professional_services: ProfessionalServicesConfig,
10763}
10764
10765/// Manufacturing industry configuration.
10766#[derive(Debug, Clone, Serialize, Deserialize)]
10767pub struct ManufacturingConfig {
10768    /// Enable manufacturing-specific generation.
10769    #[serde(default)]
10770    pub enabled: bool,
10771
10772    /// Bill of Materials depth (typical: 3-7).
10773    #[serde(default = "default_bom_depth")]
10774    pub bom_depth: u32,
10775
10776    /// Whether to use just-in-time inventory.
10777    #[serde(default)]
10778    pub just_in_time: bool,
10779
10780    /// Production order types to generate.
10781    #[serde(default = "default_production_order_types")]
10782    pub production_order_types: Vec<String>,
10783
10784    /// Quality framework (ISO_9001, Six_Sigma, etc.).
10785    #[serde(default)]
10786    pub quality_framework: Option<String>,
10787
10788    /// Number of supplier tiers to model (1-3).
10789    #[serde(default = "default_supplier_tiers")]
10790    pub supplier_tiers: u32,
10791
10792    /// Standard cost update frequency.
10793    #[serde(default = "default_cost_frequency")]
10794    pub standard_cost_frequency: String,
10795
10796    /// Target yield rate (0.95-0.99 typical).
10797    #[serde(default = "default_yield_rate")]
10798    pub target_yield_rate: f64,
10799
10800    /// Scrap percentage threshold for alerts.
10801    #[serde(default = "default_scrap_threshold")]
10802    pub scrap_alert_threshold: f64,
10803
10804    /// Manufacturing anomaly injection rates.
10805    #[serde(default)]
10806    pub anomaly_rates: ManufacturingAnomalyRates,
10807
10808    /// Cost accounting configuration (WIP → FG → COGS pipeline).
10809    #[serde(default)]
10810    pub cost_accounting: ManufacturingCostAccountingConfig,
10811}
10812
10813/// Configuration for manufacturing cost accounting JE generation.
10814#[derive(Debug, Clone, Serialize, Deserialize)]
10815pub struct ManufacturingCostAccountingConfig {
10816    /// Enable multi-stage cost flow (WIP → FG → COGS) instead of flat JEs.
10817    #[serde(default = "default_true")]
10818    pub enabled: bool,
10819
10820    /// Generate standard cost variance JEs.
10821    #[serde(default = "default_true")]
10822    pub variance_accounts_enabled: bool,
10823
10824    /// Generate warranty provisions from quality inspection failures.
10825    #[serde(default = "default_true")]
10826    pub warranty_provisions_enabled: bool,
10827
10828    /// Minimum defect rate (0.0-1.0) to trigger warranty provision generation.
10829    #[serde(default = "default_warranty_defect_threshold")]
10830    pub warranty_defect_threshold: f64,
10831}
10832
/// Fallback for `ManufacturingCostAccountingConfig::warranty_defect_threshold`.
fn default_warranty_defect_threshold() -> f64 {
    0.01_f64
}
10836
10837impl Default for ManufacturingCostAccountingConfig {
10838    fn default() -> Self {
10839        Self {
10840            enabled: true,
10841            variance_accounts_enabled: true,
10842            warranty_provisions_enabled: true,
10843            warranty_defect_threshold: 0.01,
10844        }
10845    }
10846}
10847
/// Fallback for `ManufacturingConfig::bom_depth`.
fn default_bom_depth() -> u32 {
    4_u32
}

/// Fallback for `ManufacturingConfig::production_order_types`.
fn default_production_order_types() -> Vec<String> {
    let kinds = ["standard", "rework", "prototype"];
    kinds.iter().map(|kind| kind.to_string()).collect()
}

/// Fallback for `ManufacturingConfig::supplier_tiers`.
fn default_supplier_tiers() -> u32 {
    2_u32
}

/// Fallback for `ManufacturingConfig::standard_cost_frequency`.
fn default_cost_frequency() -> String {
    String::from("quarterly")
}

/// Fallback for `ManufacturingConfig::target_yield_rate`.
fn default_yield_rate() -> f64 {
    0.97_f64
}

/// Fallback for `ManufacturingConfig::scrap_alert_threshold`.
fn default_scrap_threshold() -> f64 {
    0.03_f64
}
10875
10876impl Default for ManufacturingConfig {
10877    fn default() -> Self {
10878        Self {
10879            enabled: false,
10880            bom_depth: default_bom_depth(),
10881            just_in_time: false,
10882            production_order_types: default_production_order_types(),
10883            quality_framework: Some("ISO_9001".to_string()),
10884            supplier_tiers: default_supplier_tiers(),
10885            standard_cost_frequency: default_cost_frequency(),
10886            target_yield_rate: default_yield_rate(),
10887            scrap_alert_threshold: default_scrap_threshold(),
10888            anomaly_rates: ManufacturingAnomalyRates::default(),
10889            cost_accounting: ManufacturingCostAccountingConfig::default(),
10890        }
10891    }
10892}
10893
10894/// Manufacturing anomaly injection rates.
10895#[derive(Debug, Clone, Serialize, Deserialize)]
10896pub struct ManufacturingAnomalyRates {
10897    /// Yield manipulation rate.
10898    #[serde(default = "default_mfg_yield_rate")]
10899    pub yield_manipulation: f64,
10900
10901    /// Labor misallocation rate.
10902    #[serde(default = "default_mfg_labor_rate")]
10903    pub labor_misallocation: f64,
10904
10905    /// Phantom production rate.
10906    #[serde(default = "default_mfg_phantom_rate")]
10907    pub phantom_production: f64,
10908
10909    /// Standard cost manipulation rate.
10910    #[serde(default = "default_mfg_cost_rate")]
10911    pub standard_cost_manipulation: f64,
10912
10913    /// Inventory fraud rate.
10914    #[serde(default = "default_mfg_inventory_rate")]
10915    pub inventory_fraud: f64,
10916}
10917
/// Fallback for `ManufacturingAnomalyRates::yield_manipulation`.
fn default_mfg_yield_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ManufacturingAnomalyRates::labor_misallocation`.
fn default_mfg_labor_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ManufacturingAnomalyRates::phantom_production`.
fn default_mfg_phantom_rate() -> f64 {
    0.005_f64
}

/// Fallback for `ManufacturingAnomalyRates::standard_cost_manipulation`.
fn default_mfg_cost_rate() -> f64 {
    0.01_f64
}

/// Fallback for `ManufacturingAnomalyRates::inventory_fraud`.
fn default_mfg_inventory_rate() -> f64 {
    0.008_f64
}
10937
10938impl Default for ManufacturingAnomalyRates {
10939    fn default() -> Self {
10940        Self {
10941            yield_manipulation: default_mfg_yield_rate(),
10942            labor_misallocation: default_mfg_labor_rate(),
10943            phantom_production: default_mfg_phantom_rate(),
10944            standard_cost_manipulation: default_mfg_cost_rate(),
10945            inventory_fraud: default_mfg_inventory_rate(),
10946        }
10947    }
10948}
10949
10950/// Retail industry configuration.
10951#[derive(Debug, Clone, Serialize, Deserialize)]
10952pub struct RetailConfig {
10953    /// Enable retail-specific generation.
10954    #[serde(default)]
10955    pub enabled: bool,
10956
10957    /// Store type distribution.
10958    #[serde(default)]
10959    pub store_types: RetailStoreTypeConfig,
10960
10961    /// Average daily transactions per store.
10962    #[serde(default = "default_retail_daily_txns")]
10963    pub avg_daily_transactions: u32,
10964
10965    /// Enable loss prevention tracking.
10966    #[serde(default = "default_true")]
10967    pub loss_prevention: bool,
10968
10969    /// Shrinkage rate (0.01-0.03 typical).
10970    #[serde(default = "default_shrinkage_rate")]
10971    pub shrinkage_rate: f64,
10972
10973    /// Retail anomaly injection rates.
10974    #[serde(default)]
10975    pub anomaly_rates: RetailAnomalyRates,
10976}
10977
/// Fallback for `RetailConfig::avg_daily_transactions`.
fn default_retail_daily_txns() -> u32 {
    500_u32
}

/// Fallback for `RetailConfig::shrinkage_rate`.
fn default_shrinkage_rate() -> f64 {
    0.015_f64
}
10985
10986impl Default for RetailConfig {
10987    fn default() -> Self {
10988        Self {
10989            enabled: false,
10990            store_types: RetailStoreTypeConfig::default(),
10991            avg_daily_transactions: default_retail_daily_txns(),
10992            loss_prevention: true,
10993            shrinkage_rate: default_shrinkage_rate(),
10994            anomaly_rates: RetailAnomalyRates::default(),
10995        }
10996    }
10997}
10998
10999/// Retail store type distribution.
11000#[derive(Debug, Clone, Serialize, Deserialize)]
11001pub struct RetailStoreTypeConfig {
11002    /// Percentage of flagship stores.
11003    #[serde(default = "default_flagship_pct")]
11004    pub flagship: f64,
11005
11006    /// Percentage of regional stores.
11007    #[serde(default = "default_regional_pct")]
11008    pub regional: f64,
11009
11010    /// Percentage of outlet stores.
11011    #[serde(default = "default_outlet_pct")]
11012    pub outlet: f64,
11013
11014    /// Percentage of e-commerce.
11015    #[serde(default = "default_ecommerce_pct")]
11016    pub ecommerce: f64,
11017}
11018
/// Fallback for `RetailStoreTypeConfig::flagship`.
fn default_flagship_pct() -> f64 {
    0.10_f64
}

/// Fallback for `RetailStoreTypeConfig::regional`.
fn default_regional_pct() -> f64 {
    0.50_f64
}

/// Fallback for `RetailStoreTypeConfig::outlet`.
fn default_outlet_pct() -> f64 {
    0.25_f64
}

/// Fallback for `RetailStoreTypeConfig::ecommerce`.
fn default_ecommerce_pct() -> f64 {
    0.15_f64
}
11034
11035impl Default for RetailStoreTypeConfig {
11036    fn default() -> Self {
11037        Self {
11038            flagship: default_flagship_pct(),
11039            regional: default_regional_pct(),
11040            outlet: default_outlet_pct(),
11041            ecommerce: default_ecommerce_pct(),
11042        }
11043    }
11044}
11045
11046/// Retail anomaly injection rates.
11047#[derive(Debug, Clone, Serialize, Deserialize)]
11048pub struct RetailAnomalyRates {
11049    /// Sweethearting rate.
11050    #[serde(default = "default_sweethearting_rate")]
11051    pub sweethearting: f64,
11052
11053    /// Skimming rate.
11054    #[serde(default = "default_skimming_rate")]
11055    pub skimming: f64,
11056
11057    /// Refund fraud rate.
11058    #[serde(default = "default_refund_fraud_rate")]
11059    pub refund_fraud: f64,
11060
11061    /// Void abuse rate.
11062    #[serde(default = "default_void_abuse_rate")]
11063    pub void_abuse: f64,
11064
11065    /// Gift card fraud rate.
11066    #[serde(default = "default_gift_card_rate")]
11067    pub gift_card_fraud: f64,
11068
11069    /// Vendor kickback rate.
11070    #[serde(default = "default_retail_kickback_rate")]
11071    pub vendor_kickback: f64,
11072}
11073
/// Fallback for `RetailAnomalyRates::sweethearting`.
fn default_sweethearting_rate() -> f64 {
    0.02_f64
}

/// Fallback for `RetailAnomalyRates::skimming`.
fn default_skimming_rate() -> f64 {
    0.005_f64
}

/// Fallback for `RetailAnomalyRates::refund_fraud`.
fn default_refund_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `RetailAnomalyRates::void_abuse`.
fn default_void_abuse_rate() -> f64 {
    0.01_f64
}

/// Fallback for `RetailAnomalyRates::gift_card_fraud`.
fn default_gift_card_rate() -> f64 {
    0.008_f64
}

/// Fallback for `RetailAnomalyRates::vendor_kickback`.
fn default_retail_kickback_rate() -> f64 {
    0.003_f64
}
11097
11098impl Default for RetailAnomalyRates {
11099    fn default() -> Self {
11100        Self {
11101            sweethearting: default_sweethearting_rate(),
11102            skimming: default_skimming_rate(),
11103            refund_fraud: default_refund_fraud_rate(),
11104            void_abuse: default_void_abuse_rate(),
11105            gift_card_fraud: default_gift_card_rate(),
11106            vendor_kickback: default_retail_kickback_rate(),
11107        }
11108    }
11109}
11110
11111/// Healthcare industry configuration.
11112#[derive(Debug, Clone, Serialize, Deserialize)]
11113pub struct HealthcareConfig {
11114    /// Enable healthcare-specific generation.
11115    #[serde(default)]
11116    pub enabled: bool,
11117
11118    /// Healthcare facility type.
11119    #[serde(default = "default_facility_type")]
11120    pub facility_type: String,
11121
11122    /// Payer mix distribution.
11123    #[serde(default)]
11124    pub payer_mix: HealthcarePayerMix,
11125
11126    /// Coding systems enabled.
11127    #[serde(default)]
11128    pub coding_systems: HealthcareCodingSystems,
11129
11130    /// Healthcare compliance settings.
11131    #[serde(default)]
11132    pub compliance: HealthcareComplianceConfig,
11133
11134    /// Average daily encounters.
11135    #[serde(default = "default_daily_encounters")]
11136    pub avg_daily_encounters: u32,
11137
11138    /// Average charges per encounter.
11139    #[serde(default = "default_charges_per_encounter")]
11140    pub avg_charges_per_encounter: u32,
11141
11142    /// Denial rate (0.0-1.0).
11143    #[serde(default = "default_hc_denial_rate")]
11144    pub denial_rate: f64,
11145
11146    /// Bad debt rate (0.0-1.0).
11147    #[serde(default = "default_hc_bad_debt_rate")]
11148    pub bad_debt_rate: f64,
11149
11150    /// Charity care rate (0.0-1.0).
11151    #[serde(default = "default_hc_charity_care_rate")]
11152    pub charity_care_rate: f64,
11153
11154    /// Healthcare anomaly injection rates.
11155    #[serde(default)]
11156    pub anomaly_rates: HealthcareAnomalyRates,
11157}
11158
/// Fallback for `HealthcareConfig::facility_type`.
fn default_facility_type() -> String {
    String::from("hospital")
}

/// Fallback for `HealthcareConfig::avg_daily_encounters`.
fn default_daily_encounters() -> u32 {
    150_u32
}

/// Fallback for `HealthcareConfig::avg_charges_per_encounter`.
fn default_charges_per_encounter() -> u32 {
    8_u32
}

/// Fallback for `HealthcareConfig::denial_rate`.
fn default_hc_denial_rate() -> f64 {
    0.05_f64
}

/// Fallback for `HealthcareConfig::bad_debt_rate`.
fn default_hc_bad_debt_rate() -> f64 {
    0.03_f64
}

/// Fallback for `HealthcareConfig::charity_care_rate`.
fn default_hc_charity_care_rate() -> f64 {
    0.02_f64
}
11182
11183impl Default for HealthcareConfig {
11184    fn default() -> Self {
11185        Self {
11186            enabled: false,
11187            facility_type: default_facility_type(),
11188            payer_mix: HealthcarePayerMix::default(),
11189            coding_systems: HealthcareCodingSystems::default(),
11190            compliance: HealthcareComplianceConfig::default(),
11191            avg_daily_encounters: default_daily_encounters(),
11192            avg_charges_per_encounter: default_charges_per_encounter(),
11193            denial_rate: default_hc_denial_rate(),
11194            bad_debt_rate: default_hc_bad_debt_rate(),
11195            charity_care_rate: default_hc_charity_care_rate(),
11196            anomaly_rates: HealthcareAnomalyRates::default(),
11197        }
11198    }
11199}
11200
11201/// Healthcare payer mix distribution.
11202#[derive(Debug, Clone, Serialize, Deserialize)]
11203pub struct HealthcarePayerMix {
11204    /// Medicare percentage.
11205    #[serde(default = "default_medicare_pct")]
11206    pub medicare: f64,
11207
11208    /// Medicaid percentage.
11209    #[serde(default = "default_medicaid_pct")]
11210    pub medicaid: f64,
11211
11212    /// Commercial insurance percentage.
11213    #[serde(default = "default_commercial_pct")]
11214    pub commercial: f64,
11215
11216    /// Self-pay percentage.
11217    #[serde(default = "default_self_pay_pct")]
11218    pub self_pay: f64,
11219}
11220
/// Fallback for `HealthcarePayerMix::medicare`.
fn default_medicare_pct() -> f64 {
    0.40_f64
}

/// Fallback for `HealthcarePayerMix::medicaid`.
fn default_medicaid_pct() -> f64 {
    0.20_f64
}

/// Fallback for `HealthcarePayerMix::commercial`.
fn default_commercial_pct() -> f64 {
    0.30_f64
}

/// Fallback for `HealthcarePayerMix::self_pay`.
fn default_self_pay_pct() -> f64 {
    0.10_f64
}
11236
11237impl Default for HealthcarePayerMix {
11238    fn default() -> Self {
11239        Self {
11240            medicare: default_medicare_pct(),
11241            medicaid: default_medicaid_pct(),
11242            commercial: default_commercial_pct(),
11243            self_pay: default_self_pay_pct(),
11244        }
11245    }
11246}
11247
11248/// Healthcare coding systems configuration.
11249#[derive(Debug, Clone, Serialize, Deserialize)]
11250pub struct HealthcareCodingSystems {
11251    /// Enable ICD-10 diagnosis coding.
11252    #[serde(default = "default_true")]
11253    pub icd10: bool,
11254
11255    /// Enable CPT procedure coding.
11256    #[serde(default = "default_true")]
11257    pub cpt: bool,
11258
11259    /// Enable DRG grouping.
11260    #[serde(default = "default_true")]
11261    pub drg: bool,
11262
11263    /// Enable HCPCS Level II coding.
11264    #[serde(default = "default_true")]
11265    pub hcpcs: bool,
11266
11267    /// Enable revenue codes.
11268    #[serde(default = "default_true")]
11269    pub revenue_codes: bool,
11270}
11271
11272impl Default for HealthcareCodingSystems {
11273    fn default() -> Self {
11274        Self {
11275            icd10: true,
11276            cpt: true,
11277            drg: true,
11278            hcpcs: true,
11279            revenue_codes: true,
11280        }
11281    }
11282}
11283
11284/// Healthcare compliance configuration.
11285#[derive(Debug, Clone, Serialize, Deserialize)]
11286pub struct HealthcareComplianceConfig {
11287    /// Enable HIPAA compliance.
11288    #[serde(default = "default_true")]
11289    pub hipaa: bool,
11290
11291    /// Enable Stark Law compliance.
11292    #[serde(default = "default_true")]
11293    pub stark_law: bool,
11294
11295    /// Enable Anti-Kickback Statute compliance.
11296    #[serde(default = "default_true")]
11297    pub anti_kickback: bool,
11298
11299    /// Enable False Claims Act compliance.
11300    #[serde(default = "default_true")]
11301    pub false_claims_act: bool,
11302
11303    /// Enable EMTALA compliance (for hospitals).
11304    #[serde(default = "default_true")]
11305    pub emtala: bool,
11306}
11307
11308impl Default for HealthcareComplianceConfig {
11309    fn default() -> Self {
11310        Self {
11311            hipaa: true,
11312            stark_law: true,
11313            anti_kickback: true,
11314            false_claims_act: true,
11315            emtala: true,
11316        }
11317    }
11318}
11319
11320/// Healthcare anomaly injection rates.
11321#[derive(Debug, Clone, Serialize, Deserialize)]
11322pub struct HealthcareAnomalyRates {
11323    /// Upcoding rate.
11324    #[serde(default = "default_upcoding_rate")]
11325    pub upcoding: f64,
11326
11327    /// Unbundling rate.
11328    #[serde(default = "default_unbundling_rate")]
11329    pub unbundling: f64,
11330
11331    /// Phantom billing rate.
11332    #[serde(default = "default_phantom_billing_rate")]
11333    pub phantom_billing: f64,
11334
11335    /// Kickback rate.
11336    #[serde(default = "default_healthcare_kickback_rate")]
11337    pub kickbacks: f64,
11338
11339    /// Duplicate billing rate.
11340    #[serde(default = "default_duplicate_billing_rate")]
11341    pub duplicate_billing: f64,
11342
11343    /// Medical necessity abuse rate.
11344    #[serde(default = "default_med_necessity_rate")]
11345    pub medical_necessity_abuse: f64,
11346}
11347
/// Fallback for `HealthcareAnomalyRates::upcoding`.
fn default_upcoding_rate() -> f64 {
    0.02_f64
}

/// Fallback for `HealthcareAnomalyRates::unbundling`.
fn default_unbundling_rate() -> f64 {
    0.015_f64
}

/// Fallback for `HealthcareAnomalyRates::phantom_billing`.
fn default_phantom_billing_rate() -> f64 {
    0.005_f64
}

/// Fallback for `HealthcareAnomalyRates::kickbacks`.
fn default_healthcare_kickback_rate() -> f64 {
    0.003_f64
}

/// Fallback for `HealthcareAnomalyRates::duplicate_billing`.
fn default_duplicate_billing_rate() -> f64 {
    0.008_f64
}

/// Fallback for `HealthcareAnomalyRates::medical_necessity_abuse`.
fn default_med_necessity_rate() -> f64 {
    0.01_f64
}
11371
11372impl Default for HealthcareAnomalyRates {
11373    fn default() -> Self {
11374        Self {
11375            upcoding: default_upcoding_rate(),
11376            unbundling: default_unbundling_rate(),
11377            phantom_billing: default_phantom_billing_rate(),
11378            kickbacks: default_healthcare_kickback_rate(),
11379            duplicate_billing: default_duplicate_billing_rate(),
11380            medical_necessity_abuse: default_med_necessity_rate(),
11381        }
11382    }
11383}
11384
11385/// Technology industry configuration.
11386#[derive(Debug, Clone, Serialize, Deserialize)]
11387pub struct TechnologyConfig {
11388    /// Enable technology-specific generation.
11389    #[serde(default)]
11390    pub enabled: bool,
11391
11392    /// Revenue model type.
11393    #[serde(default = "default_revenue_model")]
11394    pub revenue_model: String,
11395
11396    /// Subscription revenue percentage (for SaaS).
11397    #[serde(default = "default_subscription_pct")]
11398    pub subscription_revenue_pct: f64,
11399
11400    /// License revenue percentage.
11401    #[serde(default = "default_license_pct")]
11402    pub license_revenue_pct: f64,
11403
11404    /// Services revenue percentage.
11405    #[serde(default = "default_services_pct")]
11406    pub services_revenue_pct: f64,
11407
11408    /// R&D capitalization settings.
11409    #[serde(default)]
11410    pub rd_capitalization: RdCapitalizationConfig,
11411
11412    /// Technology anomaly injection rates.
11413    #[serde(default)]
11414    pub anomaly_rates: TechnologyAnomalyRates,
11415}
11416
/// Fallback for `TechnologyConfig::revenue_model`.
fn default_revenue_model() -> String {
    String::from("saas")
}

/// Fallback for `TechnologyConfig::subscription_revenue_pct`.
fn default_subscription_pct() -> f64 {
    0.60_f64
}

/// Fallback for `TechnologyConfig::license_revenue_pct`.
fn default_license_pct() -> f64 {
    0.25_f64
}

/// Fallback for `TechnologyConfig::services_revenue_pct`.
fn default_services_pct() -> f64 {
    0.15_f64
}
11432
11433impl Default for TechnologyConfig {
11434    fn default() -> Self {
11435        Self {
11436            enabled: false,
11437            revenue_model: default_revenue_model(),
11438            subscription_revenue_pct: default_subscription_pct(),
11439            license_revenue_pct: default_license_pct(),
11440            services_revenue_pct: default_services_pct(),
11441            rd_capitalization: RdCapitalizationConfig::default(),
11442            anomaly_rates: TechnologyAnomalyRates::default(),
11443        }
11444    }
11445}
11446
11447/// R&D capitalization configuration.
11448#[derive(Debug, Clone, Serialize, Deserialize)]
11449pub struct RdCapitalizationConfig {
11450    /// Enable R&D capitalization.
11451    #[serde(default = "default_true")]
11452    pub enabled: bool,
11453
11454    /// Capitalization rate (0.0-1.0).
11455    #[serde(default = "default_cap_rate")]
11456    pub capitalization_rate: f64,
11457
11458    /// Useful life in years.
11459    #[serde(default = "default_useful_life")]
11460    pub useful_life_years: u32,
11461}
11462
/// Fallback for `RdCapitalizationConfig::capitalization_rate`.
fn default_cap_rate() -> f64 {
    0.30_f64
}

/// Fallback for `RdCapitalizationConfig::useful_life_years`.
fn default_useful_life() -> u32 {
    3_u32
}
11470
11471impl Default for RdCapitalizationConfig {
11472    fn default() -> Self {
11473        Self {
11474            enabled: true,
11475            capitalization_rate: default_cap_rate(),
11476            useful_life_years: default_useful_life(),
11477        }
11478    }
11479}
11480
11481/// Technology anomaly injection rates.
11482#[derive(Debug, Clone, Serialize, Deserialize)]
11483pub struct TechnologyAnomalyRates {
11484    /// Premature revenue recognition rate.
11485    #[serde(default = "default_premature_rev_rate")]
11486    pub premature_revenue: f64,
11487
11488    /// Side letter abuse rate.
11489    #[serde(default = "default_side_letter_rate")]
11490    pub side_letter_abuse: f64,
11491
11492    /// Channel stuffing rate.
11493    #[serde(default = "default_channel_stuffing_rate")]
11494    pub channel_stuffing: f64,
11495
11496    /// Improper capitalization rate.
11497    #[serde(default = "default_improper_cap_rate")]
11498    pub improper_capitalization: f64,
11499}
11500
/// Fallback for `TechnologyAnomalyRates::premature_revenue`.
fn default_premature_rev_rate() -> f64 {
    0.015_f64
}

/// Fallback for `TechnologyAnomalyRates::side_letter_abuse`.
fn default_side_letter_rate() -> f64 {
    0.008_f64
}

/// Fallback for `TechnologyAnomalyRates::channel_stuffing`.
fn default_channel_stuffing_rate() -> f64 {
    0.01_f64
}

/// Fallback for `TechnologyAnomalyRates::improper_capitalization`.
fn default_improper_cap_rate() -> f64 {
    0.012_f64
}
11516
11517impl Default for TechnologyAnomalyRates {
11518    fn default() -> Self {
11519        Self {
11520            premature_revenue: default_premature_rev_rate(),
11521            side_letter_abuse: default_side_letter_rate(),
11522            channel_stuffing: default_channel_stuffing_rate(),
11523            improper_capitalization: default_improper_cap_rate(),
11524        }
11525    }
11526}
11527
11528/// Financial services industry configuration.
11529#[derive(Debug, Clone, Serialize, Deserialize)]
11530pub struct FinancialServicesConfig {
11531    /// Enable financial services-specific generation.
11532    #[serde(default)]
11533    pub enabled: bool,
11534
11535    /// Financial institution type.
11536    #[serde(default = "default_fi_type")]
11537    pub institution_type: String,
11538
11539    /// Regulatory framework.
11540    #[serde(default = "default_fi_regulatory")]
11541    pub regulatory_framework: String,
11542
11543    /// Financial services anomaly injection rates.
11544    #[serde(default)]
11545    pub anomaly_rates: FinancialServicesAnomalyRates,
11546}
11547
/// Fallback for `FinancialServicesConfig::institution_type`.
fn default_fi_type() -> String {
    String::from("commercial_bank")
}

/// Fallback for `FinancialServicesConfig::regulatory_framework`.
fn default_fi_regulatory() -> String {
    String::from("us_banking")
}
11555
11556impl Default for FinancialServicesConfig {
11557    fn default() -> Self {
11558        Self {
11559            enabled: false,
11560            institution_type: default_fi_type(),
11561            regulatory_framework: default_fi_regulatory(),
11562            anomaly_rates: FinancialServicesAnomalyRates::default(),
11563        }
11564    }
11565}
11566
11567/// Financial services anomaly injection rates.
11568#[derive(Debug, Clone, Serialize, Deserialize)]
11569pub struct FinancialServicesAnomalyRates {
11570    /// Loan fraud rate.
11571    #[serde(default = "default_loan_fraud_rate")]
11572    pub loan_fraud: f64,
11573
11574    /// Trading fraud rate.
11575    #[serde(default = "default_trading_fraud_rate")]
11576    pub trading_fraud: f64,
11577
11578    /// Insurance fraud rate.
11579    #[serde(default = "default_insurance_fraud_rate")]
11580    pub insurance_fraud: f64,
11581
11582    /// Account manipulation rate.
11583    #[serde(default = "default_account_manip_rate")]
11584    pub account_manipulation: f64,
11585}
11586
/// Fallback for `FinancialServicesAnomalyRates::loan_fraud`.
fn default_loan_fraud_rate() -> f64 {
    0.01_f64
}

/// Fallback for `FinancialServicesAnomalyRates::trading_fraud`.
fn default_trading_fraud_rate() -> f64 {
    0.008_f64
}

/// Fallback for `FinancialServicesAnomalyRates::insurance_fraud`.
fn default_insurance_fraud_rate() -> f64 {
    0.012_f64
}

/// Fallback for `FinancialServicesAnomalyRates::account_manipulation`.
fn default_account_manip_rate() -> f64 {
    0.005_f64
}
11602
11603impl Default for FinancialServicesAnomalyRates {
11604    fn default() -> Self {
11605        Self {
11606            loan_fraud: default_loan_fraud_rate(),
11607            trading_fraud: default_trading_fraud_rate(),
11608            insurance_fraud: default_insurance_fraud_rate(),
11609            account_manipulation: default_account_manip_rate(),
11610        }
11611    }
11612}
11613
11614/// Professional services industry configuration.
11615#[derive(Debug, Clone, Serialize, Deserialize)]
11616pub struct ProfessionalServicesConfig {
11617    /// Enable professional services-specific generation.
11618    #[serde(default)]
11619    pub enabled: bool,
11620
11621    /// Firm type.
11622    #[serde(default = "default_firm_type")]
11623    pub firm_type: String,
11624
11625    /// Billing model.
11626    #[serde(default = "default_billing_model")]
11627    pub billing_model: String,
11628
11629    /// Average hourly rate.
11630    #[serde(default = "default_hourly_rate")]
11631    pub avg_hourly_rate: f64,
11632
11633    /// Trust account settings (for law firms).
11634    #[serde(default)]
11635    pub trust_accounting: TrustAccountingConfig,
11636
11637    /// Professional services anomaly injection rates.
11638    #[serde(default)]
11639    pub anomaly_rates: ProfessionalServicesAnomalyRates,
11640}
11641
/// Fallback for `ProfessionalServicesConfig::firm_type`.
fn default_firm_type() -> String {
    String::from("consulting")
}

/// Fallback for `ProfessionalServicesConfig::billing_model`.
fn default_billing_model() -> String {
    String::from("time_and_materials")
}

/// Fallback for `ProfessionalServicesConfig::avg_hourly_rate`.
fn default_hourly_rate() -> f64 {
    250.0_f64
}
11653
11654impl Default for ProfessionalServicesConfig {
11655    fn default() -> Self {
11656        Self {
11657            enabled: false,
11658            firm_type: default_firm_type(),
11659            billing_model: default_billing_model(),
11660            avg_hourly_rate: default_hourly_rate(),
11661            trust_accounting: TrustAccountingConfig::default(),
11662            anomaly_rates: ProfessionalServicesAnomalyRates::default(),
11663        }
11664    }
11665}
11666
11667/// Trust accounting configuration for law firms.
11668#[derive(Debug, Clone, Serialize, Deserialize)]
11669pub struct TrustAccountingConfig {
11670    /// Enable trust accounting.
11671    #[serde(default)]
11672    pub enabled: bool,
11673
11674    /// Require three-way reconciliation.
11675    #[serde(default = "default_true")]
11676    pub require_three_way_reconciliation: bool,
11677}
11678
11679impl Default for TrustAccountingConfig {
11680    fn default() -> Self {
11681        Self {
11682            enabled: false,
11683            require_three_way_reconciliation: true,
11684        }
11685    }
11686}
11687
11688/// Professional services anomaly injection rates.
11689#[derive(Debug, Clone, Serialize, Deserialize)]
11690pub struct ProfessionalServicesAnomalyRates {
11691    /// Time billing fraud rate.
11692    #[serde(default = "default_time_fraud_rate")]
11693    pub time_billing_fraud: f64,
11694
11695    /// Expense report fraud rate.
11696    #[serde(default = "default_expense_fraud_rate")]
11697    pub expense_fraud: f64,
11698
11699    /// Trust misappropriation rate.
11700    #[serde(default = "default_trust_misappropriation_rate")]
11701    pub trust_misappropriation: f64,
11702}
11703
/// Fallback for `ProfessionalServicesAnomalyRates::time_billing_fraud`.
fn default_time_fraud_rate() -> f64 {
    0.02_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::expense_fraud`.
fn default_expense_fraud_rate() -> f64 {
    0.015_f64
}

/// Fallback for `ProfessionalServicesAnomalyRates::trust_misappropriation`.
fn default_trust_misappropriation_rate() -> f64 {
    0.003_f64
}
11715
11716impl Default for ProfessionalServicesAnomalyRates {
11717    fn default() -> Self {
11718        Self {
11719            time_billing_fraud: default_time_fraud_rate(),
11720            expense_fraud: default_expense_fraud_rate(),
11721            trust_misappropriation: default_trust_misappropriation_rate(),
11722        }
11723    }
11724}
11725
11726/// Fingerprint privacy configuration for extraction and synthesis.
11727///
11728/// Controls the privacy parameters used when extracting fingerprints
11729/// from sensitive data. Supports predefined levels or custom (epsilon, delta) tuples.
11730///
11731/// ```yaml
11732/// fingerprint_privacy:
11733///   level: custom
11734///   epsilon: 0.5
11735///   delta: 1.0e-5
11736///   k_anonymity: 10
11737///   composition_method: renyi_dp
11738/// ```
11739#[derive(Debug, Clone, Serialize, Deserialize)]
11740pub struct FingerprintPrivacyConfig {
11741    /// Privacy level preset. Use "custom" for user-specified epsilon/delta.
11742    #[serde(default)]
11743    pub level: String,
11744    /// Custom epsilon value (only used when level = "custom").
11745    #[serde(default = "default_epsilon")]
11746    pub epsilon: f64,
11747    /// Custom delta value for (epsilon, delta)-DP (only used with RDP/zCDP).
11748    #[serde(default = "default_delta")]
11749    pub delta: f64,
11750    /// K-anonymity threshold.
11751    #[serde(default = "default_k_anonymity")]
11752    pub k_anonymity: u32,
11753    /// Composition method: "naive", "advanced", "renyi_dp", "zcdp".
11754    #[serde(default)]
11755    pub composition_method: String,
11756}
11757
/// Fallback for `FingerprintPrivacyConfig::epsilon`.
fn default_epsilon() -> f64 {
    1.0_f64
}

/// Fallback for `FingerprintPrivacyConfig::delta`.
fn default_delta() -> f64 {
    1e-5_f64
}

/// Fallback for `FingerprintPrivacyConfig::k_anonymity`.
fn default_k_anonymity() -> u32 {
    5_u32
}
11769
11770impl Default for FingerprintPrivacyConfig {
11771    fn default() -> Self {
11772        Self {
11773            level: "standard".to_string(),
11774            epsilon: default_epsilon(),
11775            delta: default_delta(),
11776            k_anonymity: default_k_anonymity(),
11777            composition_method: "naive".to_string(),
11778        }
11779    }
11780}
11781
11782/// Quality gates configuration for pass/fail thresholds on generation runs.
11783///
11784/// ```yaml
11785/// quality_gates:
11786///   enabled: true
11787///   profile: strict  # strict, default, lenient, custom
11788///   fail_on_violation: true
11789///   custom_gates:
11790///     - name: benford_compliance
11791///       metric: benford_mad
11792///       threshold: 0.015
11793///       comparison: lte
11794/// ```
11795#[derive(Debug, Clone, Serialize, Deserialize)]
11796pub struct QualityGatesSchemaConfig {
11797    /// Enable quality gate evaluation.
11798    #[serde(default)]
11799    pub enabled: bool,
11800    /// Gate profile: "strict", "default", "lenient", or "custom".
11801    #[serde(default = "default_gate_profile_name")]
11802    pub profile: String,
11803    /// Whether to fail the generation on gate violations.
11804    #[serde(default)]
11805    pub fail_on_violation: bool,
11806    /// Custom gate definitions (used when profile = "custom").
11807    #[serde(default)]
11808    pub custom_gates: Vec<QualityGateEntry>,
11809}
11810
/// Fallback for `QualityGatesSchemaConfig::profile`.
fn default_gate_profile_name() -> String {
    String::from("default")
}
11814
11815impl Default for QualityGatesSchemaConfig {
11816    fn default() -> Self {
11817        Self {
11818            enabled: false,
11819            profile: default_gate_profile_name(),
11820            fail_on_violation: false,
11821            custom_gates: Vec::new(),
11822        }
11823    }
11824}
11825
11826/// A single quality gate entry in configuration.
11827#[derive(Debug, Clone, Serialize, Deserialize)]
11828pub struct QualityGateEntry {
11829    /// Gate name.
11830    pub name: String,
11831    /// Metric to check: benford_mad, balance_coherence, document_chain_integrity,
11832    /// correlation_preservation, temporal_consistency, privacy_mia_auc,
11833    /// completion_rate, duplicate_rate, referential_integrity, ic_match_rate.
11834    pub metric: String,
11835    /// Threshold value.
11836    pub threshold: f64,
11837    /// Upper threshold for "between" comparison.
11838    #[serde(default)]
11839    pub upper_threshold: Option<f64>,
11840    /// Comparison operator: "gte", "lte", "eq", "between".
11841    #[serde(default = "default_gate_comparison")]
11842    pub comparison: String,
11843}
11844
/// Fallback for `QualityGateEntry::comparison`.
fn default_gate_comparison() -> String {
    String::from("gte")
}
11848
11849/// Compliance configuration for regulatory requirements.
11850///
11851/// ```yaml
11852/// compliance:
11853///   content_marking:
11854///     enabled: true
11855///     format: embedded  # embedded, sidecar, both
11856///   article10_report: true
11857/// ```
11858#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11859pub struct ComplianceSchemaConfig {
11860    /// Synthetic content marking configuration (EU AI Act Article 50).
11861    #[serde(default)]
11862    pub content_marking: ContentMarkingSchemaConfig,
11863    /// Generate Article 10 data governance report.
11864    #[serde(default)]
11865    pub article10_report: bool,
11866    /// Certificate configuration for proving DP guarantees.
11867    #[serde(default)]
11868    pub certificates: CertificateSchemaConfig,
11869}
11870
11871/// Configuration for synthetic data certificates.
11872#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11873pub struct CertificateSchemaConfig {
11874    /// Whether certificate generation is enabled.
11875    #[serde(default)]
11876    pub enabled: bool,
11877    /// Environment variable name for the signing key.
11878    #[serde(default)]
11879    pub signing_key_env: Option<String>,
11880    /// Whether to include quality metrics in the certificate.
11881    #[serde(default)]
11882    pub include_quality_metrics: bool,
11883}
11884
11885/// Content marking configuration for synthetic data output.
11886#[derive(Debug, Clone, Serialize, Deserialize)]
11887pub struct ContentMarkingSchemaConfig {
11888    /// Whether content marking is enabled.
11889    #[serde(default = "default_true")]
11890    pub enabled: bool,
11891    /// Marking format: "embedded", "sidecar", or "both".
11892    #[serde(default = "default_marking_format")]
11893    pub format: String,
11894}
11895
/// Fallback for `ContentMarkingSchemaConfig::format`.
fn default_marking_format() -> String {
    String::from("embedded")
}
11899
11900impl Default for ContentMarkingSchemaConfig {
11901    fn default() -> Self {
11902        Self {
11903            enabled: true,
11904            format: default_marking_format(),
11905        }
11906    }
11907}
11908
11909/// Webhook notification configuration.
11910#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11911pub struct WebhookSchemaConfig {
11912    /// Whether webhooks are enabled.
11913    #[serde(default)]
11914    pub enabled: bool,
11915    /// Webhook endpoint configurations.
11916    #[serde(default)]
11917    pub endpoints: Vec<WebhookEndpointConfig>,
11918}
11919
11920/// Configuration for a single webhook endpoint.
11921#[derive(Debug, Clone, Serialize, Deserialize)]
11922pub struct WebhookEndpointConfig {
11923    /// Target URL for the webhook.
11924    pub url: String,
11925    /// Event types this endpoint subscribes to.
11926    #[serde(default)]
11927    pub events: Vec<String>,
11928    /// Optional secret for HMAC-SHA256 signature.
11929    #[serde(default)]
11930    pub secret: Option<String>,
11931    /// Maximum retry attempts (default: 3).
11932    #[serde(default = "default_webhook_retries")]
11933    pub max_retries: u32,
11934    /// Timeout in seconds (default: 10).
11935    #[serde(default = "default_webhook_timeout")]
11936    pub timeout_secs: u64,
11937}
11938
/// Fallback for `WebhookEndpointConfig::max_retries`.
fn default_webhook_retries() -> u32 {
    3_u32
}

/// Fallback for `WebhookEndpointConfig::timeout_secs`.
fn default_webhook_timeout() -> u64 {
    10_u64
}
11945
11946// ===== Enterprise Process Chain Config Structs =====
11947
11948// ----- Source-to-Pay (S2C/S2P) -----
11949
11950/// Source-to-Pay configuration covering the entire sourcing lifecycle.
11951#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11952pub struct SourceToPayConfig {
11953    /// Enable source-to-pay generation
11954    #[serde(default)]
11955    pub enabled: bool,
11956    /// Spend analysis configuration
11957    #[serde(default)]
11958    pub spend_analysis: SpendAnalysisConfig,
11959    /// Sourcing project configuration
11960    #[serde(default)]
11961    pub sourcing: SourcingConfig,
11962    /// Supplier qualification configuration
11963    #[serde(default)]
11964    pub qualification: QualificationConfig,
11965    /// RFx event configuration
11966    #[serde(default)]
11967    pub rfx: RfxConfig,
11968    /// Contract configuration
11969    #[serde(default)]
11970    pub contracts: ContractConfig,
11971    /// Catalog configuration
11972    #[serde(default)]
11973    pub catalog: CatalogConfig,
11974    /// Scorecard configuration
11975    #[serde(default)]
11976    pub scorecards: ScorecardConfig,
11977    /// P2P integration settings
11978    #[serde(default)]
11979    pub p2p_integration: P2PIntegrationConfig,
11980}
11981
11982/// Spend analysis configuration.
11983#[derive(Debug, Clone, Serialize, Deserialize)]
11984pub struct SpendAnalysisConfig {
11985    /// HHI threshold for triggering sourcing project
11986    #[serde(default = "default_hhi_threshold")]
11987    pub hhi_threshold: f64,
11988    /// Target spend coverage under contracts
11989    #[serde(default = "default_contract_coverage_target")]
11990    pub contract_coverage_target: f64,
11991}
11992
11993impl Default for SpendAnalysisConfig {
11994    fn default() -> Self {
11995        Self {
11996            hhi_threshold: default_hhi_threshold(),
11997            contract_coverage_target: default_contract_coverage_target(),
11998        }
11999    }
12000}
12001
/// Serde default for `SpendAnalysisConfig::hhi_threshold`.
fn default_hhi_threshold() -> f64 {
    2_500.0_f64
}

/// Serde default for `SpendAnalysisConfig::contract_coverage_target`.
fn default_contract_coverage_target() -> f64 {
    0.8_f64
}
12008
12009/// Sourcing project configuration.
12010#[derive(Debug, Clone, Serialize, Deserialize)]
12011pub struct SourcingConfig {
12012    /// Number of sourcing projects per year
12013    #[serde(default = "default_sourcing_projects_per_year")]
12014    pub projects_per_year: u32,
12015    /// Months before expiry to trigger renewal project
12016    #[serde(default = "default_renewal_horizon_months")]
12017    pub renewal_horizon_months: u32,
12018    /// Average project duration in months
12019    #[serde(default = "default_project_duration_months")]
12020    pub project_duration_months: u32,
12021}
12022
12023impl Default for SourcingConfig {
12024    fn default() -> Self {
12025        Self {
12026            projects_per_year: default_sourcing_projects_per_year(),
12027            renewal_horizon_months: default_renewal_horizon_months(),
12028            project_duration_months: default_project_duration_months(),
12029        }
12030    }
12031}
12032
/// Serde default for `SourcingConfig::projects_per_year`.
fn default_sourcing_projects_per_year() -> u32 {
    10_u32
}

/// Serde default for `SourcingConfig::renewal_horizon_months`.
fn default_renewal_horizon_months() -> u32 {
    3_u32
}

/// Serde default for `SourcingConfig::project_duration_months`.
fn default_project_duration_months() -> u32 {
    4_u32
}
12042
12043/// Supplier qualification configuration.
12044#[derive(Debug, Clone, Serialize, Deserialize)]
12045pub struct QualificationConfig {
12046    /// Pass rate for qualification
12047    #[serde(default = "default_qualification_pass_rate")]
12048    pub pass_rate: f64,
12049    /// Qualification validity in days
12050    #[serde(default = "default_qualification_validity_days")]
12051    pub validity_days: u32,
12052    /// Financial stability weight
12053    #[serde(default = "default_financial_weight")]
12054    pub financial_weight: f64,
12055    /// Quality management weight
12056    #[serde(default = "default_quality_weight")]
12057    pub quality_weight: f64,
12058    /// Delivery performance weight
12059    #[serde(default = "default_delivery_weight")]
12060    pub delivery_weight: f64,
12061    /// Compliance weight
12062    #[serde(default = "default_compliance_weight")]
12063    pub compliance_weight: f64,
12064}
12065
12066impl Default for QualificationConfig {
12067    fn default() -> Self {
12068        Self {
12069            pass_rate: default_qualification_pass_rate(),
12070            validity_days: default_qualification_validity_days(),
12071            financial_weight: default_financial_weight(),
12072            quality_weight: default_quality_weight(),
12073            delivery_weight: default_delivery_weight(),
12074            compliance_weight: default_compliance_weight(),
12075        }
12076    }
12077}
12078
/// Serde default for `QualificationConfig::pass_rate`.
fn default_qualification_pass_rate() -> f64 {
    0.75_f64
}

/// Serde default for `QualificationConfig::validity_days`.
fn default_qualification_validity_days() -> u32 {
    365_u32
}

/// Serde default for `QualificationConfig::financial_weight`.
fn default_financial_weight() -> f64 {
    0.25_f64
}

/// Serde default for `QualificationConfig::quality_weight`.
fn default_quality_weight() -> f64 {
    0.3_f64
}

/// Serde default for `QualificationConfig::delivery_weight`.
fn default_delivery_weight() -> f64 {
    0.25_f64
}

/// Serde default for `QualificationConfig::compliance_weight`.
fn default_compliance_weight() -> f64 {
    0.2_f64
}
12097
12098/// RFx event configuration.
12099#[derive(Debug, Clone, Serialize, Deserialize)]
12100pub struct RfxConfig {
12101    /// Spend threshold above which RFI is required before RFP
12102    #[serde(default = "default_rfi_threshold")]
12103    pub rfi_threshold: f64,
12104    /// Minimum vendors invited per RFx
12105    #[serde(default = "default_min_invited_vendors")]
12106    pub min_invited_vendors: u32,
12107    /// Maximum vendors invited per RFx
12108    #[serde(default = "default_max_invited_vendors")]
12109    pub max_invited_vendors: u32,
12110    /// Response rate (% of invited vendors that submit bids)
12111    #[serde(default = "default_response_rate")]
12112    pub response_rate: f64,
12113    /// Default price weight in evaluation
12114    #[serde(default = "default_price_weight")]
12115    pub default_price_weight: f64,
12116    /// Default quality weight in evaluation
12117    #[serde(default = "default_rfx_quality_weight")]
12118    pub default_quality_weight: f64,
12119    /// Default delivery weight in evaluation
12120    #[serde(default = "default_rfx_delivery_weight")]
12121    pub default_delivery_weight: f64,
12122}
12123
12124impl Default for RfxConfig {
12125    fn default() -> Self {
12126        Self {
12127            rfi_threshold: default_rfi_threshold(),
12128            min_invited_vendors: default_min_invited_vendors(),
12129            max_invited_vendors: default_max_invited_vendors(),
12130            response_rate: default_response_rate(),
12131            default_price_weight: default_price_weight(),
12132            default_quality_weight: default_rfx_quality_weight(),
12133            default_delivery_weight: default_rfx_delivery_weight(),
12134        }
12135    }
12136}
12137
/// Serde default for `RfxConfig::rfi_threshold`.
fn default_rfi_threshold() -> f64 {
    100_000.0_f64
}

/// Serde default for `RfxConfig::min_invited_vendors`.
fn default_min_invited_vendors() -> u32 {
    3_u32
}

/// Serde default for `RfxConfig::max_invited_vendors`.
fn default_max_invited_vendors() -> u32 {
    8_u32
}

/// Serde default for `RfxConfig::response_rate`.
fn default_response_rate() -> f64 {
    0.7_f64
}

/// Serde default for `RfxConfig::default_price_weight`.
fn default_price_weight() -> f64 {
    0.4_f64
}

/// Serde default for `RfxConfig::default_quality_weight`.
fn default_rfx_quality_weight() -> f64 {
    0.35_f64
}

/// Serde default for `RfxConfig::default_delivery_weight`.
fn default_rfx_delivery_weight() -> f64 {
    0.25_f64
}
12159
12160/// Contract configuration.
12161#[derive(Debug, Clone, Serialize, Deserialize)]
12162pub struct ContractConfig {
12163    /// Minimum contract duration in months
12164    #[serde(default = "default_min_contract_months")]
12165    pub min_duration_months: u32,
12166    /// Maximum contract duration in months
12167    #[serde(default = "default_max_contract_months")]
12168    pub max_duration_months: u32,
12169    /// Auto-renewal rate
12170    #[serde(default = "default_auto_renewal_rate")]
12171    pub auto_renewal_rate: f64,
12172    /// Amendment rate (% of contracts with at least one amendment)
12173    #[serde(default = "default_amendment_rate")]
12174    pub amendment_rate: f64,
12175    /// Distribution of contract types
12176    #[serde(default)]
12177    pub type_distribution: ContractTypeDistribution,
12178}
12179
12180impl Default for ContractConfig {
12181    fn default() -> Self {
12182        Self {
12183            min_duration_months: default_min_contract_months(),
12184            max_duration_months: default_max_contract_months(),
12185            auto_renewal_rate: default_auto_renewal_rate(),
12186            amendment_rate: default_amendment_rate(),
12187            type_distribution: ContractTypeDistribution::default(),
12188        }
12189    }
12190}
12191
/// Serde default for `ContractConfig::min_duration_months`.
fn default_min_contract_months() -> u32 {
    12_u32
}

/// Serde default for `ContractConfig::max_duration_months`.
fn default_max_contract_months() -> u32 {
    36_u32
}

/// Serde default for `ContractConfig::auto_renewal_rate`.
fn default_auto_renewal_rate() -> f64 {
    0.4_f64
}

/// Serde default for `ContractConfig::amendment_rate`.
fn default_amendment_rate() -> f64 {
    0.2_f64
}
12204
12205/// Distribution of contract types.
12206#[derive(Debug, Clone, Serialize, Deserialize)]
12207pub struct ContractTypeDistribution {
12208    /// Fixed price percentage
12209    #[serde(default = "default_fixed_price_pct")]
12210    pub fixed_price: f64,
12211    /// Blanket/framework percentage
12212    #[serde(default = "default_blanket_pct")]
12213    pub blanket: f64,
12214    /// Time and materials percentage
12215    #[serde(default = "default_time_materials_pct")]
12216    pub time_and_materials: f64,
12217    /// Service agreement percentage
12218    #[serde(default = "default_service_agreement_pct")]
12219    pub service_agreement: f64,
12220}
12221
12222impl Default for ContractTypeDistribution {
12223    fn default() -> Self {
12224        Self {
12225            fixed_price: default_fixed_price_pct(),
12226            blanket: default_blanket_pct(),
12227            time_and_materials: default_time_materials_pct(),
12228            service_agreement: default_service_agreement_pct(),
12229        }
12230    }
12231}
12232
/// Serde default for `ContractTypeDistribution::fixed_price`.
fn default_fixed_price_pct() -> f64 {
    0.4_f64
}

/// Serde default for `ContractTypeDistribution::blanket`.
fn default_blanket_pct() -> f64 {
    0.3_f64
}

/// Serde default for `ContractTypeDistribution::time_and_materials`.
fn default_time_materials_pct() -> f64 {
    0.15_f64
}

/// Serde default for `ContractTypeDistribution::service_agreement`.
fn default_service_agreement_pct() -> f64 {
    0.15_f64
}
12245
12246/// Catalog configuration.
12247#[derive(Debug, Clone, Serialize, Deserialize)]
12248pub struct CatalogConfig {
12249    /// Percentage of catalog items marked as preferred
12250    #[serde(default = "default_preferred_vendor_flag_rate")]
12251    pub preferred_vendor_flag_rate: f64,
12252    /// Rate of materials with multiple sources in catalog
12253    #[serde(default = "default_multi_source_rate")]
12254    pub multi_source_rate: f64,
12255}
12256
12257impl Default for CatalogConfig {
12258    fn default() -> Self {
12259        Self {
12260            preferred_vendor_flag_rate: default_preferred_vendor_flag_rate(),
12261            multi_source_rate: default_multi_source_rate(),
12262        }
12263    }
12264}
12265
/// Serde default for `CatalogConfig::preferred_vendor_flag_rate`.
fn default_preferred_vendor_flag_rate() -> f64 {
    0.7_f64
}

/// Serde default for `CatalogConfig::multi_source_rate`.
fn default_multi_source_rate() -> f64 {
    0.25_f64
}
12272
12273/// Scorecard configuration.
12274#[derive(Debug, Clone, Serialize, Deserialize)]
12275pub struct ScorecardConfig {
12276    /// Scorecard review frequency (quarterly, monthly)
12277    #[serde(default = "default_scorecard_frequency")]
12278    pub frequency: String,
12279    /// On-time delivery weight in overall score
12280    #[serde(default = "default_otd_weight")]
12281    pub on_time_delivery_weight: f64,
12282    /// Quality weight in overall score
12283    #[serde(default = "default_quality_score_weight")]
12284    pub quality_weight: f64,
12285    /// Price competitiveness weight
12286    #[serde(default = "default_price_score_weight")]
12287    pub price_weight: f64,
12288    /// Responsiveness weight
12289    #[serde(default = "default_responsiveness_weight")]
12290    pub responsiveness_weight: f64,
12291    /// Grade A threshold (score >= this)
12292    #[serde(default = "default_grade_a_threshold")]
12293    pub grade_a_threshold: f64,
12294    /// Grade B threshold
12295    #[serde(default = "default_grade_b_threshold")]
12296    pub grade_b_threshold: f64,
12297    /// Grade C threshold
12298    #[serde(default = "default_grade_c_threshold")]
12299    pub grade_c_threshold: f64,
12300}
12301
12302impl Default for ScorecardConfig {
12303    fn default() -> Self {
12304        Self {
12305            frequency: default_scorecard_frequency(),
12306            on_time_delivery_weight: default_otd_weight(),
12307            quality_weight: default_quality_score_weight(),
12308            price_weight: default_price_score_weight(),
12309            responsiveness_weight: default_responsiveness_weight(),
12310            grade_a_threshold: default_grade_a_threshold(),
12311            grade_b_threshold: default_grade_b_threshold(),
12312            grade_c_threshold: default_grade_c_threshold(),
12313        }
12314    }
12315}
12316
/// Serde default for `ScorecardConfig::frequency`.
fn default_scorecard_frequency() -> String {
    String::from("quarterly")
}

/// Serde default for `ScorecardConfig::on_time_delivery_weight`.
fn default_otd_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ScorecardConfig::quality_weight`.
fn default_quality_score_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ScorecardConfig::price_weight`.
fn default_price_score_weight() -> f64 {
    0.25_f64
}

/// Serde default for `ScorecardConfig::responsiveness_weight`.
fn default_responsiveness_weight() -> f64 {
    0.15_f64
}

/// Serde default for `ScorecardConfig::grade_a_threshold`.
fn default_grade_a_threshold() -> f64 {
    90.0_f64
}

/// Serde default for `ScorecardConfig::grade_b_threshold`.
fn default_grade_b_threshold() -> f64 {
    75.0_f64
}

/// Serde default for `ScorecardConfig::grade_c_threshold`.
fn default_grade_c_threshold() -> f64 {
    60.0_f64
}
12341
12342/// P2P integration settings for contract enforcement.
12343#[derive(Debug, Clone, Serialize, Deserialize)]
12344pub struct P2PIntegrationConfig {
12345    /// Rate of off-contract (maverick) purchases
12346    #[serde(default = "default_off_contract_rate")]
12347    pub off_contract_rate: f64,
12348    /// Price tolerance for contract price validation
12349    #[serde(default = "default_price_tolerance")]
12350    pub price_tolerance: f64,
12351    /// Whether to enforce catalog ordering
12352    #[serde(default)]
12353    pub catalog_enforcement: bool,
12354}
12355
12356impl Default for P2PIntegrationConfig {
12357    fn default() -> Self {
12358        Self {
12359            off_contract_rate: default_off_contract_rate(),
12360            price_tolerance: default_price_tolerance(),
12361            catalog_enforcement: false,
12362        }
12363    }
12364}
12365
/// Serde default for `P2PIntegrationConfig::off_contract_rate`.
fn default_off_contract_rate() -> f64 {
    0.15_f64
}

/// Serde default for `P2PIntegrationConfig::price_tolerance`.
fn default_price_tolerance() -> f64 {
    0.02_f64
}
12372
12373// ----- Financial Reporting -----
12374
12375/// Financial reporting configuration.
12376#[derive(Debug, Clone, Serialize, Deserialize)]
12377pub struct FinancialReportingConfig {
12378    /// Enable financial reporting generation
12379    #[serde(default)]
12380    pub enabled: bool,
12381    /// Generate balance sheet
12382    #[serde(default = "default_true")]
12383    pub generate_balance_sheet: bool,
12384    /// Generate income statement
12385    #[serde(default = "default_true")]
12386    pub generate_income_statement: bool,
12387    /// Generate cash flow statement
12388    #[serde(default = "default_true")]
12389    pub generate_cash_flow: bool,
12390    /// Generate changes in equity statement
12391    #[serde(default = "default_true")]
12392    pub generate_changes_in_equity: bool,
12393    /// Number of comparative periods
12394    #[serde(default = "default_comparative_periods")]
12395    pub comparative_periods: u32,
12396    /// Management KPIs configuration
12397    #[serde(default)]
12398    pub management_kpis: ManagementKpisConfig,
12399    /// Budget configuration
12400    #[serde(default)]
12401    pub budgets: BudgetConfig,
12402}
12403
12404impl Default for FinancialReportingConfig {
12405    fn default() -> Self {
12406        Self {
12407            enabled: false,
12408            generate_balance_sheet: true,
12409            generate_income_statement: true,
12410            generate_cash_flow: true,
12411            generate_changes_in_equity: true,
12412            comparative_periods: default_comparative_periods(),
12413            management_kpis: ManagementKpisConfig::default(),
12414            budgets: BudgetConfig::default(),
12415        }
12416    }
12417}
12418
/// Serde default for `FinancialReportingConfig::comparative_periods`.
fn default_comparative_periods() -> u32 {
    1_u32
}
12422
12423/// Management KPIs configuration.
12424#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12425pub struct ManagementKpisConfig {
12426    /// Enable KPI generation
12427    #[serde(default)]
12428    pub enabled: bool,
12429    /// KPI calculation frequency (monthly, quarterly)
12430    #[serde(default = "default_kpi_frequency")]
12431    pub frequency: String,
12432}
12433
12434fn default_kpi_frequency() -> String {
12435    "monthly".to_string()
12436}
12437
12438/// Budget configuration.
12439#[derive(Debug, Clone, Serialize, Deserialize)]
12440pub struct BudgetConfig {
12441    /// Enable budget generation
12442    #[serde(default)]
12443    pub enabled: bool,
12444    /// Expected revenue growth rate for budgeting
12445    #[serde(default = "default_revenue_growth_rate")]
12446    pub revenue_growth_rate: f64,
12447    /// Expected expense inflation rate
12448    #[serde(default = "default_expense_inflation_rate")]
12449    pub expense_inflation_rate: f64,
12450    /// Random noise to add to budget vs actual
12451    #[serde(default = "default_variance_noise")]
12452    pub variance_noise: f64,
12453}
12454
12455impl Default for BudgetConfig {
12456    fn default() -> Self {
12457        Self {
12458            enabled: false,
12459            revenue_growth_rate: default_revenue_growth_rate(),
12460            expense_inflation_rate: default_expense_inflation_rate(),
12461            variance_noise: default_variance_noise(),
12462        }
12463    }
12464}
12465
/// Serde default for `BudgetConfig::revenue_growth_rate`.
fn default_revenue_growth_rate() -> f64 {
    0.05_f64
}

/// Serde default for `BudgetConfig::expense_inflation_rate`.
fn default_expense_inflation_rate() -> f64 {
    0.03_f64
}

/// Serde default for `BudgetConfig::variance_noise`.
fn default_variance_noise() -> f64 {
    0.1_f64
}
12475
12476// ----- HR Configuration -----
12477
12478/// HR (Hire-to-Retire) process configuration.
12479#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12480pub struct HrConfig {
12481    /// Enable HR generation
12482    #[serde(default)]
12483    pub enabled: bool,
12484    /// Payroll configuration
12485    #[serde(default)]
12486    pub payroll: PayrollConfig,
12487    /// Time and attendance configuration
12488    #[serde(default)]
12489    pub time_attendance: TimeAttendanceConfig,
12490    /// Expense management configuration
12491    #[serde(default)]
12492    pub expenses: ExpenseConfig,
12493}
12494
12495/// Payroll configuration.
12496#[derive(Debug, Clone, Serialize, Deserialize)]
12497pub struct PayrollConfig {
12498    /// Enable payroll generation
12499    #[serde(default = "default_true")]
12500    pub enabled: bool,
12501    /// Pay frequency (monthly, biweekly, weekly)
12502    #[serde(default = "default_pay_frequency")]
12503    pub pay_frequency: String,
12504    /// Salary ranges by job level
12505    #[serde(default)]
12506    pub salary_ranges: PayrollSalaryRanges,
12507    /// Effective tax rates
12508    #[serde(default)]
12509    pub tax_rates: PayrollTaxRates,
12510    /// Benefits enrollment rate
12511    #[serde(default = "default_benefits_enrollment_rate")]
12512    pub benefits_enrollment_rate: f64,
12513    /// Retirement plan participation rate
12514    #[serde(default = "default_retirement_participation_rate")]
12515    pub retirement_participation_rate: f64,
12516}
12517
12518impl Default for PayrollConfig {
12519    fn default() -> Self {
12520        Self {
12521            enabled: true,
12522            pay_frequency: default_pay_frequency(),
12523            salary_ranges: PayrollSalaryRanges::default(),
12524            tax_rates: PayrollTaxRates::default(),
12525            benefits_enrollment_rate: default_benefits_enrollment_rate(),
12526            retirement_participation_rate: default_retirement_participation_rate(),
12527        }
12528    }
12529}
12530
/// Serde default for `PayrollConfig::pay_frequency`.
fn default_pay_frequency() -> String {
    String::from("monthly")
}

/// Serde default for `PayrollConfig::benefits_enrollment_rate`.
fn default_benefits_enrollment_rate() -> f64 {
    0.6_f64
}

/// Serde default for `PayrollConfig::retirement_participation_rate`.
fn default_retirement_participation_rate() -> f64 {
    0.45_f64
}
12540
12541/// Salary ranges by job level.
12542#[derive(Debug, Clone, Serialize, Deserialize)]
12543pub struct PayrollSalaryRanges {
12544    /// Staff level min/max
12545    #[serde(default = "default_staff_min")]
12546    pub staff_min: f64,
12547    #[serde(default = "default_staff_max")]
12548    pub staff_max: f64,
12549    /// Manager level min/max
12550    #[serde(default = "default_manager_min")]
12551    pub manager_min: f64,
12552    #[serde(default = "default_manager_max")]
12553    pub manager_max: f64,
12554    /// Director level min/max
12555    #[serde(default = "default_director_min")]
12556    pub director_min: f64,
12557    #[serde(default = "default_director_max")]
12558    pub director_max: f64,
12559    /// Executive level min/max
12560    #[serde(default = "default_executive_min")]
12561    pub executive_min: f64,
12562    #[serde(default = "default_executive_max")]
12563    pub executive_max: f64,
12564}
12565
12566impl Default for PayrollSalaryRanges {
12567    fn default() -> Self {
12568        Self {
12569            staff_min: default_staff_min(),
12570            staff_max: default_staff_max(),
12571            manager_min: default_manager_min(),
12572            manager_max: default_manager_max(),
12573            director_min: default_director_min(),
12574            director_max: default_director_max(),
12575            executive_min: default_executive_min(),
12576            executive_max: default_executive_max(),
12577        }
12578    }
12579}
12580
/// Serde default for `PayrollSalaryRanges::staff_min`.
fn default_staff_min() -> f64 {
    50_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::staff_max`.
fn default_staff_max() -> f64 {
    70_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::manager_min`.
fn default_manager_min() -> f64 {
    80_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::manager_max`.
fn default_manager_max() -> f64 {
    120_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::director_min`.
fn default_director_min() -> f64 {
    120_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::director_max`.
fn default_director_max() -> f64 {
    180_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::executive_min`.
fn default_executive_min() -> f64 {
    180_000.0_f64
}

/// Serde default for `PayrollSalaryRanges::executive_max`.
fn default_executive_max() -> f64 {
    350_000.0_f64
}
12605
12606/// Effective tax rates for payroll.
12607#[derive(Debug, Clone, Serialize, Deserialize)]
12608pub struct PayrollTaxRates {
12609    /// Federal effective tax rate
12610    #[serde(default = "default_federal_rate")]
12611    pub federal_effective: f64,
12612    /// State effective tax rate
12613    #[serde(default = "default_state_rate")]
12614    pub state_effective: f64,
12615    /// FICA/social security rate
12616    #[serde(default = "default_fica_rate")]
12617    pub fica: f64,
12618}
12619
12620impl Default for PayrollTaxRates {
12621    fn default() -> Self {
12622        Self {
12623            federal_effective: default_federal_rate(),
12624            state_effective: default_state_rate(),
12625            fica: default_fica_rate(),
12626        }
12627    }
12628}
12629
/// Serde default for `PayrollTaxRates::federal_effective`.
fn default_federal_rate() -> f64 {
    0.22_f64
}

/// Serde default for `PayrollTaxRates::state_effective`.
fn default_state_rate() -> f64 {
    0.05_f64
}

/// Serde default for `PayrollTaxRates::fica`.
fn default_fica_rate() -> f64 {
    0.0765_f64
}
12639
12640/// Time and attendance configuration.
12641#[derive(Debug, Clone, Serialize, Deserialize)]
12642pub struct TimeAttendanceConfig {
12643    /// Enable time tracking
12644    #[serde(default = "default_true")]
12645    pub enabled: bool,
12646    /// Overtime rate (% of employees with overtime in a period)
12647    #[serde(default = "default_overtime_rate")]
12648    pub overtime_rate: f64,
12649}
12650
12651impl Default for TimeAttendanceConfig {
12652    fn default() -> Self {
12653        Self {
12654            enabled: true,
12655            overtime_rate: default_overtime_rate(),
12656        }
12657    }
12658}
12659
/// Serde default for `TimeAttendanceConfig::overtime_rate`.
fn default_overtime_rate() -> f64 {
    0.1_f64
}
12663
12664/// Expense management configuration.
12665#[derive(Debug, Clone, Serialize, Deserialize)]
12666pub struct ExpenseConfig {
12667    /// Enable expense report generation
12668    #[serde(default = "default_true")]
12669    pub enabled: bool,
12670    /// Rate of employees submitting expenses per month
12671    #[serde(default = "default_expense_submission_rate")]
12672    pub submission_rate: f64,
12673    /// Rate of policy violations
12674    #[serde(default = "default_policy_violation_rate")]
12675    pub policy_violation_rate: f64,
12676}
12677
12678impl Default for ExpenseConfig {
12679    fn default() -> Self {
12680        Self {
12681            enabled: true,
12682            submission_rate: default_expense_submission_rate(),
12683            policy_violation_rate: default_policy_violation_rate(),
12684        }
12685    }
12686}
12687
/// Serde default for `ExpenseConfig::submission_rate`.
fn default_expense_submission_rate() -> f64 {
    0.3_f64
}

/// Serde default for `ExpenseConfig::policy_violation_rate`.
fn default_policy_violation_rate() -> f64 {
    0.08_f64
}
12694
12695// ----- Manufacturing Configuration -----
12696
12697/// Manufacturing process configuration (production orders, WIP, routing).
12698#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12699pub struct ManufacturingProcessConfig {
12700    /// Enable manufacturing generation
12701    #[serde(default)]
12702    pub enabled: bool,
12703    /// Production order configuration
12704    #[serde(default)]
12705    pub production_orders: ProductionOrderConfig,
12706    /// Costing configuration
12707    #[serde(default)]
12708    pub costing: ManufacturingCostingConfig,
12709    /// Routing configuration
12710    #[serde(default)]
12711    pub routing: RoutingConfig,
12712}
12713
12714/// Production order configuration.
12715#[derive(Debug, Clone, Serialize, Deserialize)]
12716pub struct ProductionOrderConfig {
12717    /// Orders per month
12718    #[serde(default = "default_prod_orders_per_month")]
12719    pub orders_per_month: u32,
12720    /// Average batch size
12721    #[serde(default = "default_prod_avg_batch_size")]
12722    pub avg_batch_size: u32,
12723    /// Yield rate
12724    #[serde(default = "default_prod_yield_rate")]
12725    pub yield_rate: f64,
12726    /// Make-to-order rate (vs make-to-stock)
12727    #[serde(default = "default_prod_make_to_order_rate")]
12728    pub make_to_order_rate: f64,
12729    /// Rework rate
12730    #[serde(default = "default_prod_rework_rate")]
12731    pub rework_rate: f64,
12732}
12733
12734impl Default for ProductionOrderConfig {
12735    fn default() -> Self {
12736        Self {
12737            orders_per_month: default_prod_orders_per_month(),
12738            avg_batch_size: default_prod_avg_batch_size(),
12739            yield_rate: default_prod_yield_rate(),
12740            make_to_order_rate: default_prod_make_to_order_rate(),
12741            rework_rate: default_prod_rework_rate(),
12742        }
12743    }
12744}
12745
/// Serde default for `ProductionOrderConfig::orders_per_month`.
fn default_prod_orders_per_month() -> u32 {
    50_u32
}

/// Serde default for `ProductionOrderConfig::avg_batch_size`.
fn default_prod_avg_batch_size() -> u32 {
    100_u32
}

/// Serde default for `ProductionOrderConfig::yield_rate`.
fn default_prod_yield_rate() -> f64 {
    0.97_f64
}

/// Serde default for `ProductionOrderConfig::make_to_order_rate`.
fn default_prod_make_to_order_rate() -> f64 {
    0.2_f64
}

/// Serde default for `ProductionOrderConfig::rework_rate`.
fn default_prod_rework_rate() -> f64 {
    0.03_f64
}
12761
12762/// Manufacturing costing configuration.
12763#[derive(Debug, Clone, Serialize, Deserialize)]
12764pub struct ManufacturingCostingConfig {
12765    /// Labor rate per hour
12766    #[serde(default = "default_labor_rate")]
12767    pub labor_rate_per_hour: f64,
12768    /// Overhead application rate (multiplier on direct labor)
12769    #[serde(default = "default_overhead_rate")]
12770    pub overhead_rate: f64,
12771    /// Standard cost update frequency
12772    #[serde(default = "default_cost_update_frequency")]
12773    pub standard_cost_update_frequency: String,
12774}
12775
12776impl Default for ManufacturingCostingConfig {
12777    fn default() -> Self {
12778        Self {
12779            labor_rate_per_hour: default_labor_rate(),
12780            overhead_rate: default_overhead_rate(),
12781            standard_cost_update_frequency: default_cost_update_frequency(),
12782        }
12783    }
12784}
12785
/// Serde default for `ManufacturingCostingConfig::labor_rate_per_hour`.
fn default_labor_rate() -> f64 {
    35.0_f64
}

/// Serde default for `ManufacturingCostingConfig::overhead_rate`.
fn default_overhead_rate() -> f64 {
    1.5_f64
}

/// Serde default for `ManufacturingCostingConfig::standard_cost_update_frequency`.
fn default_cost_update_frequency() -> String {
    String::from("quarterly")
}
12795
12796/// Routing configuration for production operations.
12797#[derive(Debug, Clone, Serialize, Deserialize)]
12798pub struct RoutingConfig {
12799    /// Average number of operations per routing
12800    #[serde(default = "default_avg_operations")]
12801    pub avg_operations: u32,
12802    /// Average setup time in hours
12803    #[serde(default = "default_setup_time")]
12804    pub setup_time_hours: f64,
12805    /// Run time variation coefficient
12806    #[serde(default = "default_run_time_variation")]
12807    pub run_time_variation: f64,
12808}
12809
12810impl Default for RoutingConfig {
12811    fn default() -> Self {
12812        Self {
12813            avg_operations: default_avg_operations(),
12814            setup_time_hours: default_setup_time(),
12815            run_time_variation: default_run_time_variation(),
12816        }
12817    }
12818}
12819
/// Serde default for `RoutingConfig::avg_operations`.
fn default_avg_operations() -> u32 {
    4_u32
}

/// Serde default for `RoutingConfig::setup_time_hours`.
fn default_setup_time() -> f64 {
    1.5_f64
}

/// Serde default for `RoutingConfig::run_time_variation`.
fn default_run_time_variation() -> f64 {
    0.15_f64
}
12829
12830// ----- Sales Quote Configuration -----
12831
12832/// Sales quote (quote-to-order) pipeline configuration.
12833#[derive(Debug, Clone, Serialize, Deserialize)]
12834pub struct SalesQuoteConfig {
12835    /// Enable sales quote generation
12836    #[serde(default)]
12837    pub enabled: bool,
12838    /// Quotes per month
12839    #[serde(default = "default_quotes_per_month")]
12840    pub quotes_per_month: u32,
12841    /// Win rate (fraction of quotes that convert to orders)
12842    #[serde(default = "default_quote_win_rate")]
12843    pub win_rate: f64,
12844    /// Average quote validity in days
12845    #[serde(default = "default_quote_validity_days")]
12846    pub validity_days: u32,
12847}
12848
12849impl Default for SalesQuoteConfig {
12850    fn default() -> Self {
12851        Self {
12852            enabled: false,
12853            quotes_per_month: default_quotes_per_month(),
12854            win_rate: default_quote_win_rate(),
12855            validity_days: default_quote_validity_days(),
12856        }
12857    }
12858}
12859
/// Serde default for `SalesQuoteConfig::quotes_per_month`.
fn default_quotes_per_month() -> u32 {
    30_u32
}

/// Serde default for `SalesQuoteConfig::win_rate`.
fn default_quote_win_rate() -> f64 {
    0.35_f64
}

/// Serde default for `SalesQuoteConfig::validity_days`.
fn default_quote_validity_days() -> u32 {
    30_u32
}
12869
12870// =============================================================================
12871// Tax Accounting Configuration
12872// =============================================================================
12873
12874/// Tax accounting configuration.
12875///
12876/// Controls generation of tax-related data including VAT/GST, sales tax,
12877/// withholding tax, tax provisions, and payroll tax across multiple jurisdictions.
12878#[derive(Debug, Clone, Serialize, Deserialize)]
12879pub struct TaxConfig {
12880    /// Whether tax generation is enabled.
12881    #[serde(default)]
12882    pub enabled: bool,
12883    /// Tax jurisdiction configuration.
12884    #[serde(default)]
12885    pub jurisdictions: TaxJurisdictionConfig,
12886    /// VAT/GST configuration.
12887    #[serde(default)]
12888    pub vat_gst: VatGstConfig,
12889    /// Sales tax configuration.
12890    #[serde(default)]
12891    pub sales_tax: SalesTaxConfig,
12892    /// Withholding tax configuration.
12893    #[serde(default)]
12894    pub withholding: WithholdingTaxSchemaConfig,
12895    /// Tax provision configuration.
12896    #[serde(default)]
12897    pub provisions: TaxProvisionSchemaConfig,
12898    /// Payroll tax configuration.
12899    #[serde(default)]
12900    pub payroll_tax: PayrollTaxSchemaConfig,
12901    /// Anomaly injection rate for tax data (0.0 to 1.0).
12902    #[serde(default = "default_tax_anomaly_rate")]
12903    pub anomaly_rate: f64,
12904}
12905
/// Serde default for `TaxConfig::anomaly_rate`.
fn default_tax_anomaly_rate() -> f64 {
    0.03_f64
}
12909
12910impl Default for TaxConfig {
12911    fn default() -> Self {
12912        Self {
12913            enabled: false,
12914            jurisdictions: TaxJurisdictionConfig::default(),
12915            vat_gst: VatGstConfig::default(),
12916            sales_tax: SalesTaxConfig::default(),
12917            withholding: WithholdingTaxSchemaConfig::default(),
12918            provisions: TaxProvisionSchemaConfig::default(),
12919            payroll_tax: PayrollTaxSchemaConfig::default(),
12920            anomaly_rate: default_tax_anomaly_rate(),
12921        }
12922    }
12923}
12924
12925/// Tax jurisdiction configuration.
12926///
12927/// Specifies which countries and subnational jurisdictions to include
12928/// when generating tax data.
12929#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12930pub struct TaxJurisdictionConfig {
12931    /// List of country codes to include (e.g., ["US", "DE", "GB"]).
12932    #[serde(default)]
12933    pub countries: Vec<String>,
12934    /// Whether to include subnational jurisdictions (e.g., US states, Canadian provinces).
12935    #[serde(default)]
12936    pub include_subnational: bool,
12937}
12938
12939/// VAT/GST configuration.
12940///
12941/// Controls generation of Value Added Tax / Goods and Services Tax data,
12942/// including standard and reduced rates, exempt categories, and reverse charge.
12943#[derive(Debug, Clone, Serialize, Deserialize)]
12944pub struct VatGstConfig {
12945    /// Whether VAT/GST generation is enabled.
12946    #[serde(default)]
12947    pub enabled: bool,
12948    /// Standard VAT/GST rates by country code (e.g., {"DE": 0.19, "GB": 0.20}).
12949    #[serde(default)]
12950    pub standard_rates: std::collections::HashMap<String, f64>,
12951    /// Reduced VAT/GST rates by country code (e.g., {"DE": 0.07, "GB": 0.05}).
12952    #[serde(default)]
12953    pub reduced_rates: std::collections::HashMap<String, f64>,
12954    /// Categories exempt from VAT/GST (e.g., ["financial_services", "healthcare"]).
12955    #[serde(default)]
12956    pub exempt_categories: Vec<String>,
12957    /// Whether to apply reverse charge mechanism for cross-border B2B transactions.
12958    #[serde(default = "default_true")]
12959    pub reverse_charge: bool,
12960}
12961
12962impl Default for VatGstConfig {
12963    fn default() -> Self {
12964        Self {
12965            enabled: false,
12966            standard_rates: std::collections::HashMap::new(),
12967            reduced_rates: std::collections::HashMap::new(),
12968            exempt_categories: Vec::new(),
12969            reverse_charge: true,
12970        }
12971    }
12972}
12973
12974/// Sales tax configuration.
12975///
12976/// Controls generation of US-style sales tax data including nexus determination.
12977#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12978pub struct SalesTaxConfig {
12979    /// Whether sales tax generation is enabled.
12980    #[serde(default)]
12981    pub enabled: bool,
12982    /// US states where the company has nexus (e.g., ["CA", "NY", "TX"]).
12983    #[serde(default)]
12984    pub nexus_states: Vec<String>,
12985}
12986
12987/// Withholding tax configuration.
12988///
12989/// Controls generation of withholding tax data for cross-border payments,
12990/// including treaty network and rate overrides.
12991#[derive(Debug, Clone, Serialize, Deserialize)]
12992pub struct WithholdingTaxSchemaConfig {
12993    /// Whether withholding tax generation is enabled.
12994    #[serde(default)]
12995    pub enabled: bool,
12996    /// Whether to simulate a treaty network with reduced rates.
12997    #[serde(default = "default_true")]
12998    pub treaty_network: bool,
12999    /// Default withholding tax rate for non-treaty countries (0.0 to 1.0).
13000    #[serde(default = "default_withholding_rate")]
13001    pub default_rate: f64,
13002    /// Reduced withholding tax rate for treaty countries (0.0 to 1.0).
13003    #[serde(default = "default_treaty_reduced_rate")]
13004    pub treaty_reduced_rate: f64,
13005}
13006
/// Serde fallback for `WithholdingTaxSchemaConfig::default_rate`: 0.30.
fn default_withholding_rate() -> f64 {
    const NON_TREATY_RATE: f64 = 0.30;
    NON_TREATY_RATE
}

/// Serde fallback for `WithholdingTaxSchemaConfig::treaty_reduced_rate`: 0.15.
fn default_treaty_reduced_rate() -> f64 {
    const TREATY_RATE: f64 = 0.15;
    TREATY_RATE
}
13014
13015impl Default for WithholdingTaxSchemaConfig {
13016    fn default() -> Self {
13017        Self {
13018            enabled: false,
13019            treaty_network: true,
13020            default_rate: default_withholding_rate(),
13021            treaty_reduced_rate: default_treaty_reduced_rate(),
13022        }
13023    }
13024}
13025
13026/// Tax provision configuration.
13027///
13028/// Controls generation of tax provision data including statutory rates
13029/// and uncertain tax positions (ASC 740 / IAS 12).
13030#[derive(Debug, Clone, Serialize, Deserialize)]
13031pub struct TaxProvisionSchemaConfig {
13032    /// Whether tax provision generation is enabled.
13033    /// Defaults to true when tax is enabled, as provisions are typically required.
13034    #[serde(default = "default_true")]
13035    pub enabled: bool,
13036    /// Statutory corporate tax rate (0.0 to 1.0).
13037    #[serde(default = "default_statutory_rate")]
13038    pub statutory_rate: f64,
13039    /// Whether to generate uncertain tax positions (FIN 48 / IFRIC 23).
13040    #[serde(default = "default_true")]
13041    pub uncertain_positions: bool,
13042}
13043
/// Serde fallback for `TaxProvisionSchemaConfig::statutory_rate`: 0.21.
fn default_statutory_rate() -> f64 {
    const STATUTORY_RATE: f64 = 0.21;
    STATUTORY_RATE
}
13047
13048impl Default for TaxProvisionSchemaConfig {
13049    fn default() -> Self {
13050        Self {
13051            enabled: true,
13052            statutory_rate: default_statutory_rate(),
13053            uncertain_positions: true,
13054        }
13055    }
13056}
13057
/// Payroll tax configuration.
///
/// Controls generation of payroll tax data (employer/employee contributions,
/// social security, Medicare, etc.).
///
/// `Default` is derived, so payroll tax generation is off unless `enabled`
/// is set explicitly.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PayrollTaxSchemaConfig {
    /// Whether payroll tax generation is enabled (absent key means `false`).
    #[serde(default)]
    pub enabled: bool,
}
13068
13069// ---------------------------------------------------------------------------
13070// Treasury & Cash Management Configuration
13071// ---------------------------------------------------------------------------
13072
13073/// Treasury and cash management configuration.
13074///
13075/// Controls generation of cash positions, forecasts, pooling, hedging
13076/// instruments (ASC 815 / IFRS 9), debt instruments with covenants,
13077/// bank guarantees, and intercompany netting runs.
13078#[derive(Debug, Clone, Serialize, Deserialize)]
13079pub struct TreasuryConfig {
13080    /// Whether treasury generation is enabled.
13081    #[serde(default)]
13082    pub enabled: bool,
13083    /// Cash positioning configuration.
13084    #[serde(default)]
13085    pub cash_positioning: CashPositioningConfig,
13086    /// Cash forecasting configuration.
13087    #[serde(default)]
13088    pub cash_forecasting: CashForecastingConfig,
13089    /// Cash pooling configuration.
13090    #[serde(default)]
13091    pub cash_pooling: CashPoolingConfig,
13092    /// Hedging configuration (FX forwards, IR swaps, etc.).
13093    #[serde(default)]
13094    pub hedging: HedgingSchemaConfig,
13095    /// Debt instrument and covenant configuration.
13096    #[serde(default)]
13097    pub debt: DebtSchemaConfig,
13098    /// Intercompany netting configuration.
13099    #[serde(default)]
13100    pub netting: NettingSchemaConfig,
13101    /// Bank guarantee / letter of credit configuration.
13102    #[serde(default)]
13103    pub bank_guarantees: BankGuaranteeSchemaConfig,
13104    /// Anomaly injection rate for treasury data (0.0 to 1.0).
13105    #[serde(default = "default_treasury_anomaly_rate")]
13106    pub anomaly_rate: f64,
13107}
13108
/// Serde fallback for `TreasuryConfig::anomaly_rate`: 0.02.
fn default_treasury_anomaly_rate() -> f64 {
    const TREASURY_ANOMALY_RATE: f64 = 0.02;
    TREASURY_ANOMALY_RATE
}
13112
13113impl Default for TreasuryConfig {
13114    fn default() -> Self {
13115        Self {
13116            enabled: false,
13117            cash_positioning: CashPositioningConfig::default(),
13118            cash_forecasting: CashForecastingConfig::default(),
13119            cash_pooling: CashPoolingConfig::default(),
13120            hedging: HedgingSchemaConfig::default(),
13121            debt: DebtSchemaConfig::default(),
13122            netting: NettingSchemaConfig::default(),
13123            bank_guarantees: BankGuaranteeSchemaConfig::default(),
13124            anomaly_rate: default_treasury_anomaly_rate(),
13125        }
13126    }
13127}
13128
13129/// Cash positioning configuration.
13130///
13131/// Controls daily cash position generation per entity/bank account.
13132#[derive(Debug, Clone, Serialize, Deserialize)]
13133pub struct CashPositioningConfig {
13134    /// Whether cash positioning is enabled.
13135    #[serde(default = "default_true")]
13136    pub enabled: bool,
13137    /// Position generation frequency.
13138    #[serde(default = "default_cash_frequency")]
13139    pub frequency: String,
13140    /// Minimum cash balance policy threshold.
13141    #[serde(default = "default_minimum_balance_policy")]
13142    pub minimum_balance_policy: f64,
13143}
13144
/// Serde fallback for `CashPositioningConfig::frequency`: `"daily"`.
fn default_cash_frequency() -> String {
    String::from("daily")
}

/// Serde fallback for `CashPositioningConfig::minimum_balance_policy`: 100,000.
fn default_minimum_balance_policy() -> f64 {
    const MINIMUM_BALANCE: f64 = 100_000.0;
    MINIMUM_BALANCE
}
13152
13153impl Default for CashPositioningConfig {
13154    fn default() -> Self {
13155        Self {
13156            enabled: true,
13157            frequency: default_cash_frequency(),
13158            minimum_balance_policy: default_minimum_balance_policy(),
13159        }
13160    }
13161}
13162
13163/// Cash forecasting configuration.
13164///
13165/// Controls forward-looking cash forecast generation with probability-weighted items.
13166#[derive(Debug, Clone, Serialize, Deserialize)]
13167pub struct CashForecastingConfig {
13168    /// Whether cash forecasting is enabled.
13169    #[serde(default = "default_true")]
13170    pub enabled: bool,
13171    /// Number of days to forecast into the future.
13172    #[serde(default = "default_horizon_days")]
13173    pub horizon_days: u32,
13174    /// AR collection probability curve type ("aging" or "flat").
13175    #[serde(default = "default_ar_probability_curve")]
13176    pub ar_collection_probability_curve: String,
13177    /// Confidence interval for the forecast (0.0 to 1.0).
13178    #[serde(default = "default_confidence_interval")]
13179    pub confidence_interval: f64,
13180}
13181
/// Serde fallback for `CashForecastingConfig::horizon_days`: 90.
fn default_horizon_days() -> u32 {
    const HORIZON_DAYS: u32 = 90;
    HORIZON_DAYS
}

/// Serde fallback for `CashForecastingConfig::ar_collection_probability_curve`: `"aging"`.
fn default_ar_probability_curve() -> String {
    String::from("aging")
}

/// Serde fallback for `CashForecastingConfig::confidence_interval`: 0.90.
fn default_confidence_interval() -> f64 {
    const CONFIDENCE: f64 = 0.90;
    CONFIDENCE
}
13193
13194impl Default for CashForecastingConfig {
13195    fn default() -> Self {
13196        Self {
13197            enabled: true,
13198            horizon_days: default_horizon_days(),
13199            ar_collection_probability_curve: default_ar_probability_curve(),
13200            confidence_interval: default_confidence_interval(),
13201        }
13202    }
13203}
13204
13205/// Cash pooling configuration.
13206///
13207/// Controls cash pool structure generation (physical, notional, zero-balancing).
13208#[derive(Debug, Clone, Serialize, Deserialize)]
13209pub struct CashPoolingConfig {
13210    /// Whether cash pooling is enabled.
13211    #[serde(default)]
13212    pub enabled: bool,
13213    /// Pool type: "physical_pooling", "notional_pooling", or "zero_balancing".
13214    #[serde(default = "default_pool_type")]
13215    pub pool_type: String,
13216    /// Time of day when sweeps occur (HH:MM format).
13217    #[serde(default = "default_sweep_time")]
13218    pub sweep_time: String,
13219}
13220
/// Serde fallback for `CashPoolingConfig::pool_type`: `"zero_balancing"`.
fn default_pool_type() -> String {
    String::from("zero_balancing")
}

/// Serde fallback for `CashPoolingConfig::sweep_time`: `"16:00"`.
fn default_sweep_time() -> String {
    String::from("16:00")
}
13228
13229impl Default for CashPoolingConfig {
13230    fn default() -> Self {
13231        Self {
13232            enabled: false,
13233            pool_type: default_pool_type(),
13234            sweep_time: default_sweep_time(),
13235        }
13236    }
13237}
13238
13239/// Hedging configuration.
13240///
13241/// Controls generation of hedging instruments and hedge relationship designations
13242/// under ASC 815 / IFRS 9.
13243#[derive(Debug, Clone, Serialize, Deserialize)]
13244pub struct HedgingSchemaConfig {
13245    /// Whether hedging generation is enabled.
13246    #[serde(default)]
13247    pub enabled: bool,
13248    /// Target hedge ratio (0.0 to 1.0). Proportion of FX exposure to hedge.
13249    #[serde(default = "default_hedge_ratio")]
13250    pub hedge_ratio: f64,
13251    /// Types of instruments to generate (e.g., ["fx_forward", "interest_rate_swap"]).
13252    #[serde(default = "default_hedge_instruments")]
13253    pub instruments: Vec<String>,
13254    /// Whether to designate formal hedge accounting relationships.
13255    #[serde(default = "default_true")]
13256    pub hedge_accounting: bool,
13257    /// Effectiveness testing method: "dollar_offset", "regression", or "critical_terms".
13258    #[serde(default = "default_effectiveness_method")]
13259    pub effectiveness_method: String,
13260}
13261
/// Serde fallback for `HedgingSchemaConfig::hedge_ratio`: 0.75.
fn default_hedge_ratio() -> f64 {
    const HEDGE_RATIO: f64 = 0.75;
    HEDGE_RATIO
}

/// Serde fallback for `HedgingSchemaConfig::instruments`:
/// `"fx_forward"` followed by `"interest_rate_swap"`.
fn default_hedge_instruments() -> Vec<String> {
    ["fx_forward", "interest_rate_swap"]
        .iter()
        .map(|name| String::from(*name))
        .collect()
}

/// Serde fallback for `HedgingSchemaConfig::effectiveness_method`: `"regression"`.
fn default_effectiveness_method() -> String {
    String::from("regression")
}
13273
13274impl Default for HedgingSchemaConfig {
13275    fn default() -> Self {
13276        Self {
13277            enabled: false,
13278            hedge_ratio: default_hedge_ratio(),
13279            instruments: default_hedge_instruments(),
13280            hedge_accounting: true,
13281            effectiveness_method: default_effectiveness_method(),
13282        }
13283    }
13284}
13285
/// Debt instrument configuration.
///
/// Controls generation of debt instruments (term loans, revolving credit, bonds)
/// with amortization schedules and financial covenants.
///
/// `Default` is derived: generation is off and both lists are empty unless
/// the config supplies them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DebtSchemaConfig {
    /// Whether debt instrument generation is enabled.
    #[serde(default)]
    pub enabled: bool,
    /// Debt instrument definitions (empty when the key is absent).
    #[serde(default)]
    pub instruments: Vec<DebtInstrumentDef>,
    /// Covenant definitions (empty when the key is absent).
    #[serde(default)]
    pub covenants: Vec<CovenantDef>,
}
13302
13303/// Definition of a debt instrument in configuration.
13304#[derive(Debug, Clone, Serialize, Deserialize)]
13305pub struct DebtInstrumentDef {
13306    /// Instrument type: "term_loan", "revolving_credit", "bond", "commercial_paper", "bridge_loan".
13307    #[serde(rename = "type")]
13308    pub instrument_type: String,
13309    /// Principal amount (for term loans, bonds).
13310    #[serde(default)]
13311    pub principal: Option<f64>,
13312    /// Interest rate (annual, as decimal fraction).
13313    #[serde(default)]
13314    pub rate: Option<f64>,
13315    /// Maturity in months.
13316    #[serde(default)]
13317    pub maturity_months: Option<u32>,
13318    /// Facility limit (for revolving credit).
13319    #[serde(default)]
13320    pub facility: Option<f64>,
13321}
13322
/// Definition of a debt covenant in configuration.
///
/// Both fields are required on input: neither carries a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CovenantDef {
    /// Covenant type: "debt_to_equity", "interest_coverage", "current_ratio",
    /// "net_worth", "debt_to_ebitda", "fixed_charge_coverage".
    /// Read from / written to the key `type`.
    #[serde(rename = "type")]
    pub covenant_type: String,
    /// Covenant threshold value.
    pub threshold: f64,
}
13333
/// Intercompany netting configuration.
///
/// Controls generation of multilateral netting runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NettingSchemaConfig {
    /// Whether netting generation is enabled (absent key means `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Netting cycle: "daily", "weekly", or "monthly".
    /// An absent key falls back to `default_netting_cycle`.
    #[serde(default = "default_netting_cycle")]
    pub cycle: String,
}
13346
/// Serde fallback for `NettingSchemaConfig::cycle`: `"monthly"`.
fn default_netting_cycle() -> String {
    String::from("monthly")
}
13350
13351impl Default for NettingSchemaConfig {
13352    fn default() -> Self {
13353        Self {
13354            enabled: false,
13355            cycle: default_netting_cycle(),
13356        }
13357    }
13358}
13359
/// Bank guarantee and letter of credit configuration.
///
/// Controls generation of bank guarantees, standby LCs, and performance bonds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BankGuaranteeSchemaConfig {
    /// Whether bank guarantee generation is enabled (absent key means `false`).
    #[serde(default)]
    pub enabled: bool,
    /// Number of guarantees to generate.
    /// An absent key falls back to `default_guarantee_count`.
    #[serde(default = "default_guarantee_count")]
    pub count: u32,
}
13372
/// Serde fallback for `BankGuaranteeSchemaConfig::count`: 5.
fn default_guarantee_count() -> u32 {
    const GUARANTEE_COUNT: u32 = 5;
    GUARANTEE_COUNT
}
13376
13377impl Default for BankGuaranteeSchemaConfig {
13378    fn default() -> Self {
13379        Self {
13380            enabled: false,
13381            count: default_guarantee_count(),
13382        }
13383    }
13384}
13385
13386// ===========================================================================
13387// Project Accounting Configuration
13388// ===========================================================================
13389
13390/// Project accounting configuration.
13391///
13392/// Controls generation of project cost lines, revenue recognition,
13393/// milestones, change orders, retainage, and earned value metrics.
13394#[derive(Debug, Clone, Serialize, Deserialize)]
13395pub struct ProjectAccountingConfig {
13396    /// Whether project accounting is enabled.
13397    #[serde(default)]
13398    pub enabled: bool,
13399    /// Number of projects to generate.
13400    #[serde(default = "default_project_count")]
13401    pub project_count: u32,
13402    /// Distribution of project types (capital, internal, customer, r_and_d, maintenance, technology).
13403    #[serde(default)]
13404    pub project_types: ProjectTypeDistribution,
13405    /// WBS structure configuration.
13406    #[serde(default)]
13407    pub wbs: WbsSchemaConfig,
13408    /// Cost allocation rates (what % of source documents get project-tagged).
13409    #[serde(default)]
13410    pub cost_allocation: CostAllocationConfig,
13411    /// Revenue recognition configuration for project accounting.
13412    #[serde(default)]
13413    pub revenue_recognition: ProjectRevenueRecognitionConfig,
13414    /// Milestone configuration.
13415    #[serde(default)]
13416    pub milestones: MilestoneSchemaConfig,
13417    /// Change order configuration.
13418    #[serde(default)]
13419    pub change_orders: ChangeOrderSchemaConfig,
13420    /// Retainage configuration.
13421    #[serde(default)]
13422    pub retainage: RetainageSchemaConfig,
13423    /// Earned value management configuration.
13424    #[serde(default)]
13425    pub earned_value: EarnedValueSchemaConfig,
13426    /// Anomaly injection rate for project accounting data (0.0 to 1.0).
13427    #[serde(default = "default_project_anomaly_rate")]
13428    pub anomaly_rate: f64,
13429}
13430
/// Serde fallback for `ProjectAccountingConfig::project_count`: 10.
fn default_project_count() -> u32 {
    const PROJECT_COUNT: u32 = 10;
    PROJECT_COUNT
}

/// Serde fallback for `ProjectAccountingConfig::anomaly_rate`: 0.03.
fn default_project_anomaly_rate() -> f64 {
    const PROJECT_ANOMALY_RATE: f64 = 0.03;
    PROJECT_ANOMALY_RATE
}
13438
13439impl Default for ProjectAccountingConfig {
13440    fn default() -> Self {
13441        Self {
13442            enabled: false,
13443            project_count: default_project_count(),
13444            project_types: ProjectTypeDistribution::default(),
13445            wbs: WbsSchemaConfig::default(),
13446            cost_allocation: CostAllocationConfig::default(),
13447            revenue_recognition: ProjectRevenueRecognitionConfig::default(),
13448            milestones: MilestoneSchemaConfig::default(),
13449            change_orders: ChangeOrderSchemaConfig::default(),
13450            retainage: RetainageSchemaConfig::default(),
13451            earned_value: EarnedValueSchemaConfig::default(),
13452            anomaly_rate: default_project_anomaly_rate(),
13453        }
13454    }
13455}
13456
13457/// Distribution of project types by weight.
13458#[derive(Debug, Clone, Serialize, Deserialize)]
13459pub struct ProjectTypeDistribution {
13460    /// Weight for capital projects (default 0.25).
13461    #[serde(default = "default_capital_weight")]
13462    pub capital: f64,
13463    /// Weight for internal projects (default 0.20).
13464    #[serde(default = "default_internal_weight")]
13465    pub internal: f64,
13466    /// Weight for customer projects (default 0.30).
13467    #[serde(default = "default_customer_weight")]
13468    pub customer: f64,
13469    /// Weight for R&D projects (default 0.10).
13470    #[serde(default = "default_rnd_weight")]
13471    pub r_and_d: f64,
13472    /// Weight for maintenance projects (default 0.10).
13473    #[serde(default = "default_maintenance_weight")]
13474    pub maintenance: f64,
13475    /// Weight for technology projects (default 0.05).
13476    #[serde(default = "default_technology_weight")]
13477    pub technology: f64,
13478}
13479
/// Serde fallback weight for capital projects: 0.25.
fn default_capital_weight() -> f64 {
    const CAPITAL: f64 = 0.25;
    CAPITAL
}
/// Serde fallback weight for internal projects: 0.20.
fn default_internal_weight() -> f64 {
    const INTERNAL: f64 = 0.20;
    INTERNAL
}
/// Serde fallback weight for customer projects: 0.30.
fn default_customer_weight() -> f64 {
    const CUSTOMER: f64 = 0.30;
    CUSTOMER
}
/// Serde fallback weight for R&D projects: 0.10.
fn default_rnd_weight() -> f64 {
    const R_AND_D: f64 = 0.10;
    R_AND_D
}
/// Serde fallback weight for maintenance projects: 0.10.
fn default_maintenance_weight() -> f64 {
    const MAINTENANCE: f64 = 0.10;
    MAINTENANCE
}
/// Serde fallback weight for technology projects: 0.05.
fn default_technology_weight() -> f64 {
    const TECHNOLOGY: f64 = 0.05;
    TECHNOLOGY
}
13498
13499impl Default for ProjectTypeDistribution {
13500    fn default() -> Self {
13501        Self {
13502            capital: default_capital_weight(),
13503            internal: default_internal_weight(),
13504            customer: default_customer_weight(),
13505            r_and_d: default_rnd_weight(),
13506            maintenance: default_maintenance_weight(),
13507            technology: default_technology_weight(),
13508        }
13509    }
13510}
13511
13512/// WBS structure configuration.
13513#[derive(Debug, Clone, Serialize, Deserialize)]
13514pub struct WbsSchemaConfig {
13515    /// Maximum depth of WBS hierarchy (default 3).
13516    #[serde(default = "default_wbs_max_depth")]
13517    pub max_depth: u32,
13518    /// Minimum elements per level-1 WBS (default 2).
13519    #[serde(default = "default_wbs_min_elements")]
13520    pub min_elements_per_level: u32,
13521    /// Maximum elements per level-1 WBS (default 6).
13522    #[serde(default = "default_wbs_max_elements")]
13523    pub max_elements_per_level: u32,
13524}
13525
/// Serde fallback for `WbsSchemaConfig::max_depth`: 3.
fn default_wbs_max_depth() -> u32 {
    const MAX_DEPTH: u32 = 3;
    MAX_DEPTH
}
/// Serde fallback for `WbsSchemaConfig::min_elements_per_level`: 2.
fn default_wbs_min_elements() -> u32 {
    const MIN_ELEMENTS: u32 = 2;
    MIN_ELEMENTS
}
/// Serde fallback for `WbsSchemaConfig::max_elements_per_level`: 6.
fn default_wbs_max_elements() -> u32 {
    const MAX_ELEMENTS: u32 = 6;
    MAX_ELEMENTS
}
13535
13536impl Default for WbsSchemaConfig {
13537    fn default() -> Self {
13538        Self {
13539            max_depth: default_wbs_max_depth(),
13540            min_elements_per_level: default_wbs_min_elements(),
13541            max_elements_per_level: default_wbs_max_elements(),
13542        }
13543    }
13544}
13545
13546/// Cost allocation rates — what fraction of each document type gets linked to a project.
13547#[derive(Debug, Clone, Serialize, Deserialize)]
13548pub struct CostAllocationConfig {
13549    /// Fraction of time entries assigned to projects (0.0 to 1.0).
13550    #[serde(default = "default_time_entry_rate")]
13551    pub time_entry_project_rate: f64,
13552    /// Fraction of expense reports assigned to projects (0.0 to 1.0).
13553    #[serde(default = "default_expense_rate")]
13554    pub expense_project_rate: f64,
13555    /// Fraction of purchase orders assigned to projects (0.0 to 1.0).
13556    #[serde(default = "default_po_rate")]
13557    pub purchase_order_project_rate: f64,
13558    /// Fraction of vendor invoices assigned to projects (0.0 to 1.0).
13559    #[serde(default = "default_vi_rate")]
13560    pub vendor_invoice_project_rate: f64,
13561}
13562
/// Serde fallback for `CostAllocationConfig::time_entry_project_rate`: 0.60.
fn default_time_entry_rate() -> f64 {
    const TIME_ENTRY_RATE: f64 = 0.60;
    TIME_ENTRY_RATE
}
/// Serde fallback for `CostAllocationConfig::expense_project_rate`: 0.30.
fn default_expense_rate() -> f64 {
    const EXPENSE_RATE: f64 = 0.30;
    EXPENSE_RATE
}
/// Serde fallback for `CostAllocationConfig::purchase_order_project_rate`: 0.40.
fn default_po_rate() -> f64 {
    const PURCHASE_ORDER_RATE: f64 = 0.40;
    PURCHASE_ORDER_RATE
}
/// Serde fallback for `CostAllocationConfig::vendor_invoice_project_rate`: 0.35.
fn default_vi_rate() -> f64 {
    const VENDOR_INVOICE_RATE: f64 = 0.35;
    VENDOR_INVOICE_RATE
}
13575
13576impl Default for CostAllocationConfig {
13577    fn default() -> Self {
13578        Self {
13579            time_entry_project_rate: default_time_entry_rate(),
13580            expense_project_rate: default_expense_rate(),
13581            purchase_order_project_rate: default_po_rate(),
13582            vendor_invoice_project_rate: default_vi_rate(),
13583        }
13584    }
13585}
13586
13587/// Revenue recognition configuration for project accounting.
13588#[derive(Debug, Clone, Serialize, Deserialize)]
13589pub struct ProjectRevenueRecognitionConfig {
13590    /// Whether revenue recognition is enabled for customer projects.
13591    #[serde(default = "default_true")]
13592    pub enabled: bool,
13593    /// Default method: "percentage_of_completion", "completed_contract", "milestone_based".
13594    #[serde(default = "default_revenue_method")]
13595    pub method: String,
13596    /// Default completion measure: "cost_to_cost", "labor_hours", "physical_completion".
13597    #[serde(default = "default_completion_measure")]
13598    pub completion_measure: String,
13599    /// Average contract value for customer projects.
13600    #[serde(default = "default_avg_contract_value")]
13601    pub avg_contract_value: f64,
13602}
13603
/// Serde fallback for `ProjectRevenueRecognitionConfig::method`:
/// `"percentage_of_completion"`.
fn default_revenue_method() -> String {
    String::from("percentage_of_completion")
}
/// Serde fallback for `ProjectRevenueRecognitionConfig::completion_measure`:
/// `"cost_to_cost"`.
fn default_completion_measure() -> String {
    String::from("cost_to_cost")
}
/// Serde fallback for `ProjectRevenueRecognitionConfig::avg_contract_value`: 500,000.
fn default_avg_contract_value() -> f64 {
    const AVG_CONTRACT_VALUE: f64 = 500_000.0;
    AVG_CONTRACT_VALUE
}
13613
13614impl Default for ProjectRevenueRecognitionConfig {
13615    fn default() -> Self {
13616        Self {
13617            enabled: true,
13618            method: default_revenue_method(),
13619            completion_measure: default_completion_measure(),
13620            avg_contract_value: default_avg_contract_value(),
13621        }
13622    }
13623}
13624
13625/// Milestone configuration.
13626#[derive(Debug, Clone, Serialize, Deserialize)]
13627pub struct MilestoneSchemaConfig {
13628    /// Whether milestone generation is enabled.
13629    #[serde(default = "default_true")]
13630    pub enabled: bool,
13631    /// Average number of milestones per project.
13632    #[serde(default = "default_milestones_per_project")]
13633    pub avg_per_project: u32,
13634    /// Fraction of milestones that are payment milestones (0.0 to 1.0).
13635    #[serde(default = "default_payment_milestone_rate")]
13636    pub payment_milestone_rate: f64,
13637}
13638
/// Serde fallback for `MilestoneSchemaConfig::avg_per_project`: 4.
fn default_milestones_per_project() -> u32 {
    const MILESTONES_PER_PROJECT: u32 = 4;
    MILESTONES_PER_PROJECT
}
/// Serde fallback for `MilestoneSchemaConfig::payment_milestone_rate`: 0.50.
fn default_payment_milestone_rate() -> f64 {
    const PAYMENT_MILESTONE_RATE: f64 = 0.50;
    PAYMENT_MILESTONE_RATE
}
13645
13646impl Default for MilestoneSchemaConfig {
13647    fn default() -> Self {
13648        Self {
13649            enabled: true,
13650            avg_per_project: default_milestones_per_project(),
13651            payment_milestone_rate: default_payment_milestone_rate(),
13652        }
13653    }
13654}
13655
13656/// Change order configuration.
13657#[derive(Debug, Clone, Serialize, Deserialize)]
13658pub struct ChangeOrderSchemaConfig {
13659    /// Whether change order generation is enabled.
13660    #[serde(default = "default_true")]
13661    pub enabled: bool,
13662    /// Probability that a project will have at least one change order (0.0 to 1.0).
13663    #[serde(default = "default_change_order_probability")]
13664    pub probability: f64,
13665    /// Maximum change orders per project.
13666    #[serde(default = "default_max_change_orders")]
13667    pub max_per_project: u32,
13668    /// Approval rate for change orders (0.0 to 1.0).
13669    #[serde(default = "default_change_order_approval_rate")]
13670    pub approval_rate: f64,
13671}
13672
/// Serde fallback for `ChangeOrderSchemaConfig::probability`: 0.40.
fn default_change_order_probability() -> f64 {
    const CHANGE_ORDER_PROBABILITY: f64 = 0.40;
    CHANGE_ORDER_PROBABILITY
}
/// Serde fallback for `ChangeOrderSchemaConfig::max_per_project`: 3.
fn default_max_change_orders() -> u32 {
    const MAX_CHANGE_ORDERS: u32 = 3;
    MAX_CHANGE_ORDERS
}
/// Serde fallback for `ChangeOrderSchemaConfig::approval_rate`: 0.75.
fn default_change_order_approval_rate() -> f64 {
    const APPROVAL_RATE: f64 = 0.75;
    APPROVAL_RATE
}
13682
13683impl Default for ChangeOrderSchemaConfig {
13684    fn default() -> Self {
13685        Self {
13686            enabled: true,
13687            probability: default_change_order_probability(),
13688            max_per_project: default_max_change_orders(),
13689            approval_rate: default_change_order_approval_rate(),
13690        }
13691    }
13692}
13693
13694/// Retainage configuration.
13695#[derive(Debug, Clone, Serialize, Deserialize)]
13696pub struct RetainageSchemaConfig {
13697    /// Whether retainage is enabled.
13698    #[serde(default)]
13699    pub enabled: bool,
13700    /// Default retainage percentage (0.0 to 1.0, e.g., 0.10 for 10%).
13701    #[serde(default = "default_retainage_pct")]
13702    pub default_percentage: f64,
13703}
13704
/// Serde fallback for `RetainageSchemaConfig::default_percentage`: 0.10.
fn default_retainage_pct() -> f64 {
    const RETAINAGE_PCT: f64 = 0.10;
    RETAINAGE_PCT
}
13708
13709impl Default for RetainageSchemaConfig {
13710    fn default() -> Self {
13711        Self {
13712            enabled: false,
13713            default_percentage: default_retainage_pct(),
13714        }
13715    }
13716}
13717
/// Earned value management (EVM) configuration.
///
/// Both keys are optional on input and fall back to the named serde default
/// functions when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EarnedValueSchemaConfig {
    /// Whether EVM metrics are generated.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Measurement frequency: "weekly", "biweekly", "monthly".
    #[serde(default = "default_evm_frequency")]
    pub frequency: String,
}
13728
/// Serde fallback for `EarnedValueSchemaConfig::frequency`: `"monthly"`.
fn default_evm_frequency() -> String {
    String::from("monthly")
}
13732
13733impl Default for EarnedValueSchemaConfig {
13734    fn default() -> Self {
13735        Self {
13736            enabled: true,
13737            frequency: default_evm_frequency(),
13738        }
13739    }
13740}
13741
13742// =============================================================================
13743// ESG / Sustainability Configuration
13744// =============================================================================
13745
13746/// Top-level ESG / sustainability reporting configuration.
13747#[derive(Debug, Clone, Serialize, Deserialize)]
13748pub struct EsgConfig {
13749    /// Whether ESG generation is enabled.
13750    #[serde(default)]
13751    pub enabled: bool,
13752    /// Environmental metrics (emissions, energy, water, waste).
13753    #[serde(default)]
13754    pub environmental: EnvironmentalConfig,
13755    /// Social metrics (diversity, pay equity, safety).
13756    #[serde(default)]
13757    pub social: SocialConfig,
13758    /// Governance metrics (board composition, ethics, compliance).
13759    #[serde(default)]
13760    pub governance: GovernanceSchemaConfig,
13761    /// Supply-chain ESG assessment settings.
13762    #[serde(default)]
13763    pub supply_chain_esg: SupplyChainEsgConfig,
13764    /// ESG reporting / disclosure framework settings.
13765    #[serde(default)]
13766    pub reporting: EsgReportingConfig,
13767    /// Climate scenario analysis settings.
13768    #[serde(default)]
13769    pub climate_scenarios: ClimateScenarioConfig,
13770    /// Anomaly injection rate for ESG data (0.0 to 1.0).
13771    #[serde(default = "default_esg_anomaly_rate")]
13772    pub anomaly_rate: f64,
13773}
13774
/// Serde fallback for `EsgConfig::anomaly_rate`: 0.02.
fn default_esg_anomaly_rate() -> f64 {
    const ESG_ANOMALY_RATE: f64 = 0.02;
    ESG_ANOMALY_RATE
}
13778
13779impl Default for EsgConfig {
13780    fn default() -> Self {
13781        Self {
13782            enabled: false,
13783            environmental: EnvironmentalConfig::default(),
13784            social: SocialConfig::default(),
13785            governance: GovernanceSchemaConfig::default(),
13786            supply_chain_esg: SupplyChainEsgConfig::default(),
13787            reporting: EsgReportingConfig::default(),
13788            climate_scenarios: ClimateScenarioConfig::default(),
13789            anomaly_rate: default_esg_anomaly_rate(),
13790        }
13791    }
13792}
13793
13794/// Country pack configuration.
13795///
13796/// Controls where to load additional country packs and per-country overrides.
13797/// When omitted, only the built-in packs (_default, US, DE, GB) are used.
13798#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13799pub struct CountryPacksSchemaConfig {
13800    /// Optional directory containing additional `*.json` country packs.
13801    #[serde(default)]
13802    pub external_dir: Option<PathBuf>,
13803    /// Per-country overrides applied after loading.
13804    /// Keys are ISO 3166-1 alpha-2 codes; values are partial JSON objects
13805    /// that are deep-merged on top of the loaded pack.
13806    #[serde(default)]
13807    pub overrides: std::collections::HashMap<String, serde_json::Value>,
13808}
13809
13810/// Environmental metrics configuration.
13811#[derive(Debug, Clone, Serialize, Deserialize)]
13812pub struct EnvironmentalConfig {
13813    /// Whether environmental metrics are generated.
13814    #[serde(default = "default_true")]
13815    pub enabled: bool,
13816    /// Scope 1 (direct) emission generation settings.
13817    #[serde(default)]
13818    pub scope1: EmissionScopeConfig,
13819    /// Scope 2 (purchased energy) emission generation settings.
13820    #[serde(default)]
13821    pub scope2: EmissionScopeConfig,
13822    /// Scope 3 (value chain) emission generation settings.
13823    #[serde(default)]
13824    pub scope3: Scope3Config,
13825    /// Energy consumption tracking settings.
13826    #[serde(default)]
13827    pub energy: EnergySchemaConfig,
13828    /// Water usage tracking settings.
13829    #[serde(default)]
13830    pub water: WaterSchemaConfig,
13831    /// Waste management tracking settings.
13832    #[serde(default)]
13833    pub waste: WasteSchemaConfig,
13834}
13835
13836impl Default for EnvironmentalConfig {
13837    fn default() -> Self {
13838        Self {
13839            enabled: true,
13840            scope1: EmissionScopeConfig::default(),
13841            scope2: EmissionScopeConfig::default(),
13842            scope3: Scope3Config::default(),
13843            energy: EnergySchemaConfig::default(),
13844            water: WaterSchemaConfig::default(),
13845            waste: WasteSchemaConfig::default(),
13846        }
13847    }
13848}
13849
13850/// Configuration for a single emission scope (Scope 1 or 2).
13851#[derive(Debug, Clone, Serialize, Deserialize)]
13852pub struct EmissionScopeConfig {
13853    /// Whether this scope is enabled.
13854    #[serde(default = "default_true")]
13855    pub enabled: bool,
13856    /// Emission factor region (e.g., "US", "EU", "global").
13857    #[serde(default = "default_emission_region")]
13858    pub factor_region: String,
13859}
13860
/// Serde fallback for `EmissionScopeConfig::factor_region`.
fn default_emission_region() -> String {
    String::from("US")
}
13864
13865impl Default for EmissionScopeConfig {
13866    fn default() -> Self {
13867        Self {
13868            enabled: true,
13869            factor_region: default_emission_region(),
13870        }
13871    }
13872}
13873
13874/// Scope 3 (value chain) emission configuration.
13875#[derive(Debug, Clone, Serialize, Deserialize)]
13876pub struct Scope3Config {
13877    /// Whether Scope 3 emissions are generated.
13878    #[serde(default = "default_true")]
13879    pub enabled: bool,
13880    /// Categories to include (e.g., "purchased_goods", "business_travel", "commuting").
13881    #[serde(default = "default_scope3_categories")]
13882    pub categories: Vec<String>,
13883    /// Spend-based emission intensity (kg CO2e per USD).
13884    #[serde(default = "default_spend_intensity")]
13885    pub default_spend_intensity_kg_per_usd: f64,
13886}
13887
/// Serde fallback for `Scope3Config::categories`.
fn default_scope3_categories() -> Vec<String> {
    ["purchased_goods", "business_travel", "employee_commuting"]
        .iter()
        .map(|category| category.to_string())
        .collect()
}
13895
/// Serde fallback for `Scope3Config::default_spend_intensity_kg_per_usd`.
fn default_spend_intensity() -> f64 {
    0.5_f64
}
13899
13900impl Default for Scope3Config {
13901    fn default() -> Self {
13902        Self {
13903            enabled: true,
13904            categories: default_scope3_categories(),
13905            default_spend_intensity_kg_per_usd: default_spend_intensity(),
13906        }
13907    }
13908}
13909
13910/// Energy consumption configuration.
13911#[derive(Debug, Clone, Serialize, Deserialize)]
13912pub struct EnergySchemaConfig {
13913    /// Whether energy consumption tracking is enabled.
13914    #[serde(default = "default_true")]
13915    pub enabled: bool,
13916    /// Number of facilities to generate.
13917    #[serde(default = "default_facility_count")]
13918    pub facility_count: u32,
13919    /// Target percentage of energy from renewable sources (0.0 to 1.0).
13920    #[serde(default = "default_renewable_target")]
13921    pub renewable_target: f64,
13922}
13923
/// Serde fallback for `EnergySchemaConfig::facility_count`.
fn default_facility_count() -> u32 {
    5_u32
}
13927
/// Serde fallback for `EnergySchemaConfig::renewable_target`.
fn default_renewable_target() -> f64 {
    const TARGET: f64 = 0.30;
    TARGET
}
13931
13932impl Default for EnergySchemaConfig {
13933    fn default() -> Self {
13934        Self {
13935            enabled: true,
13936            facility_count: default_facility_count(),
13937            renewable_target: default_renewable_target(),
13938        }
13939    }
13940}
13941
13942/// Water usage configuration.
13943#[derive(Debug, Clone, Serialize, Deserialize)]
13944pub struct WaterSchemaConfig {
13945    /// Whether water usage tracking is enabled.
13946    #[serde(default = "default_true")]
13947    pub enabled: bool,
13948    /// Number of facilities with water tracking.
13949    #[serde(default = "default_water_facility_count")]
13950    pub facility_count: u32,
13951}
13952
/// Serde fallback for `WaterSchemaConfig::facility_count`.
fn default_water_facility_count() -> u32 {
    3_u32
}
13956
13957impl Default for WaterSchemaConfig {
13958    fn default() -> Self {
13959        Self {
13960            enabled: true,
13961            facility_count: default_water_facility_count(),
13962        }
13963    }
13964}
13965
13966/// Waste management configuration.
13967#[derive(Debug, Clone, Serialize, Deserialize)]
13968pub struct WasteSchemaConfig {
13969    /// Whether waste tracking is enabled.
13970    #[serde(default = "default_true")]
13971    pub enabled: bool,
13972    /// Target diversion rate (0.0 to 1.0).
13973    #[serde(default = "default_diversion_target")]
13974    pub diversion_target: f64,
13975}
13976
/// Serde fallback for `WasteSchemaConfig::diversion_target`.
fn default_diversion_target() -> f64 {
    const TARGET: f64 = 0.50;
    TARGET
}
13980
13981impl Default for WasteSchemaConfig {
13982    fn default() -> Self {
13983        Self {
13984            enabled: true,
13985            diversion_target: default_diversion_target(),
13986        }
13987    }
13988}
13989
13990/// Social metrics configuration.
13991#[derive(Debug, Clone, Serialize, Deserialize)]
13992pub struct SocialConfig {
13993    /// Whether social metrics are generated.
13994    #[serde(default = "default_true")]
13995    pub enabled: bool,
13996    /// Workforce diversity tracking settings.
13997    #[serde(default)]
13998    pub diversity: DiversitySchemaConfig,
13999    /// Pay equity analysis settings.
14000    #[serde(default)]
14001    pub pay_equity: PayEquitySchemaConfig,
14002    /// Safety incident and metrics settings.
14003    #[serde(default)]
14004    pub safety: SafetySchemaConfig,
14005}
14006
14007impl Default for SocialConfig {
14008    fn default() -> Self {
14009        Self {
14010            enabled: true,
14011            diversity: DiversitySchemaConfig::default(),
14012            pay_equity: PayEquitySchemaConfig::default(),
14013            safety: SafetySchemaConfig::default(),
14014        }
14015    }
14016}
14017
14018/// Workforce diversity configuration.
14019#[derive(Debug, Clone, Serialize, Deserialize)]
14020pub struct DiversitySchemaConfig {
14021    /// Whether diversity metrics are generated.
14022    #[serde(default = "default_true")]
14023    pub enabled: bool,
14024    /// Dimensions to track (e.g., "gender", "ethnicity", "age_group").
14025    #[serde(default = "default_diversity_dimensions")]
14026    pub dimensions: Vec<String>,
14027}
14028
/// Serde fallback for `DiversitySchemaConfig::dimensions`.
fn default_diversity_dimensions() -> Vec<String> {
    ["gender", "ethnicity", "age_group"]
        .iter()
        .map(|dimension| dimension.to_string())
        .collect()
}
14036
14037impl Default for DiversitySchemaConfig {
14038    fn default() -> Self {
14039        Self {
14040            enabled: true,
14041            dimensions: default_diversity_dimensions(),
14042        }
14043    }
14044}
14045
14046/// Pay equity analysis configuration.
14047#[derive(Debug, Clone, Serialize, Deserialize)]
14048pub struct PayEquitySchemaConfig {
14049    /// Whether pay equity analysis is generated.
14050    #[serde(default = "default_true")]
14051    pub enabled: bool,
14052    /// Target pay gap threshold for flagging (e.g., 0.05 = 5% gap).
14053    #[serde(default = "default_pay_gap_threshold")]
14054    pub gap_threshold: f64,
14055}
14056
/// Serde fallback for `PayEquitySchemaConfig::gap_threshold`.
fn default_pay_gap_threshold() -> f64 {
    const THRESHOLD: f64 = 0.05;
    THRESHOLD
}
14060
14061impl Default for PayEquitySchemaConfig {
14062    fn default() -> Self {
14063        Self {
14064            enabled: true,
14065            gap_threshold: default_pay_gap_threshold(),
14066        }
14067    }
14068}
14069
14070/// Safety metrics configuration.
14071#[derive(Debug, Clone, Serialize, Deserialize)]
14072pub struct SafetySchemaConfig {
14073    /// Whether safety metrics are generated.
14074    #[serde(default = "default_true")]
14075    pub enabled: bool,
14076    /// Average annual recordable incidents per 200,000 hours.
14077    #[serde(default = "default_trir_target")]
14078    pub target_trir: f64,
14079    /// Number of safety incidents to generate.
14080    #[serde(default = "default_incident_count")]
14081    pub incident_count: u32,
14082}
14083
/// Serde fallback for `SafetySchemaConfig::target_trir`.
fn default_trir_target() -> f64 {
    2.5_f64
}
14087
/// Serde fallback for `SafetySchemaConfig::incident_count`.
fn default_incident_count() -> u32 {
    20_u32
}
14091
14092impl Default for SafetySchemaConfig {
14093    fn default() -> Self {
14094        Self {
14095            enabled: true,
14096            target_trir: default_trir_target(),
14097            incident_count: default_incident_count(),
14098        }
14099    }
14100}
14101
14102/// Governance metrics configuration.
14103#[derive(Debug, Clone, Serialize, Deserialize)]
14104pub struct GovernanceSchemaConfig {
14105    /// Whether governance metrics are generated.
14106    #[serde(default = "default_true")]
14107    pub enabled: bool,
14108    /// Number of board members.
14109    #[serde(default = "default_board_size")]
14110    pub board_size: u32,
14111    /// Target independent director ratio (0.0 to 1.0).
14112    #[serde(default = "default_independence_target")]
14113    pub independence_target: f64,
14114}
14115
/// Serde fallback for `GovernanceSchemaConfig::board_size`.
fn default_board_size() -> u32 {
    11_u32
}
14119
/// Serde fallback for `GovernanceSchemaConfig::independence_target`.
fn default_independence_target() -> f64 {
    const TARGET: f64 = 0.67;
    TARGET
}
14123
14124impl Default for GovernanceSchemaConfig {
14125    fn default() -> Self {
14126        Self {
14127            enabled: true,
14128            board_size: default_board_size(),
14129            independence_target: default_independence_target(),
14130        }
14131    }
14132}
14133
14134/// Supply-chain ESG assessment configuration.
14135#[derive(Debug, Clone, Serialize, Deserialize)]
14136pub struct SupplyChainEsgConfig {
14137    /// Whether supply chain ESG assessments are generated.
14138    #[serde(default = "default_true")]
14139    pub enabled: bool,
14140    /// Proportion of vendors to assess (0.0 to 1.0).
14141    #[serde(default = "default_assessment_coverage")]
14142    pub assessment_coverage: f64,
14143    /// High-risk country codes for automatic flagging.
14144    #[serde(default = "default_high_risk_countries")]
14145    pub high_risk_countries: Vec<String>,
14146}
14147
/// Serde fallback for `SupplyChainEsgConfig::assessment_coverage`.
fn default_assessment_coverage() -> f64 {
    const COVERAGE: f64 = 0.80;
    COVERAGE
}
14151
/// Serde fallback for `SupplyChainEsgConfig::high_risk_countries`.
fn default_high_risk_countries() -> Vec<String> {
    ["CN", "BD", "MM"]
        .iter()
        .map(|code| code.to_string())
        .collect()
}
14155
14156impl Default for SupplyChainEsgConfig {
14157    fn default() -> Self {
14158        Self {
14159            enabled: true,
14160            assessment_coverage: default_assessment_coverage(),
14161            high_risk_countries: default_high_risk_countries(),
14162        }
14163    }
14164}
14165
14166/// ESG reporting / disclosure framework configuration.
14167#[derive(Debug, Clone, Serialize, Deserialize)]
14168pub struct EsgReportingConfig {
14169    /// Whether ESG disclosures are generated.
14170    #[serde(default = "default_true")]
14171    pub enabled: bool,
14172    /// Frameworks to generate disclosures for.
14173    #[serde(default = "default_esg_frameworks")]
14174    pub frameworks: Vec<String>,
14175    /// Whether materiality assessment is performed.
14176    #[serde(default = "default_true")]
14177    pub materiality_assessment: bool,
14178    /// Materiality threshold for impact dimension (0.0 to 1.0).
14179    #[serde(default = "default_materiality_threshold")]
14180    pub impact_threshold: f64,
14181    /// Materiality threshold for financial dimension (0.0 to 1.0).
14182    #[serde(default = "default_materiality_threshold")]
14183    pub financial_threshold: f64,
14184}
14185
/// Serde fallback for `EsgReportingConfig::frameworks`.
fn default_esg_frameworks() -> Vec<String> {
    ["GRI", "ESRS"]
        .iter()
        .map(|framework| framework.to_string())
        .collect()
}
14189
/// Serde fallback shared by `EsgReportingConfig::impact_threshold` and
/// `EsgReportingConfig::financial_threshold`.
fn default_materiality_threshold() -> f64 {
    0.6_f64
}
14193
14194impl Default for EsgReportingConfig {
14195    fn default() -> Self {
14196        Self {
14197            enabled: true,
14198            frameworks: default_esg_frameworks(),
14199            materiality_assessment: true,
14200            impact_threshold: default_materiality_threshold(),
14201            financial_threshold: default_materiality_threshold(),
14202        }
14203    }
14204}
14205
14206/// Climate scenario analysis configuration.
14207#[derive(Debug, Clone, Serialize, Deserialize)]
14208pub struct ClimateScenarioConfig {
14209    /// Whether climate scenario analysis is generated.
14210    #[serde(default)]
14211    pub enabled: bool,
14212    /// Scenarios to model (e.g., "net_zero_2050", "stated_policies", "current_trajectory").
14213    #[serde(default = "default_climate_scenarios")]
14214    pub scenarios: Vec<String>,
14215    /// Time horizons in years to project.
14216    #[serde(default = "default_time_horizons")]
14217    pub time_horizons: Vec<u32>,
14218}
14219
/// Serde fallback for `ClimateScenarioConfig::scenarios`.
fn default_climate_scenarios() -> Vec<String> {
    ["net_zero_2050", "stated_policies", "current_trajectory"]
        .iter()
        .map(|scenario| scenario.to_string())
        .collect()
}
14227
/// Serde fallback for `ClimateScenarioConfig::time_horizons` (years).
fn default_time_horizons() -> Vec<u32> {
    [5_u32, 10, 30].to_vec()
}
14231
14232impl Default for ClimateScenarioConfig {
14233    fn default() -> Self {
14234        Self {
14235            enabled: false,
14236            scenarios: default_climate_scenarios(),
14237            time_horizons: default_time_horizons(),
14238        }
14239    }
14240}
14241
14242// ===== Counterfactual Simulation Scenarios =====
14243
14244/// Configuration for counterfactual simulation scenarios.
14245#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14246pub struct ScenariosConfig {
14247    /// Whether scenario generation is enabled.
14248    #[serde(default)]
14249    pub enabled: bool,
14250    /// List of scenario definitions.
14251    #[serde(default)]
14252    pub scenarios: Vec<ScenarioSchemaConfig>,
14253    /// Causal model configuration.
14254    #[serde(default)]
14255    pub causal_model: CausalModelSchemaConfig,
14256    /// Default settings applied to all scenarios.
14257    #[serde(default)]
14258    pub defaults: ScenarioDefaultsConfig,
14259    /// Generate counterfactual (original, mutated) JE pairs for ML training.
14260    /// When true, the orchestrator produces paired clean/anomalous journal entries.
14261    #[serde(default)]
14262    pub generate_counterfactuals: bool,
14263}
14264
14265/// A single scenario definition in the config.
14266#[derive(Debug, Clone, Serialize, Deserialize)]
14267pub struct ScenarioSchemaConfig {
14268    /// Scenario name (must be unique).
14269    pub name: String,
14270    /// Human-readable description.
14271    #[serde(default)]
14272    pub description: String,
14273    /// Tags for categorization.
14274    #[serde(default)]
14275    pub tags: Vec<String>,
14276    /// Base scenario name (None = default config).
14277    pub base: Option<String>,
14278    /// IFRS 9-style probability weight.
14279    pub probability_weight: Option<f64>,
14280    /// List of interventions to apply.
14281    #[serde(default)]
14282    pub interventions: Vec<InterventionSchemaConfig>,
14283    /// Constraint overrides for this scenario.
14284    #[serde(default)]
14285    pub constraints: ScenarioConstraintsSchemaConfig,
14286    /// Output configuration for this scenario.
14287    #[serde(default)]
14288    pub output: ScenarioOutputSchemaConfig,
14289    /// Arbitrary metadata.
14290    #[serde(default)]
14291    pub metadata: std::collections::HashMap<String, String>,
14292}
14293
14294/// An intervention definition in the config.
14295#[derive(Debug, Clone, Serialize, Deserialize)]
14296pub struct InterventionSchemaConfig {
14297    /// Intervention type and parameters (flattened tagged enum).
14298    #[serde(flatten)]
14299    pub intervention_type: serde_json::Value,
14300    /// Timing configuration.
14301    #[serde(default)]
14302    pub timing: InterventionTimingSchemaConfig,
14303    /// Human-readable label.
14304    pub label: Option<String>,
14305    /// Priority for conflict resolution (higher wins).
14306    #[serde(default)]
14307    pub priority: u32,
14308}
14309
14310/// Timing configuration for an intervention.
14311#[derive(Debug, Clone, Serialize, Deserialize)]
14312pub struct InterventionTimingSchemaConfig {
14313    /// Month offset from start (1-indexed).
14314    #[serde(default = "default_start_month")]
14315    pub start_month: u32,
14316    /// Duration in months.
14317    pub duration_months: Option<u32>,
14318    /// Onset type: "sudden", "gradual", "oscillating", "custom".
14319    #[serde(default = "default_onset")]
14320    pub onset: String,
14321    /// Ramp period in months.
14322    pub ramp_months: Option<u32>,
14323}
14324
/// Serde fallback for `InterventionTimingSchemaConfig::start_month`.
fn default_start_month() -> u32 {
    1_u32
}
14328
/// Serde fallback for `InterventionTimingSchemaConfig::onset`.
fn default_onset() -> String {
    String::from("sudden")
}
14332
14333impl Default for InterventionTimingSchemaConfig {
14334    fn default() -> Self {
14335        Self {
14336            start_month: 1,
14337            duration_months: None,
14338            onset: "sudden".to_string(),
14339            ramp_months: None,
14340        }
14341    }
14342}
14343
14344/// Scenario constraint overrides.
14345#[derive(Debug, Clone, Serialize, Deserialize)]
14346pub struct ScenarioConstraintsSchemaConfig {
14347    #[serde(default = "default_true")]
14348    pub preserve_accounting_identity: bool,
14349    #[serde(default = "default_true")]
14350    pub preserve_document_chains: bool,
14351    #[serde(default = "default_true")]
14352    pub preserve_period_close: bool,
14353    #[serde(default = "default_true")]
14354    pub preserve_balance_coherence: bool,
14355    #[serde(default)]
14356    pub custom: Vec<CustomConstraintSchemaConfig>,
14357}
14358
14359impl Default for ScenarioConstraintsSchemaConfig {
14360    fn default() -> Self {
14361        Self {
14362            preserve_accounting_identity: true,
14363            preserve_document_chains: true,
14364            preserve_period_close: true,
14365            preserve_balance_coherence: true,
14366            custom: Vec::new(),
14367        }
14368    }
14369}
14370
14371/// Custom constraint in config.
14372#[derive(Debug, Clone, Serialize, Deserialize)]
14373pub struct CustomConstraintSchemaConfig {
14374    pub config_path: String,
14375    pub min: Option<f64>,
14376    pub max: Option<f64>,
14377    #[serde(default)]
14378    pub description: String,
14379}
14380
14381/// Output configuration for a scenario.
14382#[derive(Debug, Clone, Serialize, Deserialize)]
14383pub struct ScenarioOutputSchemaConfig {
14384    #[serde(default = "default_true")]
14385    pub paired: bool,
14386    #[serde(default = "default_diff_formats_schema")]
14387    pub diff_formats: Vec<String>,
14388    #[serde(default)]
14389    pub diff_scope: Vec<String>,
14390}
14391
/// Serde fallback for `ScenarioOutputSchemaConfig::diff_formats`.
fn default_diff_formats_schema() -> Vec<String> {
    ["summary", "aggregate"]
        .iter()
        .map(|format| format.to_string())
        .collect()
}
14395
14396impl Default for ScenarioOutputSchemaConfig {
14397    fn default() -> Self {
14398        Self {
14399            paired: true,
14400            diff_formats: default_diff_formats_schema(),
14401            diff_scope: Vec::new(),
14402        }
14403    }
14404}
14405
14406/// Causal model configuration.
14407#[derive(Debug, Clone, Serialize, Deserialize)]
14408pub struct CausalModelSchemaConfig {
14409    /// Preset name: "default", "minimal", or "custom".
14410    #[serde(default = "default_causal_preset")]
14411    pub preset: String,
14412    /// Custom nodes (merged with preset).
14413    #[serde(default)]
14414    pub nodes: Vec<serde_json::Value>,
14415    /// Custom edges (merged with preset).
14416    #[serde(default)]
14417    pub edges: Vec<serde_json::Value>,
14418}
14419
/// Serde fallback for `CausalModelSchemaConfig::preset`.
fn default_causal_preset() -> String {
    String::from("default")
}
14423
14424impl Default for CausalModelSchemaConfig {
14425    fn default() -> Self {
14426        Self {
14427            preset: "default".to_string(),
14428            nodes: Vec::new(),
14429            edges: Vec::new(),
14430        }
14431    }
14432}
14433
14434/// Default settings applied to all scenarios.
14435#[derive(Debug, Clone, Serialize, Deserialize, Default)]
14436pub struct ScenarioDefaultsConfig {
14437    #[serde(default)]
14438    pub constraints: ScenarioConstraintsSchemaConfig,
14439    #[serde(default)]
14440    pub output: ScenarioOutputSchemaConfig,
14441}
14442
14443// =====================================================================
14444// Compliance Regulations Framework Configuration
14445// =====================================================================
14446
14447/// Top-level configuration for the compliance regulations framework.
14448///
14449/// Controls standards registry, jurisdiction profiles, temporal versioning,
14450/// audit procedure templates, compliance graph integration, and output settings.
14451///
14452/// # Example
14453///
14454/// ```yaml
14455/// compliance_regulations:
14456///   enabled: true
14457///   jurisdictions: [US, DE, GB]
14458///   reference_date: "2025-06-30"
14459///   standards_selection:
14460///     categories: [accounting, auditing, regulatory]
14461///     include: ["IFRS-16", "ASC-606"]
14462///   audit_procedures:
14463///     enabled: true
14464///     procedures_per_standard: 3
14465///   findings:
14466///     enabled: true
14467///     finding_rate: 0.05
14468///   filings:
14469///     enabled: true
14470///   graph:
14471///     enabled: true
14472///     include_compliance_nodes: true
14473///     include_compliance_edges: true
14474/// ```
14475#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14476pub struct ComplianceRegulationsConfig {
14477    /// Master switch for the compliance regulations framework.
14478    #[serde(default)]
14479    pub enabled: bool,
14480    /// Jurisdictions to generate compliance data for (ISO 3166-1 alpha-2 codes).
14481    /// If empty, inferred from company countries in the config.
14482    #[serde(default)]
14483    pub jurisdictions: Vec<String>,
14484    /// Reference date for temporal standard resolution (YYYY-MM-DD).
14485    /// Defaults to the global start_date if not set.
14486    #[serde(default)]
14487    pub reference_date: Option<String>,
14488    /// Standards selection filters.
14489    #[serde(default)]
14490    pub standards_selection: StandardsSelectionConfig,
14491    /// Audit procedure generation settings.
14492    #[serde(default)]
14493    pub audit_procedures: AuditProcedureGenConfig,
14494    /// Compliance finding generation settings.
14495    #[serde(default)]
14496    pub findings: ComplianceFindingGenConfig,
14497    /// Regulatory filing generation settings.
14498    #[serde(default)]
14499    pub filings: ComplianceFilingGenConfig,
14500    /// Compliance graph integration settings.
14501    #[serde(default)]
14502    pub graph: ComplianceGraphConfig,
14503    /// Output settings for compliance-specific files.
14504    #[serde(default)]
14505    pub output: ComplianceOutputConfig,
14506    /// v3.3.0: legal-document generation (engagement letters,
14507    /// management reps, legal opinions, regulatory filings, board
14508    /// resolutions). Requires `compliance_regulations.enabled = true`
14509    /// AND `legal_documents.enabled = true` to take effect.
14510    #[serde(default)]
14511    pub legal_documents: LegalDocumentsConfig,
14512}
14513
14514/// Legal-document generation settings (v3.3.0+).
14515///
14516/// Wires `LegalDocumentGenerator` into the orchestrator. Generates one
14517/// batch per audit engagement when enabled.
14518#[derive(Debug, Clone, Serialize, Deserialize)]
14519pub struct LegalDocumentsConfig {
14520    /// Master switch.
14521    #[serde(default)]
14522    pub enabled: bool,
14523    /// Probability of including a legal-opinion document in an engagement.
14524    #[serde(default = "default_legal_opinion_probability")]
14525    pub legal_opinion_probability: f64,
14526}
14527
/// Serde fallback for `LegalDocumentsConfig::legal_opinion_probability`.
fn default_legal_opinion_probability() -> f64 {
    const PROBABILITY: f64 = 0.40;
    PROBABILITY
}
14531
14532impl Default for LegalDocumentsConfig {
14533    fn default() -> Self {
14534        Self {
14535            enabled: false,
14536            legal_opinion_probability: default_legal_opinion_probability(),
14537        }
14538    }
14539}
14540
14541/// Filters which standards are included in the generation.
14542#[derive(Debug, Clone, Default, Serialize, Deserialize)]
14543pub struct StandardsSelectionConfig {
14544    /// Standard categories to include (accounting, auditing, regulatory, tax, esg).
14545    /// Empty = all categories.
14546    #[serde(default)]
14547    pub categories: Vec<String>,
14548    /// Explicit standard IDs to include (e.g., ["IFRS-16", "ASC-606"]).
14549    /// When non-empty, only these standards (plus mandatory ones for selected jurisdictions) are used.
14550    #[serde(default)]
14551    pub include: Vec<String>,
14552    /// Standard IDs to exclude.
14553    #[serde(default)]
14554    pub exclude: Vec<String>,
14555    /// Include superseded standards in the output (for historical analysis).
14556    #[serde(default)]
14557    pub include_superseded: bool,
14558}
14559
14560/// Configuration for audit procedure template generation.
14561#[derive(Debug, Clone, Serialize, Deserialize)]
14562pub struct AuditProcedureGenConfig {
14563    /// Whether audit procedure generation is enabled.
14564    #[serde(default)]
14565    pub enabled: bool,
14566    /// Number of procedures to generate per applicable standard.
14567    #[serde(default = "default_procedures_per_standard")]
14568    pub procedures_per_standard: usize,
14569    /// Sampling methodology: "statistical", "non_statistical", "mixed".
14570    #[serde(default = "default_sampling_method")]
14571    pub sampling_method: String,
14572    /// Confidence level for statistical sampling (0.0-1.0).
14573    #[serde(default = "default_confidence_level")]
14574    pub confidence_level: f64,
14575    /// Tolerable misstatement rate for sampling (0.0-1.0).
14576    #[serde(default = "default_tolerable_misstatement")]
14577    pub tolerable_misstatement: f64,
14578}
14579
/// Serde fallback for `AuditProcedureGenConfig::procedures_per_standard`.
fn default_procedures_per_standard() -> usize {
    3_usize
}
14583
/// Serde fallback for `AuditProcedureGenConfig::sampling_method`.
fn default_sampling_method() -> String {
    String::from("statistical")
}
14587
/// Serde fallback for `AuditProcedureGenConfig::confidence_level`.
fn default_confidence_level() -> f64 {
    const LEVEL: f64 = 0.95;
    LEVEL
}
14591
/// Serde fallback for `AuditProcedureGenConfig::tolerable_misstatement`.
fn default_tolerable_misstatement() -> f64 {
    const RATE: f64 = 0.05;
    RATE
}
14595
14596impl Default for AuditProcedureGenConfig {
14597    fn default() -> Self {
14598        Self {
14599            enabled: false,
14600            procedures_per_standard: default_procedures_per_standard(),
14601            sampling_method: default_sampling_method(),
14602            confidence_level: default_confidence_level(),
14603            tolerable_misstatement: default_tolerable_misstatement(),
14604        }
14605    }
14606}
14607
14608/// Configuration for compliance finding generation.
14609#[derive(Debug, Clone, Serialize, Deserialize)]
14610pub struct ComplianceFindingGenConfig {
14611    /// Whether finding generation is enabled.
14612    #[serde(default)]
14613    pub enabled: bool,
14614    /// Rate of findings per audit procedure (0.0-1.0).
14615    #[serde(default = "default_finding_rate")]
14616    pub finding_rate: f64,
14617    /// Rate of material weakness findings among all findings (0.0-1.0).
14618    #[serde(default = "default_cr_material_weakness_rate")]
14619    pub material_weakness_rate: f64,
14620    /// Rate of significant deficiency findings among all findings (0.0-1.0).
14621    #[serde(default = "default_cr_significant_deficiency_rate")]
14622    pub significant_deficiency_rate: f64,
14623    /// Whether to generate remediation plans for findings.
14624    #[serde(default = "default_true")]
14625    pub generate_remediation: bool,
14626}
14627
/// Serde fallback for `ComplianceFindingGenConfig::finding_rate`.
fn default_finding_rate() -> f64 {
    const RATE: f64 = 0.05;
    RATE
}
14631
/// Serde fallback for `ComplianceFindingGenConfig::material_weakness_rate`.
fn default_cr_material_weakness_rate() -> f64 {
    const RATE: f64 = 0.02;
    RATE
}
14635
/// Serde fallback for `ComplianceFindingGenConfig::significant_deficiency_rate`.
fn default_cr_significant_deficiency_rate() -> f64 {
    const RATE: f64 = 0.08;
    RATE
}
14639
14640impl Default for ComplianceFindingGenConfig {
14641    fn default() -> Self {
14642        Self {
14643            enabled: false,
14644            finding_rate: default_finding_rate(),
14645            material_weakness_rate: default_cr_material_weakness_rate(),
14646            significant_deficiency_rate: default_cr_significant_deficiency_rate(),
14647            generate_remediation: true,
14648        }
14649    }
14650}
14651
14652/// Configuration for regulatory filing generation.
14653#[derive(Debug, Clone, Serialize, Deserialize)]
14654pub struct ComplianceFilingGenConfig {
14655    /// Whether filing generation is enabled.
14656    #[serde(default)]
14657    pub enabled: bool,
14658    /// Filing types to include (e.g., ["10-K", "10-Q", "Jahresabschluss"]).
14659    /// Empty = all applicable filings for the selected jurisdictions.
14660    #[serde(default)]
14661    pub filing_types: Vec<String>,
14662    /// Generate filing status progression (draft → filed → accepted).
14663    #[serde(default = "default_true")]
14664    pub generate_status_progression: bool,
14665}
14666
14667impl Default for ComplianceFilingGenConfig {
14668    fn default() -> Self {
14669        Self {
14670            enabled: false,
14671            filing_types: Vec::new(),
14672            generate_status_progression: true,
14673        }
14674    }
14675}
14676
14677/// Configuration for compliance graph integration.
14678#[derive(Debug, Clone, Serialize, Deserialize)]
14679pub struct ComplianceGraphConfig {
14680    /// Whether compliance graph integration is enabled.
14681    #[serde(default)]
14682    pub enabled: bool,
14683    /// Include compliance nodes (Standard, Regulation, Jurisdiction, etc.).
14684    #[serde(default = "default_true")]
14685    pub include_compliance_nodes: bool,
14686    /// Include compliance edges (MapsToStandard, TestsControl, etc.).
14687    #[serde(default = "default_true")]
14688    pub include_compliance_edges: bool,
14689    /// Include cross-reference edges between standards.
14690    #[serde(default = "default_true")]
14691    pub include_cross_references: bool,
14692    /// Include temporal supersession edges.
14693    #[serde(default)]
14694    pub include_supersession_edges: bool,
14695    /// Include edges linking standards to the GL account types they govern.
14696    #[serde(default = "default_true")]
14697    pub include_account_links: bool,
14698    /// Include edges linking standards to the internal controls that implement them.
14699    #[serde(default = "default_true")]
14700    pub include_control_links: bool,
14701    /// Include edges linking filings and jurisdictions to the originating company.
14702    #[serde(default = "default_true")]
14703    pub include_company_links: bool,
14704}
14705
14706impl Default for ComplianceGraphConfig {
14707    fn default() -> Self {
14708        Self {
14709            enabled: false,
14710            include_compliance_nodes: true,
14711            include_compliance_edges: true,
14712            include_cross_references: true,
14713            include_supersession_edges: false,
14714            include_account_links: true,
14715            include_control_links: true,
14716            include_company_links: true,
14717        }
14718    }
14719}
14720
14721/// Output settings for compliance-specific data files.
14722#[derive(Debug, Clone, Serialize, Deserialize)]
14723pub struct ComplianceOutputConfig {
14724    /// Export the standards registry catalog.
14725    #[serde(default = "default_true")]
14726    pub export_registry: bool,
14727    /// Export jurisdiction profiles.
14728    #[serde(default = "default_true")]
14729    pub export_jurisdictions: bool,
14730    /// Export cross-reference map.
14731    #[serde(default = "default_true")]
14732    pub export_cross_references: bool,
14733    /// Export temporal version history.
14734    #[serde(default)]
14735    pub export_version_history: bool,
14736}
14737
14738impl Default for ComplianceOutputConfig {
14739    fn default() -> Self {
14740        Self {
14741            export_registry: true,
14742            export_jurisdictions: true,
14743            export_cross_references: true,
14744            export_version_history: false,
14745        }
14746    }
14747}
14748
14749#[cfg(test)]
14750mod tests {
14751    use super::*;
14752    use crate::presets::demo_preset;
14753
14754    // ==========================================================================
14755    // Serialization/Deserialization Tests
14756    // ==========================================================================
14757
14758    #[test]
14759    fn test_config_yaml_roundtrip() {
14760        let config = demo_preset();
14761        let yaml = serde_yaml::to_string(&config).expect("Failed to serialize to YAML");
14762        let deserialized: GeneratorConfig =
14763            serde_yaml::from_str(&yaml).expect("Failed to deserialize from YAML");
14764
14765        assert_eq!(
14766            config.global.period_months,
14767            deserialized.global.period_months
14768        );
14769        assert_eq!(config.global.industry, deserialized.global.industry);
14770        assert_eq!(config.companies.len(), deserialized.companies.len());
14771        assert_eq!(config.companies[0].code, deserialized.companies[0].code);
14772    }
14773
14774    #[test]
14775    fn test_config_json_roundtrip() {
14776        // Create a config without infinity values (JSON can't serialize f64::INFINITY)
14777        let mut config = demo_preset();
14778        // Replace infinity with a large but finite value for JSON compatibility
14779        config.master_data.employees.approval_limits.executive = 1e12;
14780
14781        let json = serde_json::to_string(&config).expect("Failed to serialize to JSON");
14782        let deserialized: GeneratorConfig =
14783            serde_json::from_str(&json).expect("Failed to deserialize from JSON");
14784
14785        assert_eq!(
14786            config.global.period_months,
14787            deserialized.global.period_months
14788        );
14789        assert_eq!(config.global.industry, deserialized.global.industry);
14790        assert_eq!(config.companies.len(), deserialized.companies.len());
14791    }
14792
14793    #[test]
14794    fn test_transaction_volume_serialization() {
14795        // Test various transaction volumes serialize correctly
14796        let volumes = vec![
14797            (TransactionVolume::TenK, "ten_k"),
14798            (TransactionVolume::HundredK, "hundred_k"),
14799            (TransactionVolume::OneM, "one_m"),
14800            (TransactionVolume::TenM, "ten_m"),
14801            (TransactionVolume::HundredM, "hundred_m"),
14802        ];
14803
14804        for (volume, expected_key) in volumes {
14805            let json = serde_json::to_string(&volume).expect("Failed to serialize");
14806            assert!(
14807                json.contains(expected_key),
14808                "Expected {} in JSON: {}",
14809                expected_key,
14810                json
14811            );
14812        }
14813    }
14814
14815    #[test]
14816    fn test_transaction_volume_custom_serialization() {
14817        let volume = TransactionVolume::Custom(12345);
14818        let json = serde_json::to_string(&volume).expect("Failed to serialize");
14819        let deserialized: TransactionVolume =
14820            serde_json::from_str(&json).expect("Failed to deserialize");
14821        assert_eq!(deserialized.count(), 12345);
14822    }
14823
14824    #[test]
14825    fn test_output_mode_serialization() {
14826        let modes = vec![
14827            OutputMode::Streaming,
14828            OutputMode::FlatFile,
14829            OutputMode::Both,
14830        ];
14831
14832        for mode in modes {
14833            let json = serde_json::to_string(&mode).expect("Failed to serialize");
14834            let deserialized: OutputMode =
14835                serde_json::from_str(&json).expect("Failed to deserialize");
14836            assert!(format!("{:?}", mode) == format!("{:?}", deserialized));
14837        }
14838    }
14839
14840    #[test]
14841    fn test_file_format_serialization() {
14842        let formats = vec![
14843            FileFormat::Csv,
14844            FileFormat::Parquet,
14845            FileFormat::Json,
14846            FileFormat::JsonLines,
14847        ];
14848
14849        for format in formats {
14850            let json = serde_json::to_string(&format).expect("Failed to serialize");
14851            let deserialized: FileFormat =
14852                serde_json::from_str(&json).expect("Failed to deserialize");
14853            assert!(format!("{:?}", format) == format!("{:?}", deserialized));
14854        }
14855    }
14856
14857    #[test]
14858    fn test_compression_algorithm_serialization() {
14859        let algos = vec![
14860            CompressionAlgorithm::Gzip,
14861            CompressionAlgorithm::Zstd,
14862            CompressionAlgorithm::Lz4,
14863            CompressionAlgorithm::Snappy,
14864        ];
14865
14866        for algo in algos {
14867            let json = serde_json::to_string(&algo).expect("Failed to serialize");
14868            let deserialized: CompressionAlgorithm =
14869                serde_json::from_str(&json).expect("Failed to deserialize");
14870            assert!(format!("{:?}", algo) == format!("{:?}", deserialized));
14871        }
14872    }
14873
14874    #[test]
14875    fn test_transfer_pricing_method_serialization() {
14876        let methods = vec![
14877            TransferPricingMethod::CostPlus,
14878            TransferPricingMethod::ComparableUncontrolled,
14879            TransferPricingMethod::ResalePrice,
14880            TransferPricingMethod::TransactionalNetMargin,
14881            TransferPricingMethod::ProfitSplit,
14882        ];
14883
14884        for method in methods {
14885            let json = serde_json::to_string(&method).expect("Failed to serialize");
14886            let deserialized: TransferPricingMethod =
14887                serde_json::from_str(&json).expect("Failed to deserialize");
14888            assert!(format!("{:?}", method) == format!("{:?}", deserialized));
14889        }
14890    }
14891
14892    #[test]
14893    fn test_benford_exemption_serialization() {
14894        let exemptions = vec![
14895            BenfordExemption::Recurring,
14896            BenfordExemption::Payroll,
14897            BenfordExemption::FixedFees,
14898            BenfordExemption::RoundAmounts,
14899        ];
14900
14901        for exemption in exemptions {
14902            let json = serde_json::to_string(&exemption).expect("Failed to serialize");
14903            let deserialized: BenfordExemption =
14904                serde_json::from_str(&json).expect("Failed to deserialize");
14905            assert!(format!("{:?}", exemption) == format!("{:?}", deserialized));
14906        }
14907    }
14908
14909    // ==========================================================================
14910    // Default Value Tests
14911    // ==========================================================================
14912
14913    #[test]
14914    fn test_global_config_defaults() {
14915        let yaml = r#"
14916            industry: manufacturing
14917            start_date: "2024-01-01"
14918            period_months: 6
14919        "#;
14920        let config: GlobalConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
14921        assert_eq!(config.group_currency, "USD");
14922        assert!(config.parallel);
14923        assert_eq!(config.worker_threads, 0);
14924        assert_eq!(config.memory_limit_mb, 0);
14925    }
14926
14927    #[test]
14928    fn test_fraud_config_defaults() {
14929        let config = FraudConfig::default();
14930        assert!(!config.enabled);
14931        assert_eq!(config.fraud_rate, 0.005);
14932        assert!(!config.clustering_enabled);
14933    }
14934
14935    #[test]
14936    fn test_internal_controls_config_defaults() {
14937        let config = InternalControlsConfig::default();
14938        assert!(!config.enabled);
14939        assert_eq!(config.exception_rate, 0.02);
14940        assert_eq!(config.sod_violation_rate, 0.01);
14941        assert!(config.export_control_master_data);
14942        assert_eq!(config.sox_materiality_threshold, 10000.0);
14943        // COSO fields
14944        assert!(config.coso_enabled);
14945        assert!(!config.include_entity_level_controls);
14946        assert_eq!(config.target_maturity_level, "mixed");
14947    }
14948
14949    #[test]
14950    fn test_output_config_defaults() {
14951        let config = OutputConfig::default();
14952        assert!(matches!(config.mode, OutputMode::FlatFile));
14953        assert_eq!(config.formats, vec![FileFormat::Parquet]);
14954        assert!(config.compression.enabled);
14955        assert!(matches!(
14956            config.compression.algorithm,
14957            CompressionAlgorithm::Zstd
14958        ));
14959        assert!(config.include_acdoca);
14960        assert!(!config.include_bseg);
14961        assert!(config.partition_by_period);
14962        assert!(!config.partition_by_company);
14963    }
14964
14965    #[test]
14966    fn test_approval_config_defaults() {
14967        let config = ApprovalConfig::default();
14968        assert!(!config.enabled);
14969        assert_eq!(config.auto_approve_threshold, 1000.0);
14970        assert_eq!(config.rejection_rate, 0.02);
14971        assert_eq!(config.revision_rate, 0.05);
14972        assert_eq!(config.average_approval_delay_hours, 4.0);
14973        assert_eq!(config.thresholds.len(), 4);
14974    }
14975
14976    #[test]
14977    fn test_p2p_flow_config_defaults() {
14978        let config = P2PFlowConfig::default();
14979        assert!(config.enabled);
14980        assert_eq!(config.three_way_match_rate, 0.95);
14981        assert_eq!(config.partial_delivery_rate, 0.15);
14982        assert_eq!(config.average_po_to_gr_days, 14);
14983    }
14984
14985    #[test]
14986    fn test_o2c_flow_config_defaults() {
14987        let config = O2CFlowConfig::default();
14988        assert!(config.enabled);
14989        assert_eq!(config.credit_check_failure_rate, 0.02);
14990        assert_eq!(config.return_rate, 0.03);
14991        assert_eq!(config.bad_debt_rate, 0.01);
14992    }
14993
14994    #[test]
14995    fn test_balance_config_defaults() {
14996        let config = BalanceConfig::default();
14997        assert!(!config.generate_opening_balances);
14998        assert!(config.generate_trial_balances);
14999        assert_eq!(config.target_gross_margin, 0.35);
15000        assert!(config.validate_balance_equation);
15001        assert!(config.reconcile_subledgers);
15002    }
15003
15004    // ==========================================================================
15005    // Partial Config Deserialization Tests
15006    // ==========================================================================
15007
15008    #[test]
15009    fn test_partial_config_with_defaults() {
15010        // Minimal config that should use all defaults
15011        let yaml = r#"
15012            global:
15013              industry: manufacturing
15014              start_date: "2024-01-01"
15015              period_months: 3
15016            companies:
15017              - code: "TEST"
15018                name: "Test Company"
15019                currency: "USD"
15020                country: "US"
15021                annual_transaction_volume: ten_k
15022            chart_of_accounts:
15023              complexity: small
15024            output:
15025              output_directory: "./output"
15026        "#;
15027
15028        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15029        assert_eq!(config.global.period_months, 3);
15030        assert_eq!(config.companies.len(), 1);
15031        assert!(!config.fraud.enabled); // Default
15032        assert!(!config.internal_controls.enabled); // Default
15033    }
15034
15035    #[test]
15036    fn test_config_with_fraud_enabled() {
15037        let yaml = r#"
15038            global:
15039              industry: retail
15040              start_date: "2024-01-01"
15041              period_months: 12
15042            companies:
15043              - code: "RETAIL"
15044                name: "Retail Co"
15045                currency: "USD"
15046                country: "US"
15047                annual_transaction_volume: hundred_k
15048            chart_of_accounts:
15049              complexity: medium
15050            output:
15051              output_directory: "./output"
15052            fraud:
15053              enabled: true
15054              fraud_rate: 0.05
15055              clustering_enabled: true
15056        "#;
15057
15058        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15059        assert!(config.fraud.enabled);
15060        assert_eq!(config.fraud.fraud_rate, 0.05);
15061        assert!(config.fraud.clustering_enabled);
15062    }
15063
15064    #[test]
15065    fn test_config_with_multiple_companies() {
15066        let yaml = r#"
15067            global:
15068              industry: manufacturing
15069              start_date: "2024-01-01"
15070              period_months: 6
15071            companies:
15072              - code: "HQ"
15073                name: "Headquarters"
15074                currency: "USD"
15075                country: "US"
15076                annual_transaction_volume: hundred_k
15077                volume_weight: 1.0
15078              - code: "EU"
15079                name: "European Subsidiary"
15080                currency: "EUR"
15081                country: "DE"
15082                annual_transaction_volume: hundred_k
15083                volume_weight: 0.5
15084              - code: "APAC"
15085                name: "Asia Pacific"
15086                currency: "JPY"
15087                country: "JP"
15088                annual_transaction_volume: ten_k
15089                volume_weight: 0.3
15090            chart_of_accounts:
15091              complexity: large
15092            output:
15093              output_directory: "./output"
15094        "#;
15095
15096        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15097        assert_eq!(config.companies.len(), 3);
15098        assert_eq!(config.companies[0].code, "HQ");
15099        assert_eq!(config.companies[1].currency, "EUR");
15100        assert_eq!(config.companies[2].volume_weight, 0.3);
15101    }
15102
15103    #[test]
15104    fn test_intercompany_config() {
15105        let yaml = r#"
15106            enabled: true
15107            ic_transaction_rate: 0.20
15108            transfer_pricing_method: cost_plus
15109            markup_percent: 0.08
15110            generate_matched_pairs: true
15111            generate_eliminations: true
15112        "#;
15113
15114        let config: IntercompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15115        assert!(config.enabled);
15116        assert_eq!(config.ic_transaction_rate, 0.20);
15117        assert!(matches!(
15118            config.transfer_pricing_method,
15119            TransferPricingMethod::CostPlus
15120        ));
15121        assert_eq!(config.markup_percent, 0.08);
15122        assert!(config.generate_eliminations);
15123    }
15124
15125    // ==========================================================================
15126    // Company Config Tests
15127    // ==========================================================================
15128
15129    #[test]
15130    fn test_company_config_defaults() {
15131        let yaml = r#"
15132            code: "TEST"
15133            name: "Test Company"
15134            currency: "USD"
15135            country: "US"
15136            annual_transaction_volume: ten_k
15137        "#;
15138
15139        let config: CompanyConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15140        assert_eq!(config.fiscal_year_variant, "K4"); // Default
15141        assert_eq!(config.volume_weight, 1.0); // Default
15142    }
15143
15144    // ==========================================================================
15145    // Chart of Accounts Config Tests
15146    // ==========================================================================
15147
15148    #[test]
15149    fn test_coa_config_defaults() {
15150        let yaml = r#"
15151            complexity: medium
15152        "#;
15153
15154        let config: ChartOfAccountsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15155        assert!(config.industry_specific); // Default true
15156        assert!(config.custom_accounts.is_none());
15157        assert_eq!(config.min_hierarchy_depth, 2); // Default
15158        assert_eq!(config.max_hierarchy_depth, 5); // Default
15159    }
15160
15161    // ==========================================================================
15162    // Accounting Standards Config Tests
15163    // ==========================================================================
15164
15165    #[test]
15166    fn test_accounting_standards_config_defaults() {
15167        let config = AccountingStandardsConfig::default();
15168        assert!(!config.enabled);
15169        assert!(config.framework.is_none());
15170        assert!(!config.revenue_recognition.enabled);
15171        assert!(!config.leases.enabled);
15172        assert!(!config.fair_value.enabled);
15173        assert!(!config.impairment.enabled);
15174        assert!(!config.generate_differences);
15175    }
15176
15177    #[test]
15178    fn test_accounting_standards_config_yaml() {
15179        let yaml = r#"
15180            enabled: true
15181            framework: ifrs
15182            revenue_recognition:
15183              enabled: true
15184              generate_contracts: true
15185              avg_obligations_per_contract: 2.5
15186              variable_consideration_rate: 0.20
15187              over_time_recognition_rate: 0.35
15188              contract_count: 150
15189            leases:
15190              enabled: true
15191              lease_count: 75
15192              finance_lease_percent: 0.25
15193              avg_lease_term_months: 48
15194            generate_differences: true
15195        "#;
15196
15197        let config: AccountingStandardsConfig =
15198            serde_yaml::from_str(yaml).expect("Failed to parse");
15199        assert!(config.enabled);
15200        assert!(matches!(
15201            config.framework,
15202            Some(AccountingFrameworkConfig::Ifrs)
15203        ));
15204        assert!(config.revenue_recognition.enabled);
15205        assert_eq!(config.revenue_recognition.contract_count, 150);
15206        assert_eq!(config.revenue_recognition.avg_obligations_per_contract, 2.5);
15207        assert!(config.leases.enabled);
15208        assert_eq!(config.leases.lease_count, 75);
15209        assert_eq!(config.leases.finance_lease_percent, 0.25);
15210        assert!(config.generate_differences);
15211    }
15212
15213    #[test]
15214    fn test_accounting_framework_serialization() {
15215        let frameworks = [
15216            AccountingFrameworkConfig::UsGaap,
15217            AccountingFrameworkConfig::Ifrs,
15218            AccountingFrameworkConfig::DualReporting,
15219            AccountingFrameworkConfig::FrenchGaap,
15220            AccountingFrameworkConfig::GermanGaap,
15221        ];
15222
15223        for framework in frameworks {
15224            let json = serde_json::to_string(&framework).expect("Failed to serialize");
15225            let deserialized: AccountingFrameworkConfig =
15226                serde_json::from_str(&json).expect("Failed to deserialize");
15227            assert!(format!("{:?}", framework) == format!("{:?}", deserialized));
15228        }
15229    }
15230
15231    #[test]
15232    fn test_revenue_recognition_config_defaults() {
15233        let config = RevenueRecognitionConfig::default();
15234        assert!(!config.enabled);
15235        assert!(config.generate_contracts);
15236        assert_eq!(config.avg_obligations_per_contract, 2.0);
15237        assert_eq!(config.variable_consideration_rate, 0.15);
15238        assert_eq!(config.over_time_recognition_rate, 0.30);
15239        assert_eq!(config.contract_count, 100);
15240    }
15241
15242    #[test]
15243    fn test_lease_accounting_config_defaults() {
15244        let config = LeaseAccountingConfig::default();
15245        assert!(!config.enabled);
15246        assert_eq!(config.lease_count, 50);
15247        assert_eq!(config.finance_lease_percent, 0.30);
15248        assert_eq!(config.avg_lease_term_months, 60);
15249        assert!(config.generate_amortization);
15250        assert_eq!(config.real_estate_percent, 0.40);
15251    }
15252
15253    #[test]
15254    fn test_fair_value_config_defaults() {
15255        let config = FairValueConfig::default();
15256        assert!(!config.enabled);
15257        assert_eq!(config.measurement_count, 25);
15258        assert_eq!(config.level1_percent, 0.40);
15259        assert_eq!(config.level2_percent, 0.35);
15260        assert_eq!(config.level3_percent, 0.25);
15261        assert!(!config.include_sensitivity_analysis);
15262    }
15263
15264    #[test]
15265    fn test_impairment_config_defaults() {
15266        let config = ImpairmentConfig::default();
15267        assert!(!config.enabled);
15268        assert_eq!(config.test_count, 15);
15269        assert_eq!(config.impairment_rate, 0.10);
15270        assert!(config.generate_projections);
15271        assert!(!config.include_goodwill);
15272    }
15273
15274    // ==========================================================================
15275    // Audit Standards Config Tests
15276    // ==========================================================================
15277
15278    #[test]
15279    fn test_audit_standards_config_defaults() {
15280        let config = AuditStandardsConfig::default();
15281        assert!(!config.enabled);
15282        assert!(!config.isa_compliance.enabled);
15283        assert!(!config.analytical_procedures.enabled);
15284        assert!(!config.confirmations.enabled);
15285        assert!(!config.opinion.enabled);
15286        assert!(!config.generate_audit_trail);
15287        assert!(!config.sox.enabled);
15288        assert!(!config.pcaob.enabled);
15289    }
15290
15291    #[test]
15292    fn test_audit_standards_config_yaml() {
15293        let yaml = r#"
15294            enabled: true
15295            isa_compliance:
15296              enabled: true
15297              compliance_level: comprehensive
15298              generate_isa_mappings: true
15299              include_pcaob: true
15300              framework: dual
15301            analytical_procedures:
15302              enabled: true
15303              procedures_per_account: 5
15304              variance_probability: 0.25
15305            confirmations:
15306              enabled: true
15307              confirmation_count: 75
15308              positive_response_rate: 0.90
15309              exception_rate: 0.08
15310            opinion:
15311              enabled: true
15312              generate_kam: true
15313              average_kam_count: 4
15314            sox:
15315              enabled: true
15316              generate_302_certifications: true
15317              generate_404_assessments: true
15318              material_weakness_rate: 0.03
15319            pcaob:
15320              enabled: true
15321              is_pcaob_audit: true
15322              include_icfr_opinion: true
15323            generate_audit_trail: true
15324        "#;
15325
15326        let config: AuditStandardsConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15327        assert!(config.enabled);
15328        assert!(config.isa_compliance.enabled);
15329        assert_eq!(config.isa_compliance.compliance_level, "comprehensive");
15330        assert!(config.isa_compliance.include_pcaob);
15331        assert_eq!(config.isa_compliance.framework, "dual");
15332        assert!(config.analytical_procedures.enabled);
15333        assert_eq!(config.analytical_procedures.procedures_per_account, 5);
15334        assert!(config.confirmations.enabled);
15335        assert_eq!(config.confirmations.confirmation_count, 75);
15336        assert!(config.opinion.enabled);
15337        assert_eq!(config.opinion.average_kam_count, 4);
15338        assert!(config.sox.enabled);
15339        assert!(config.sox.generate_302_certifications);
15340        assert_eq!(config.sox.material_weakness_rate, 0.03);
15341        assert!(config.pcaob.enabled);
15342        assert!(config.pcaob.is_pcaob_audit);
15343        assert!(config.pcaob.include_icfr_opinion);
15344        assert!(config.generate_audit_trail);
15345    }
15346
15347    #[test]
15348    fn test_isa_compliance_config_defaults() {
15349        let config = IsaComplianceConfig::default();
15350        assert!(!config.enabled);
15351        assert_eq!(config.compliance_level, "standard");
15352        assert!(config.generate_isa_mappings);
15353        assert!(config.generate_coverage_summary);
15354        assert!(!config.include_pcaob);
15355        assert_eq!(config.framework, "isa");
15356    }
15357
15358    #[test]
15359    fn test_sox_compliance_config_defaults() {
15360        let config = SoxComplianceConfig::default();
15361        assert!(!config.enabled);
15362        assert!(config.generate_302_certifications);
15363        assert!(config.generate_404_assessments);
15364        assert_eq!(config.materiality_threshold, 10000.0);
15365        assert_eq!(config.material_weakness_rate, 0.02);
15366        assert_eq!(config.significant_deficiency_rate, 0.08);
15367    }
15368
15369    #[test]
15370    fn test_pcaob_config_defaults() {
15371        let config = PcaobConfig::default();
15372        assert!(!config.enabled);
15373        assert!(!config.is_pcaob_audit);
15374        assert!(config.generate_cam);
15375        assert!(!config.include_icfr_opinion);
15376        assert!(!config.generate_standard_mappings);
15377    }
15378
15379    #[test]
15380    fn test_config_with_standards_enabled() {
15381        let yaml = r#"
15382            global:
15383              industry: financial_services
15384              start_date: "2024-01-01"
15385              period_months: 12
15386            companies:
15387              - code: "BANK"
15388                name: "Test Bank"
15389                currency: "USD"
15390                country: "US"
15391                annual_transaction_volume: hundred_k
15392            chart_of_accounts:
15393              complexity: large
15394            output:
15395              output_directory: "./output"
15396            accounting_standards:
15397              enabled: true
15398              framework: us_gaap
15399              revenue_recognition:
15400                enabled: true
15401              leases:
15402                enabled: true
15403            audit_standards:
15404              enabled: true
15405              isa_compliance:
15406                enabled: true
15407              sox:
15408                enabled: true
15409        "#;
15410
15411        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15412        assert!(config.accounting_standards.enabled);
15413        assert!(matches!(
15414            config.accounting_standards.framework,
15415            Some(AccountingFrameworkConfig::UsGaap)
15416        ));
15417        assert!(config.accounting_standards.revenue_recognition.enabled);
15418        assert!(config.accounting_standards.leases.enabled);
15419        assert!(config.audit_standards.enabled);
15420        assert!(config.audit_standards.isa_compliance.enabled);
15421        assert!(config.audit_standards.sox.enabled);
15422    }
15423
15424    // ==========================================================================
15425    // Industry-Specific Config Tests
15426    // ==========================================================================
15427
15428    #[test]
15429    fn test_industry_specific_config_defaults() {
15430        let config = IndustrySpecificConfig::default();
15431        assert!(!config.enabled);
15432        assert!(!config.manufacturing.enabled);
15433        assert!(!config.retail.enabled);
15434        assert!(!config.healthcare.enabled);
15435        assert!(!config.technology.enabled);
15436        assert!(!config.financial_services.enabled);
15437        assert!(!config.professional_services.enabled);
15438    }
15439
15440    #[test]
15441    fn test_manufacturing_config_defaults() {
15442        let config = ManufacturingConfig::default();
15443        assert!(!config.enabled);
15444        assert_eq!(config.bom_depth, 4);
15445        assert!(!config.just_in_time);
15446        assert_eq!(config.supplier_tiers, 2);
15447        assert_eq!(config.target_yield_rate, 0.97);
15448        assert_eq!(config.scrap_alert_threshold, 0.03);
15449    }
15450
15451    #[test]
15452    fn test_retail_config_defaults() {
15453        let config = RetailConfig::default();
15454        assert!(!config.enabled);
15455        assert_eq!(config.avg_daily_transactions, 500);
15456        assert!(config.loss_prevention);
15457        assert_eq!(config.shrinkage_rate, 0.015);
15458    }
15459
15460    #[test]
15461    fn test_healthcare_config_defaults() {
15462        let config = HealthcareConfig::default();
15463        assert!(!config.enabled);
15464        assert_eq!(config.facility_type, "hospital");
15465        assert_eq!(config.avg_daily_encounters, 150);
15466        assert!(config.compliance.hipaa);
15467        assert!(config.compliance.stark_law);
15468        assert!(config.coding_systems.icd10);
15469        assert!(config.coding_systems.cpt);
15470    }
15471
15472    #[test]
15473    fn test_technology_config_defaults() {
15474        let config = TechnologyConfig::default();
15475        assert!(!config.enabled);
15476        assert_eq!(config.revenue_model, "saas");
15477        assert_eq!(config.subscription_revenue_pct, 0.60);
15478        assert!(config.rd_capitalization.enabled);
15479    }
15480
15481    #[test]
15482    fn test_config_with_industry_specific() {
15483        let yaml = r#"
15484            global:
15485              industry: healthcare
15486              start_date: "2024-01-01"
15487              period_months: 12
15488            companies:
15489              - code: "HOSP"
15490                name: "Test Hospital"
15491                currency: "USD"
15492                country: "US"
15493                annual_transaction_volume: hundred_k
15494            chart_of_accounts:
15495              complexity: medium
15496            output:
15497              output_directory: "./output"
15498            industry_specific:
15499              enabled: true
15500              healthcare:
15501                enabled: true
15502                facility_type: hospital
15503                payer_mix:
15504                  medicare: 0.45
15505                  medicaid: 0.15
15506                  commercial: 0.35
15507                  self_pay: 0.05
15508                coding_systems:
15509                  icd10: true
15510                  cpt: true
15511                  drg: true
15512                compliance:
15513                  hipaa: true
15514                  stark_law: true
15515                anomaly_rates:
15516                  upcoding: 0.03
15517                  unbundling: 0.02
15518        "#;
15519
15520        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15521        assert!(config.industry_specific.enabled);
15522        assert!(config.industry_specific.healthcare.enabled);
15523        assert_eq!(
15524            config.industry_specific.healthcare.facility_type,
15525            "hospital"
15526        );
15527        assert_eq!(config.industry_specific.healthcare.payer_mix.medicare, 0.45);
15528        assert_eq!(config.industry_specific.healthcare.payer_mix.self_pay, 0.05);
15529        assert!(config.industry_specific.healthcare.coding_systems.icd10);
15530        assert!(config.industry_specific.healthcare.compliance.hipaa);
15531        assert_eq!(
15532            config.industry_specific.healthcare.anomaly_rates.upcoding,
15533            0.03
15534        );
15535    }
15536
15537    #[test]
15538    fn test_config_with_manufacturing_specific() {
15539        let yaml = r#"
15540            global:
15541              industry: manufacturing
15542              start_date: "2024-01-01"
15543              period_months: 12
15544            companies:
15545              - code: "MFG"
15546                name: "Test Manufacturing"
15547                currency: "USD"
15548                country: "US"
15549                annual_transaction_volume: hundred_k
15550            chart_of_accounts:
15551              complexity: medium
15552            output:
15553              output_directory: "./output"
15554            industry_specific:
15555              enabled: true
15556              manufacturing:
15557                enabled: true
15558                bom_depth: 5
15559                just_in_time: true
15560                supplier_tiers: 3
15561                target_yield_rate: 0.98
15562                anomaly_rates:
15563                  yield_manipulation: 0.02
15564                  phantom_production: 0.01
15565        "#;
15566
15567        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15568        assert!(config.industry_specific.enabled);
15569        assert!(config.industry_specific.manufacturing.enabled);
15570        assert_eq!(config.industry_specific.manufacturing.bom_depth, 5);
15571        assert!(config.industry_specific.manufacturing.just_in_time);
15572        assert_eq!(config.industry_specific.manufacturing.supplier_tiers, 3);
15573        assert_eq!(
15574            config.industry_specific.manufacturing.target_yield_rate,
15575            0.98
15576        );
15577        assert_eq!(
15578            config
15579                .industry_specific
15580                .manufacturing
15581                .anomaly_rates
15582                .yield_manipulation,
15583            0.02
15584        );
15585    }
15586
15587    // ==========================================================================
15588    // Tax Configuration Tests
15589    // ==========================================================================
15590
15591    #[test]
15592    fn test_tax_config_defaults() {
15593        let tax = TaxConfig::default();
15594        assert!(!tax.enabled);
15595        assert!(tax.jurisdictions.countries.is_empty());
15596        assert!(!tax.jurisdictions.include_subnational);
15597        assert!(!tax.vat_gst.enabled);
15598        assert!(tax.vat_gst.standard_rates.is_empty());
15599        assert!(tax.vat_gst.reduced_rates.is_empty());
15600        assert!(tax.vat_gst.exempt_categories.is_empty());
15601        assert!(tax.vat_gst.reverse_charge);
15602        assert!(!tax.sales_tax.enabled);
15603        assert!(tax.sales_tax.nexus_states.is_empty());
15604        assert!(!tax.withholding.enabled);
15605        assert!(tax.withholding.treaty_network);
15606        assert_eq!(tax.withholding.default_rate, 0.30);
15607        assert_eq!(tax.withholding.treaty_reduced_rate, 0.15);
15608        assert!(tax.provisions.enabled);
15609        assert_eq!(tax.provisions.statutory_rate, 0.21);
15610        assert!(tax.provisions.uncertain_positions);
15611        assert!(!tax.payroll_tax.enabled);
15612        assert_eq!(tax.anomaly_rate, 0.03);
15613    }
15614
15615    #[test]
15616    fn test_tax_config_from_yaml() {
15617        let yaml = r#"
15618            global:
15619              seed: 42
15620              start_date: "2024-01-01"
15621              period_months: 12
15622              industry: retail
15623            companies:
15624              - code: C001
15625                name: Test Corp
15626                currency: USD
15627                country: US
15628                annual_transaction_volume: ten_k
15629            chart_of_accounts:
15630              complexity: small
15631            output:
15632              output_directory: ./output
15633            tax:
15634              enabled: true
15635              anomaly_rate: 0.05
15636              jurisdictions:
15637                countries: ["US", "DE", "GB"]
15638                include_subnational: true
15639              vat_gst:
15640                enabled: true
15641                standard_rates:
15642                  DE: 0.19
15643                  GB: 0.20
15644                reduced_rates:
15645                  DE: 0.07
15646                  GB: 0.05
15647                exempt_categories:
15648                  - financial_services
15649                  - healthcare
15650                reverse_charge: false
15651              sales_tax:
15652                enabled: true
15653                nexus_states: ["CA", "NY", "TX"]
15654              withholding:
15655                enabled: true
15656                treaty_network: false
15657                default_rate: 0.25
15658                treaty_reduced_rate: 0.10
15659              provisions:
15660                enabled: false
15661                statutory_rate: 0.28
15662                uncertain_positions: false
15663              payroll_tax:
15664                enabled: true
15665        "#;
15666
15667        let config: GeneratorConfig = serde_yaml::from_str(yaml).expect("Failed to parse");
15668        assert!(config.tax.enabled);
15669        assert_eq!(config.tax.anomaly_rate, 0.05);
15670
15671        // Jurisdictions
15672        assert_eq!(config.tax.jurisdictions.countries.len(), 3);
15673        assert!(config
15674            .tax
15675            .jurisdictions
15676            .countries
15677            .contains(&"DE".to_string()));
15678        assert!(config.tax.jurisdictions.include_subnational);
15679
15680        // VAT/GST
15681        assert!(config.tax.vat_gst.enabled);
15682        assert_eq!(config.tax.vat_gst.standard_rates.get("DE"), Some(&0.19));
15683        assert_eq!(config.tax.vat_gst.standard_rates.get("GB"), Some(&0.20));
15684        assert_eq!(config.tax.vat_gst.reduced_rates.get("DE"), Some(&0.07));
15685        assert_eq!(config.tax.vat_gst.exempt_categories.len(), 2);
15686        assert!(!config.tax.vat_gst.reverse_charge);
15687
15688        // Sales tax
15689        assert!(config.tax.sales_tax.enabled);
15690        assert_eq!(config.tax.sales_tax.nexus_states.len(), 3);
15691        assert!(config
15692            .tax
15693            .sales_tax
15694            .nexus_states
15695            .contains(&"CA".to_string()));
15696
15697        // Withholding
15698        assert!(config.tax.withholding.enabled);
15699        assert!(!config.tax.withholding.treaty_network);
15700        assert_eq!(config.tax.withholding.default_rate, 0.25);
15701        assert_eq!(config.tax.withholding.treaty_reduced_rate, 0.10);
15702
15703        // Provisions
15704        assert!(!config.tax.provisions.enabled);
15705        assert_eq!(config.tax.provisions.statutory_rate, 0.28);
15706        assert!(!config.tax.provisions.uncertain_positions);
15707
15708        // Payroll tax
15709        assert!(config.tax.payroll_tax.enabled);
15710    }
15711
15712    #[test]
15713    fn test_generator_config_with_tax_default() {
15714        let yaml = r#"
15715            global:
15716              seed: 42
15717              start_date: "2024-01-01"
15718              period_months: 12
15719              industry: retail
15720            companies:
15721              - code: C001
15722                name: Test Corp
15723                currency: USD
15724                country: US
15725                annual_transaction_volume: ten_k
15726            chart_of_accounts:
15727              complexity: small
15728            output:
15729              output_directory: ./output
15730        "#;
15731
15732        let config: GeneratorConfig =
15733            serde_yaml::from_str(yaml).expect("Failed to parse config without tax section");
15734        // Tax should be present with defaults when not specified in YAML
15735        assert!(!config.tax.enabled);
15736        assert!(config.tax.jurisdictions.countries.is_empty());
15737        assert_eq!(config.tax.anomaly_rate, 0.03);
15738        assert!(config.tax.provisions.enabled); // provisions default to enabled=true
15739        assert_eq!(config.tax.provisions.statutory_rate, 0.21);
15740    }
15741
15742    // ==========================================================================
15743    // SessionSchemaConfig Tests
15744    // ==========================================================================
15745
15746    #[test]
15747    fn test_session_config_default_disabled() {
15748        let yaml = "{}";
15749        let config: SessionSchemaConfig =
15750            serde_yaml::from_str(yaml).expect("Failed to parse empty session config");
15751        assert!(!config.enabled);
15752        assert!(config.checkpoint_path.is_none());
15753        assert!(config.per_period_output);
15754        assert!(config.consolidated_output);
15755    }
15756
15757    #[test]
15758    fn test_config_backward_compatible_without_session() {
15759        let yaml = r#"
15760            global:
15761              seed: 42
15762              start_date: "2024-01-01"
15763              period_months: 12
15764              industry: retail
15765            companies:
15766              - code: C001
15767                name: Test Corp
15768                currency: USD
15769                country: US
15770                annual_transaction_volume: ten_k
15771            chart_of_accounts:
15772              complexity: small
15773            output:
15774              output_directory: ./output
15775        "#;
15776
15777        let config: GeneratorConfig =
15778            serde_yaml::from_str(yaml).expect("Failed to parse config without session");
15779        // Session should default to disabled
15780        assert!(!config.session.enabled);
15781        assert!(config.session.per_period_output);
15782        assert!(config.session.consolidated_output);
15783        // fiscal_year_months should be None
15784        assert!(config.global.fiscal_year_months.is_none());
15785    }
15786
15787    #[test]
15788    fn test_fiscal_year_months_parsed() {
15789        let yaml = r#"
15790            global:
15791              seed: 42
15792              start_date: "2024-01-01"
15793              period_months: 24
15794              industry: retail
15795              fiscal_year_months: 12
15796            companies:
15797              - code: C001
15798                name: Test Corp
15799                currency: USD
15800                country: US
15801                annual_transaction_volume: ten_k
15802            chart_of_accounts:
15803              complexity: small
15804            output:
15805              output_directory: ./output
15806            session:
15807              enabled: true
15808              checkpoint_path: /tmp/checkpoints
15809              per_period_output: true
15810              consolidated_output: false
15811        "#;
15812
15813        let config: GeneratorConfig =
15814            serde_yaml::from_str(yaml).expect("Failed to parse config with fiscal_year_months");
15815        assert_eq!(config.global.fiscal_year_months, Some(12));
15816        assert!(config.session.enabled);
15817        assert_eq!(
15818            config.session.checkpoint_path,
15819            Some("/tmp/checkpoints".to_string())
15820        );
15821        assert!(config.session.per_period_output);
15822        assert!(!config.session.consolidated_output);
15823    }
15824
15825    // -----------------------------------------------------------------------
15826    // SP3 — IndustryProfileField / IndustryPriorsConfig tests
15827    // -----------------------------------------------------------------------
15828
15829    #[test]
15830    fn industry_profile_legacy_string_form_parses() {
15831        // Legacy YAML: bare enum variant name.  Must round-trip without changes
15832        // to existing config files.
15833        let yaml = r#"
15834enabled: true
15835industry_profile: retail
15836"#;
15837        let cfg: AdvancedDistributionConfig =
15838            serde_yaml::from_str(yaml).expect("parse legacy industry_profile string");
15839        let profile = cfg.industry_profile.expect("Some");
15840        assert_eq!(profile.profile_type(), IndustryProfileType::Retail);
15841        assert!(profile.priors().is_none());
15842    }
15843
15844    #[test]
15845    fn industry_profile_full_form_with_priors_parses() {
15846        let yaml = r#"
15847enabled: true
15848industry_profile:
15849  name: healthcare
15850  priors:
15851    enabled: true
15852    source: bundled
15853"#;
15854        let cfg: AdvancedDistributionConfig =
15855            serde_yaml::from_str(yaml).expect("parse full industry_profile struct");
15856        let profile = cfg.industry_profile.expect("Some");
15857        assert_eq!(profile.profile_type(), IndustryProfileType::Healthcare);
15858        let priors = profile.priors().expect("priors present");
15859        assert!(priors.enabled);
15860        assert_eq!(priors.source, PriorsSource::Bundled);
15861        assert!(priors.path.is_none());
15862    }
15863
15864    #[test]
15865    fn industry_profile_full_form_without_priors_parses() {
15866        // Struct form with only `name` and no priors block.
15867        let yaml = r#"
15868enabled: true
15869industry_profile:
15870  name: manufacturing
15871"#;
15872        let cfg: AdvancedDistributionConfig =
15873            serde_yaml::from_str(yaml).expect("parse struct without priors");
15874        let profile = cfg.industry_profile.expect("Some");
15875        assert_eq!(profile.profile_type(), IndustryProfileType::Manufacturing);
15876        assert!(profile.priors().is_none());
15877    }
15878
15879    #[test]
15880    fn industry_profile_priors_file_without_path_fails_validation() {
15881        use crate::validation::validate_config;
15882
15883        // Minimal valid config plumbing.
15884        let yaml = r#"
15885global:
15886  seed: 42
15887  start_date: "2024-01-01"
15888  period_months: 1
15889  industry: retail
15890companies:
15891  - code: C001
15892    name: Test Corp
15893    currency: USD
15894    country: US
15895    annual_transaction_volume: ten_k
15896chart_of_accounts:
15897  complexity: small
15898output:
15899  output_directory: ./output
15900distributions:
15901  enabled: true
15902  industry_profile:
15903    name: retail
15904    priors:
15905      enabled: true
15906      source: file
15907"#;
15908        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
15909        let err = validate_config(&cfg).expect_err("path required when source=file");
15910        let msg = err.to_string();
15911        assert!(
15912            msg.contains("path") || msg.contains("required"),
15913            "unexpected error message: {msg}"
15914        );
15915    }
15916
15917    #[test]
15918    fn industry_profile_priors_file_with_path_passes_validation() {
15919        use crate::validation::validate_config;
15920
15921        let yaml = r#"
15922global:
15923  seed: 42
15924  start_date: "2024-01-01"
15925  period_months: 1
15926  industry: retail
15927companies:
15928  - code: C001
15929    name: Test Corp
15930    currency: USD
15931    country: US
15932    annual_transaction_volume: ten_k
15933chart_of_accounts:
15934  complexity: small
15935output:
15936  output_directory: ./output
15937distributions:
15938  enabled: true
15939  industry_profile:
15940    name: retail
15941    priors:
15942      enabled: true
15943      source: file
15944      path: /tmp/priors.json
15945"#;
15946        let cfg: GeneratorConfig = serde_yaml::from_str(yaml).expect("serde parse should succeed");
15947        validate_config(&cfg).expect("validation should pass with path supplied");
15948    }
15949}