Skip to main content

datasynth_runtime/
output_writer.rs

1//! Comprehensive output writer for all generated data.
2//!
3//! Writes all generated data from the EnhancedGenerationResult to files
4//! in the output directory. Uses CSV for flat tabular data (journal entry
5//! lines) and JSON for types with nested structures (Vecs, sub-structs).
6
7use std::cell::Cell;
8use std::io::Write;
9use std::path::Path;
10
11use crate::enhanced_orchestrator::EnhancedGenerationResult;
12use datasynth_core::documents::PaymentType;
13use datasynth_output::OutputRootConfig;
14use tracing::{info, warn};
15
thread_local! {
    /// Per-thread "skip JSON" switch, `false` by default.
    ///
    /// `write_all_output_with_layout` turns it on when the requested formats
    /// contain neither JSON variant and clears it again when it returns.
    /// It is meant to turn `write_json_safe` into a no-op so that the many
    /// call sites do not each need their own `if write_json` check.
    ///
    /// Being thread-local, the value is not visible from threads spawned
    /// during the write pass; those threads see the default.
    static SKIP_JSON: Cell<bool> = const { Cell::new(false) };
}

thread_local! {
    /// Per-thread "flat layout" switch, `false` by default.
    ///
    /// `write_all_output_with_layout` turns it on for the flat export layout
    /// and clears it again when it returns. While it is on,
    /// `write_json_safe` is meant to route through `write_json_flat` so that
    /// nested `{header, lines}` shapes are flattened.
    ///
    /// Being thread-local, the value is not visible from threads spawned
    /// during the write pass; those threads see the default.
    static FLAT_LAYOUT_ACTIVE: Cell<bool> = const { Cell::new(false) };
}
28
29/// Write a JSON file for any serializable slice. Skips empty slices.
30///
31/// Streams JSON directly to a buffered file writer instead of allocating
32/// the entire JSON string in memory (Phase 3 I/O optimization).
33/// Write a JSON array by streaming one record at a time.
34///
35/// Instead of serializing the entire `&[T]` in one `to_writer_pretty` call
36/// (which builds a massive in-memory serde state for large arrays), this
37/// writes `[\n` + per-record pretty-printed JSON with commas + `\n]`.
38///
39/// For 200K+ records this reduces peak memory and improves write throughput
40/// by avoiding serde's internal buffering of the full array structure.
41fn write_json<T: serde::Serialize>(
42    data: &[T],
43    path: &Path,
44    label: &str,
45) -> Result<(), Box<dyn std::error::Error>> {
46    use std::io::Write;
47
48    if data.is_empty() {
49        return Ok(());
50    }
51
52    let file = std::fs::File::create(path)?;
53    let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
54
55    // Stream records one at a time into a JSON array
56    writer.write_all(b"[\n")?;
57    for (i, item) in data.iter().enumerate() {
58        if i > 0 {
59            writer.write_all(b",\n")?;
60        }
61        serde_json::to_writer_pretty(&mut writer, item)?;
62    }
63    writer.write_all(b"\n]\n")?;
64    writer.flush()?;
65
66    info!(
67        "  {} written: {} records -> {}",
68        label,
69        data.len(),
70        path.display()
71    );
72    Ok(())
73}
74
75/// Write journal entry lines as a flat CSV file.
76///
77/// This extracts the key fields from both the header and each line item to
78/// produce a single flat CSV that can be loaded directly into dataframes.
79fn write_journal_entries_csv(
80    result: &EnhancedGenerationResult,
81    output_dir: &Path,
82) -> Result<(), Box<dyn std::error::Error>> {
83    if result.journal_entries.is_empty() {
84        return Ok(());
85    }
86
87    let path = output_dir.join("journal_entries.csv");
88    let file = std::fs::File::create(&path)?;
89    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
90
91    // Write header.
92    //
93    // Schema note: each release that widens the schema appends new
94    // columns at the end so existing column-positional consumers keep
95    // working.
96    //   v5.5.1 added:
97    //     is_manual, is_post_close, source_system     (audit / ETL provenance)
98    //     account_description                         (joined from CoA)
99    //     financial_statement_category                (asset/liability/...)
100    //     assignment, value_date, tax_code            (already-populated line fields)
101    //     transaction_id                              (stable per-line id)
102    //   v5.6.0 added (ISO 21378 Audit Data Collection classification):
103    //     account_class, account_class_name           (Level-2 e.g. "A.B" / "Trade Receivables")
104    //     account_sub_class, account_sub_class_name   (Level-3 e.g. "A.B.A" / "Trade Accounts Receivable")
105    //   v5.8.0 added:
106    //     predecessor_line_id                         (UUID v5 of preceding line in document chain;
107    //                                                  populated by document_flow_je_generator for
108    //                                                  P2P / O2C chains, empty for chain heads and
109    //                                                  for purely-GL adjustments)
110    //   v5.16.1 (SP3.8a) added:
111    //     trading_partner                             (counterparty company code; populated from
112    //                                                  SP3.7 per-source conditional when priors are
113    //                                                  loaded; empty for chain heads / GL adjustments
114    //                                                  when priors are absent)
115    //   v5.17.0 (HF community request) added:
116    //     fraud_type                                  (FraudType enum variant via Debug format, e.g.
117    //                                                  "GhostEmployee"; empty string when None)
118    //     anomaly_type                                (AnomalyType serialized name, e.g.
119    //                                                  "DuplicateEntry"; empty string when None)
120    writeln!(
121        w,
122        "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
123         document_type,currency,exchange_rate,reference,header_text,created_by,source,\
124         business_process,ledger,is_fraud,is_anomaly,\
125         line_number,gl_account,debit_amount,credit_amount,local_amount,transaction_amount,\
126         cost_center,profit_center,business_unit,line_text,\
127         auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
128         is_manual,is_post_close,source_system,\
129         account_description,financial_statement_category,\
130         assignment,value_date,tax_code,transaction_id,\
131         account_class,account_class_name,account_sub_class,account_sub_class_name,\
132         predecessor_line_id,trading_partner,fraud_type,anomaly_type"
133    )?;
134
135    // Build a CoA → (short_description, ISO class, ISO sub-class) lookup.
136    // Empty when no CoA was generated (e.g. some smoke tests); resolution
137    // falls back to the line's already-populated `account_description`
138    // and to empty ISO codes.
139    let coa_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> = result
140        .chart_of_accounts
141        .accounts
142        .iter()
143        .map(|a| {
144            (
145                a.account_number.as_str(),
146                (
147                    a.short_description.as_str(),
148                    a.account_class.as_str(),
149                    a.account_class_name.as_str(),
150                    a.account_sub_class.as_str(),
151                    a.account_sub_class_name.as_str(),
152                ),
153            )
154        })
155        .collect();
156
157    // SP5.2 — Secondary index built from the CoA semantic prior (when loaded).
158    // The per-source attribute conditional (SP3.7) draws corpus GL account
159    // numbers (e.g. `0000105000`) that typically are NOT present in the synthetic
160    // CoA master table, so the primary `coa_index` misses ~85% of lines.  This
161    // fallback index covers 3,123 corpus accounts sourced from the `.dsf`
162    // bundle, resolving `account_description` and ISO 21378 class codes for any
163    // account number the prior knows about.
164    //
165    // When no prior is loaded the map is empty and the existing behaviour is
166    // byte-identical to earlier releases.
167    let coa_semantic_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> =
168        result
169            .coa_semantic_prior
170            .as_ref()
171            .map(|prior| {
172                prior
173                    .accounts
174                    .iter()
175                    .map(|(account_number, sem)| {
176                        (
177                            account_number.as_str(),
178                            (
179                                sem.description.as_str(),
180                                sem.account_class.as_deref().unwrap_or(""),
181                                sem.account_class_name.as_deref().unwrap_or(""),
182                                sem.account_sub_class.as_deref().unwrap_or(""),
183                                sem.account_sub_class_name.as_deref().unwrap_or(""),
184                            ),
185                        )
186                    })
187                    .collect()
188            })
189            .unwrap_or_default();
190
191    for je in &result.journal_entries {
192        let h = &je.header;
193        // SP3.6 — when priors are loaded, `sap_source_code` holds a canonical
194        // SAP source code (`KR`, `RV`, …); fall back to the TransactionSource
195        // Display label for the priors-disabled path.
196        let source_label: std::borrow::Cow<str> = match &h.sap_source_code {
197            Some(code) => std::borrow::Cow::Borrowed(code.as_str()),
198            None => std::borrow::Cow::Owned(h.source.to_string()),
199        };
200        for line in &je.lines {
201            let lettrage_date_str = line
202                .lettrage_date
203                .map(|d| d.to_string())
204                .unwrap_or_default();
205            let value_date_str = line.value_date.map(|d| d.to_string()).unwrap_or_default();
206            // Look up CoA-joined fields in one shot.
207            // SP5.2 — try primary (synthetic CoA) then fall through to the
208            // corpus prior secondary index when the primary misses.
209            let coa_hit = coa_index
210                .get(line.gl_account.as_str())
211                .copied()
212                .or_else(|| coa_semantic_index.get(line.gl_account.as_str()).copied());
213            let coa_short_desc = coa_hit.map(|t| t.0).unwrap_or("");
214            let coa_class = coa_hit.map(|t| t.1).unwrap_or("");
215            let coa_class_name = coa_hit.map(|t| t.2).unwrap_or("");
216            let coa_sub_class = coa_hit.map(|t| t.3).unwrap_or("");
217            let coa_sub_class_name = coa_hit.map(|t| t.4).unwrap_or("");
218            // Prefer the line's own account_description; fall back to the CoA
219            // lookup so consumers always get a name even when the generator
220            // forgot to populate the field.
221            let account_description: &str = line
222                .account_description
223                .as_deref()
224                .filter(|s| !s.is_empty())
225                .unwrap_or(coa_short_desc);
226            // Derive the FSA category from the gl_account prefix (1xxx=asset,
227            // 2xxx=liability, ...). Cheap, deterministic, no CoA dependency.
228            let fsa_category =
229                datasynth_core::accounts::AccountCategory::from_account(line.gl_account.as_str())
230                    .as_label();
231            // Stable per-line identifier (UUID v5 of document_id+line_number).
232            let transaction_id = line.transaction_id.clone().unwrap_or_else(|| {
233                datasynth_core::models::JournalEntryLine::derive_transaction_id(
234                    line.document_id,
235                    line.line_number,
236                )
237            });
238            // v5.17.0 — fraud_type and anomaly_type category columns (cols 45-46).
239            // fraud_type: Option<FraudType> → Debug format, empty when None.
240            // anomaly_type: Option<String> → already serialized, empty when None.
241            let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
242            let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();
243            writeln!(
244                w,
245                "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
246                h.document_id,
247                csv_escape(&h.company_code),
248                h.fiscal_year,
249                h.fiscal_period,
250                h.posting_date,
251                h.document_date,
252                csv_escape(&h.document_type),
253                csv_escape(&h.currency),
254                h.exchange_rate,
255                csv_opt_str(&h.reference),
256                csv_opt_str(&h.header_text),
257                csv_escape(&h.created_by),
258                source_label,
259                h.business_process
260                    .map(|bp| format!("{bp:?}"))
261                    .unwrap_or_default(),
262                csv_escape(&h.ledger),
263                h.is_fraud,
264                h.is_anomaly,
265                line.line_number,
266                csv_escape(&line.gl_account),
267                line.debit_amount,
268                line.credit_amount,
269                line.local_amount,
270                line.transaction_amount.map(|d| d.to_string()).unwrap_or_default(),
271                csv_opt_str(&line.cost_center),
272                csv_opt_str(&line.profit_center),
273                csv_opt_str(&line.business_unit),
274                csv_opt_str(&line.line_text),
275                csv_opt_str(&line.auxiliary_account_number),
276                csv_opt_str(&line.auxiliary_account_label),
277                csv_opt_str(&line.lettrage),
278                lettrage_date_str,
279                h.is_manual,
280                h.is_post_close,
281                csv_escape(&h.source_system),
282                csv_escape(account_description),
283                fsa_category,
284                csv_opt_str(&line.assignment),
285                value_date_str,
286                csv_opt_str(&line.tax_code),
287                csv_escape(&transaction_id),
288                csv_escape(coa_class),
289                csv_escape(coa_class_name),
290                csv_escape(coa_sub_class),
291                csv_escape(coa_sub_class_name),
292                csv_opt_str(&line.predecessor_line_id),
293                // SP3.8a — trading_partner appended as column 44.
294                csv_opt_str(&line.trading_partner),
295                // v5.17.0 — fraud_type (col 45) and anomaly_type (col 46).
296                csv_escape(&fraud_type_str),
297                csv_escape(&anomaly_type_str),
298            )?;
299        }
300    }
301
302    w.flush()?;
303    let total_lines: usize = result.journal_entries.iter().map(|je| je.lines.len()).sum();
304    info!(
305        "  Journal entries CSV written: {} entries, {} line items -> {}",
306        result.journal_entries.len(),
307        total_lines,
308        path.display()
309    );
310    Ok(())
311}
312
313/// **v5.8.0** — write `graphs/je_network.csv`: a flat edge-list of the
314/// accounting network derived from journal entries.
315///
316/// Each row represents one debit↔credit flow within a single JE,
317/// formed via the cartesian product of debit lines × credit lines (the
318/// approach in `datasynth-graph::TransactionGraphBuilder`). For a
319/// 2-line JE this is exactly the bijective Method-A flow from
320/// Ivertowski et al. (2024); for larger JEs it is a Method-B/C
321/// approximation with proportional amount allocation.
322///
323/// v5.10: edge construction has been extracted into
324/// [`crate::je_network::build_je_network_edges`] so the same logic is
325/// reused by the `datasynth-group` aggregate emitter.  The CSV format
326/// is unchanged — this writer keeps the v5.8.0 13-column schema.
327///
328/// Joins back to `journal_entries.csv` via:
329///   - `document_id` → JE-level header
330///   - `from_line_id` / `to_line_id` → per-line `transaction_id`
331///   - `predecessor_edge_id` → previous flow in a document chain
332fn write_je_network_csv(
333    result: &EnhancedGenerationResult,
334    output_dir: &Path,
335    method: datasynth_config::JeNetworkMethod,
336) -> Result<(), Box<dyn std::error::Error>> {
337    if result.journal_entries.is_empty() {
338        return Ok(());
339    }
340    let graphs_dir = output_dir.join("graphs");
341    std::fs::create_dir_all(&graphs_dir)?;
342    let path = graphs_dir.join("je_network.csv");
343    let file = std::fs::File::create(&path)?;
344    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
345
346    writeln!(
347        w,
348        "edge_id,document_id,posting_date,from_account,to_account,\
349         from_line_id,to_line_id,amount,confidence,\
350         predecessor_edge_id,business_process,is_fraud,is_anomaly,fraud_type"
351    )?;
352
353    let edges = crate::je_network::build_je_network_edges(&result.journal_entries, method);
354
355    for e in &edges {
356        writeln!(
357            w,
358            "{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
359            csv_escape(&e.edge_id),
360            csv_escape(&e.document_id.to_string()),
361            csv_escape(&e.posting_date.to_string()),
362            csv_escape(&e.from_account),
363            csv_escape(&e.to_account),
364            csv_escape(&e.from_line_id),
365            csv_escape(&e.to_line_id),
366            e.amount,
367            e.confidence,
368            csv_escape(&e.predecessor_edge_id),
369            csv_escape(&e.business_process),
370            e.is_fraud,
371            e.is_anomaly,
372            csv_escape(e.fraud_type.as_deref().unwrap_or("")),
373        )?;
374    }
375
376    w.flush()?;
377    info!(
378        "  JE network CSV written: {} edges from {} entries -> {}",
379        edges.len(),
380        result.journal_entries.len(),
381        path.display()
382    );
383    Ok(())
384}
385
386/// Write journal entries as flat JSON (header fields merged onto each line).
387///
388/// Each object in the output array contains all header fields plus all line fields,
389/// with no nesting. This is the analytics-friendly format.
390fn write_journal_entries_flat_json(
391    result: &EnhancedGenerationResult,
392    output_dir: &Path,
393) -> Result<(), Box<dyn std::error::Error>> {
394    if result.journal_entries.is_empty() {
395        return Ok(());
396    }
397
398    let path = output_dir.join("journal_entries.json");
399    let file = std::fs::File::create(&path)?;
400    let mut writer = std::io::BufWriter::with_capacity(256 * 1024, file);
401
402    // Write opening bracket
403    writer.write_all(b"[\n")?;
404
405    let mut first = true;
406    let mut total_lines = 0usize;
407    for je in &result.journal_entries {
408        // Serialize header to a JSON map
409        let header_value = serde_json::to_value(&je.header)?;
410
411        for line in &je.lines {
412            if !first {
413                writer.write_all(b",\n")?;
414            }
415            first = false;
416            total_lines += 1;
417
418            // Serialize line to a JSON map, then merge header fields in
419            let mut line_value = serde_json::to_value(line)?;
420
421            if let serde_json::Value::Object(ref header_map) = header_value {
422                if let serde_json::Value::Object(ref mut line_map) = line_value {
423                    for (key, val) in header_map {
424                        // Line fields take precedence for shared keys (e.g. document_id)
425                        if !line_map.contains_key(key) {
426                            line_map.insert(key.clone(), val.clone());
427                        }
428                    }
429                }
430            }
431
432            serde_json::to_writer_pretty(&mut writer, &line_value)?;
433        }
434    }
435
436    writer.write_all(b"\n]\n")?;
437    writer.flush()?;
438    info!(
439        "  Journal entries (flat JSON) written: {} line items -> {}",
440        total_lines,
441        path.display()
442    );
443    Ok(())
444}
445
446/// v4.4.2 helper — walk a serialized OCEL event-log `Value` tree and
447/// mirror `object_type_id` into `object_type` on every
448/// `object_refs[*]` entry. The canonical OCEL 2.0 field name is
449/// `object_type`; DataSynth's internal model carries it as
450/// `object_type_id` for historical reasons. Emitting both keys lets
451/// OCEL-spec-compliant consumers (pm4py, Celonis, etc.) see the type
452/// without a rename step.
453fn add_ocel_object_type_alias(value: &mut serde_json::Value) {
454    if let Some(events) = value.get_mut("events").and_then(|v| v.as_array_mut()) {
455        for event in events.iter_mut() {
456            if let Some(refs) = event.get_mut("object_refs").and_then(|r| r.as_array_mut()) {
457                for oref in refs.iter_mut() {
458                    if let Some(obj) = oref.as_object_mut() {
459                        if let Some(oti) = obj.get("object_type_id").cloned() {
460                            obj.entry("object_type").or_insert(oti);
461                        }
462                    }
463                }
464            }
465        }
466    }
467}
468
/// Escape a string for use as a single CSV field.
///
/// The field is wrapped in double quotes, with embedded quotes doubled,
/// when it contains a comma, a double quote, a line feed or a carriage
/// return. A bare carriage return needs quoting too, because many readers
/// treat it as a record terminator. Any other input is returned as an
/// unchanged copy.
fn csv_escape(s: &str) -> String {
    let needs_quoting = s.contains([',', '"', '\n', '\r']);
    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
477
478/// Format an Option<String> for CSV output (empty string for None).
479fn csv_opt_str(opt: &Option<String>) -> String {
480    match opt {
481        Some(s) => csv_escape(s),
482        None => String::new(),
483    }
484}
485
486/// Write all generated data to the output directory.
487///
488/// This function exports every non-empty dataset from the generation result.
489/// Journal entries are written as a flat CSV file (one row per line item)
490/// and as a nested JSON file. Other data is written as JSON files since
491/// many model types contain nested structures.
492#[allow(dead_code)]
493pub fn write_all_output(
494    result: &EnhancedGenerationResult,
495    output_dir: &Path,
496) -> Result<(), Box<dyn std::error::Error>> {
497    write_all_output_with_layout(
498        result,
499        output_dir,
500        datasynth_config::ExportLayout::Nested,
501        &[
502            datasynth_config::FileFormat::Csv,
503            datasynth_config::FileFormat::Json,
504        ],
505        datasynth_config::JeNetworkMethod::default(),
506    )
507}
508
509/// Variant of [`write_all_output_with_layout`] that routes output through
510/// an [`OutputRootConfig`] instead of a raw `&Path`.
511///
512/// Per-entity subtree mode is used by the group-audit shard runner
513/// (v5.0+): the runner sets `per_entity_subtree: true` and
514/// `entity_code: Some(code)` on `root`, and this helper drops each
515/// entity's archive under `{root_dir}/entities/{code}/` so group-wide
516/// artifacts can still live at `{root_dir}/`.
517///
518/// In flat mode (the default for single-entity runs) this is exactly
519/// equivalent to calling [`write_all_output_with_layout`] with
520/// `output_dir = root.root_dir`, so the signature and behavior of the
521/// existing single-entity entrypoints are unchanged.
522#[allow(dead_code)]
523pub fn write_all_output_with_root(
524    result: &EnhancedGenerationResult,
525    root: &OutputRootConfig,
526    export_layout: datasynth_config::ExportLayout,
527    formats: &[datasynth_config::FileFormat],
528) -> Result<(), Box<dyn std::error::Error>> {
529    let effective = root.effective_dir();
530    write_all_output_with_layout(
531        result,
532        &effective,
533        export_layout,
534        formats,
535        datasynth_config::JeNetworkMethod::default(),
536    )
537}
538
539/// Write all generated data with a configurable export layout and format set.
540///
541/// Only writes files for formats present in `formats`. If `formats` is empty,
542/// writes both CSV and JSON (backward compatible). This allows skipping JSON
543/// when only CSV is needed, which halves output time for large datasets.
544pub fn write_all_output_with_layout(
545    result: &EnhancedGenerationResult,
546    output_dir: &Path,
547    export_layout: datasynth_config::ExportLayout,
548    formats: &[datasynth_config::FileFormat],
549    je_network_method: datasynth_config::JeNetworkMethod,
550) -> Result<(), Box<dyn std::error::Error>> {
551    let csv_enabled = formats.is_empty()
552        || formats.contains(&datasynth_config::FileFormat::Csv)
553        || formats.contains(&datasynth_config::FileFormat::Parquet);
554    let json_enabled = formats.is_empty()
555        || formats.contains(&datasynth_config::FileFormat::Json)
556        || formats.contains(&datasynth_config::FileFormat::JsonLines);
557    std::fs::create_dir_all(output_dir)?;
558    info!("Writing comprehensive output to: {}", output_dir.display());
559
560    // Set flat-layout flag for all `write_json_safe` calls in this pass.
561    // Scope guard ensures we reset on return (including error paths).
562    struct FlatLayoutGuard;
563    impl Drop for FlatLayoutGuard {
564        fn drop(&mut self) {
565            FLAT_LAYOUT_ACTIVE.with(|c| c.set(false));
566        }
567    }
568    let _flat_guard = if export_layout == datasynth_config::ExportLayout::Flat {
569        FLAT_LAYOUT_ACTIVE.with(|c| c.set(true));
570        Some(FlatLayoutGuard)
571    } else {
572        None
573    };
574
575    // Set JSON skip flag so `write_json_safe` becomes a no-op when JSON not requested.
576    struct SkipJsonGuard;
577    impl Drop for SkipJsonGuard {
578        fn drop(&mut self) {
579            SKIP_JSON.with(|c| c.set(false));
580        }
581    }
582    let _skip_json_guard = if !json_enabled {
583        SKIP_JSON.with(|c| c.set(true));
584        info!("JSON output skipped (not in requested formats)");
585        Some(SkipJsonGuard)
586    } else {
587        None
588    };
589
590    // ========================================================================
591    // F1: Ledger Coherence Report — integrity self-validation (JSON only).
592    // Cheap and additive: proves every JE balances, the ledger nets to zero,
593    // and reports any unbalanced entries + per-account activity. Doubles as a
594    // regression guard for ledger-coherence issues.
595    // ========================================================================
596    if json_enabled && !result.journal_entries.is_empty() {
597        let report = datasynth_core::models::LedgerCoherenceReport::from_entries(
598            &result.journal_entries,
599            datasynth_core::models::LedgerCoherenceReport::DEFAULT_LIST_CAP,
600        );
601        let path = output_dir.join("ledger_coherence_report.json");
602        match serde_json::to_string_pretty(&report) {
603            Ok(json) => {
604                if let Err(e) = std::fs::write(&path, json) {
605                    warn!("Failed to write ledger_coherence_report.json: {}", e);
606                }
607            }
608            Err(e) => warn!("Failed to serialize ledger coherence report: {}", e),
609        }
610    }
611
612    // ========================================================================
613    // F2: Dimensional star-schema export — surrogate-key fact table + dimension
614    // lookups + COA map, for loading into common GL-analytics platforms. Emitted
615    // alongside CSV (same precedent as the je_network edge list), under
616    // `star_schema/`. Additive: existing files unchanged.
617    // ========================================================================
618    if csv_enabled && !result.journal_entries.is_empty() {
619        let export =
620            datasynth_core::models::DimensionalExport::from_entries(&result.journal_entries);
621        let dir = output_dir.join("star_schema");
622        match std::fs::create_dir_all(&dir) {
623            Ok(()) => {
624                for (fname, contents) in export.files() {
625                    if let Err(e) = std::fs::write(dir.join(&fname), contents) {
626                        warn!("Failed to write star_schema/{}: {}", fname, e);
627                    }
628                }
629            }
630            Err(e) => warn!("Failed to create star_schema dir: {}", e),
631        }
632    }
633
634    // ========================================================================
635    // F3: Structural JE fingerprints + heuristic flags. L1 (by account) / L2
636    // (by account prefix) structural fingerprints cluster recurring postings;
637    // balance-sheet-only / income-statement-only flags by account prefix.
638    // Summary listing → JSON; per-JE assignments → CSV. Additive.
639    // ========================================================================
640    if (json_enabled || csv_enabled) && !result.journal_entries.is_empty() {
641        let report = datasynth_core::models::StructuralFingerprintReport::from_entries(
642            &result.journal_entries,
643            datasynth_core::models::StructuralFingerprintReport::DEFAULT_LIST_CAP,
644        );
645        if json_enabled {
646            match serde_json::to_string_pretty(&report) {
647                Ok(json) => {
648                    if let Err(e) =
649                        std::fs::write(output_dir.join("structural_fingerprints.json"), json)
650                    {
651                        warn!("Failed to write structural_fingerprints.json: {}", e);
652                    }
653                }
654                Err(e) => warn!("Failed to serialize structural fingerprints: {}", e),
655            }
656        }
657        if csv_enabled {
658            if let Err(e) = std::fs::write(
659                output_dir.join("structural_fingerprints.csv"),
660                report.per_je_csv(),
661            ) {
662                warn!("Failed to write structural_fingerprints.csv: {}", e);
663            }
664        }
665    }
666
667    // ========================================================================
668    // Journal Entries (CSV + JSON in parallel when both enabled)
669    // ========================================================================
670    if !result.journal_entries.is_empty() {
671        let do_csv = csv_enabled;
672        let do_json = json_enabled;
673        let is_flat = export_layout == datasynth_config::ExportLayout::Flat;
674
675        std::thread::scope(|s| {
676            if do_csv {
677                s.spawn(|| {
678                    if let Err(e) = write_journal_entries_csv(result, output_dir) {
679                        warn!("Failed to write journal_entries.csv: {}", e);
680                    }
681                });
682                // v5.8.0 — flat edge-list for accounting-network construction.
683                // Always emit when CSV is requested; cheap relative to the
684                // main JE table.
685                s.spawn(|| {
686                    if let Err(e) = write_je_network_csv(result, output_dir, je_network_method) {
687                        warn!("Failed to write graphs/je_network.csv: {}", e);
688                    }
689                });
690            }
691            if do_json {
692                s.spawn(|| {
693                    if is_flat {
694                        if let Err(e) = write_journal_entries_flat_json(result, output_dir) {
695                            warn!("Failed to write flat journal_entries.json: {}", e);
696                        }
697                    } else if let Err(e) = write_json(
698                        &result.journal_entries,
699                        &output_dir.join("journal_entries.json"),
700                        "Journal entries (JSON)",
701                    ) {
702                        warn!("Failed to write journal_entries.json: {}", e);
703                    }
704                });
705            }
706        });
707    }
708
709    // ========================================================================
710    // Master Data
711    // ========================================================================
712    let md_dir = output_dir.join("master_data");
713    if !result.master_data.vendors.is_empty()
714        || !result.master_data.customers.is_empty()
715        || !result.master_data.materials.is_empty()
716        || !result.master_data.assets.is_empty()
717        || !result.master_data.employees.is_empty()
718        || !result.master_data.cost_centers.is_empty()
719        || !result.master_data.profit_centers.is_empty()
720    {
721        std::fs::create_dir_all(&md_dir)?;
722        info!("Writing master data...");
723
724        write_json_safe(
725            &result.master_data.vendors,
726            &md_dir.join("vendors.json"),
727            "Vendors",
728        );
729        write_json_safe(
730            &result.master_data.customers,
731            &md_dir.join("customers.json"),
732            "Customers",
733        );
734        write_json_safe(
735            &result.master_data.materials,
736            &md_dir.join("materials.json"),
737            "Materials",
738        );
739        write_json_safe(
740            &result.master_data.assets,
741            &md_dir.join("fixed_assets.json"),
742            "Fixed assets",
743        );
744        write_json_safe(
745            &result.master_data.employees,
746            &md_dir.join("employees.json"),
747            "Employees",
748        );
749        write_json_safe(
750            &result.master_data.cost_centers,
751            &md_dir.join("cost_centers.json"),
752            "Cost centers",
753        );
754        // v5.1: profit-centre hierarchy (segments + sub-units).
755        write_json_safe(
756            &result.master_data.profit_centers,
757            &md_dir.join("profit_centers.json"),
758            "Profit centres",
759        );
760        // v3.3.0: organizational profiles (one per company)
761        write_json_safe(
762            &result.master_data.organizational_profiles,
763            &md_dir.join("organizational_profiles.json"),
764            "Organizational profiles (v3.3.0)",
765        );
766    }
767
768    // ========================================================================
769    // Document Flows
770    // ========================================================================
771    let df_dir = output_dir.join("document_flows");
772    let flat_mode = export_layout == datasynth_config::ExportLayout::Flat;
773    if !result.document_flows.purchase_orders.is_empty()
774        || !result.document_flows.sales_orders.is_empty()
775    {
776        std::fs::create_dir_all(&df_dir)?;
777        info!("Writing document flows...");
778
779        write_json_auto(
780            &result.document_flows.purchase_orders,
781            &df_dir.join("purchase_orders.json"),
782            "Purchase orders",
783            flat_mode,
784        );
785        write_json_auto(
786            &result.document_flows.goods_receipts,
787            &df_dir.join("goods_receipts.json"),
788            "Goods receipts",
789            flat_mode,
790        );
791        write_json_auto(
792            &result.document_flows.vendor_invoices,
793            &df_dir.join("vendor_invoices.json"),
794            "Vendor invoices",
795            flat_mode,
796        );
797        write_json_auto(
798            &result.document_flows.payments,
799            &df_dir.join("payments.json"),
800            "Payments",
801            flat_mode,
802        );
803        let customer_receipts: Vec<_> = result
804            .document_flows
805            .payments
806            .iter()
807            .filter(|p| p.payment_type == PaymentType::ArReceipt)
808            .collect();
809        write_json_auto(
810            &customer_receipts,
811            &df_dir.join("customer_receipts.json"),
812            "Customer receipts",
813            flat_mode,
814        );
815        write_json_auto(
816            &result.document_flows.sales_orders,
817            &df_dir.join("sales_orders.json"),
818            "Sales orders",
819            flat_mode,
820        );
821        write_json_auto(
822            &result.document_flows.deliveries,
823            &df_dir.join("deliveries.json"),
824            "Deliveries",
825            flat_mode,
826        );
827        write_json_auto(
828            &result.document_flows.customer_invoices,
829            &df_dir.join("customer_invoices.json"),
830            "Customer invoices",
831            flat_mode,
832        );
833
834        // Document cross-references (PO→GR, GR→Invoice, Invoice→Payment, etc.).
835        // v4.4.2+: inject SDK-friendly `from_type`/`from_id`/`to_type`/`to_id`
836        // aliases so consumers that follow the graph convention see the
837        // types populated. The canonical `source_doc_*`/`target_doc_*`
838        // keys continue to emit unchanged for backwards compatibility.
839        match serde_json::to_value(&result.document_flows.document_references) {
840            Ok(mut v) => {
841                if let Some(arr) = v.as_array_mut() {
842                    for r in arr.iter_mut() {
843                        if let Some(obj) = r.as_object_mut() {
844                            if let Some(st) = obj.get("source_doc_type").cloned() {
845                                obj.entry("from_type").or_insert(st);
846                            }
847                            if let Some(si) = obj.get("source_doc_id").cloned() {
848                                obj.entry("from_id").or_insert(si);
849                            }
850                            if let Some(tt) = obj.get("target_doc_type").cloned() {
851                                obj.entry("to_type").or_insert(tt);
852                            }
853                            if let Some(ti) = obj.get("target_doc_id").cloned() {
854                                obj.entry("to_id").or_insert(ti);
855                            }
856                        }
857                    }
858                }
859                match serde_json::to_string_pretty(&v) {
860                    Ok(json) => {
861                        let path = df_dir.join("document_references.json");
862                        if let Err(e) = std::fs::write(&path, json) {
863                            warn!("Failed to write document references: {}", e);
864                        } else {
865                            info!(
866                                "  Document references written: {} records -> {}",
867                                result.document_flows.document_references.len(),
868                                path.display()
869                            );
870                        }
871                    }
872                    Err(e) => warn!("Failed to serialize document references: {}", e),
873                }
874            }
875            Err(e) => warn!("Failed to build document references Value: {}", e),
876        }
877
878        // Note: P2P/O2C chain types do not implement Serialize, so we log
879        // their counts instead. The individual documents above capture all data.
880        if !result.document_flows.p2p_chains.is_empty() {
881            info!(
882                "  P2P chains: {} (data exported via individual document files)",
883                result.document_flows.p2p_chains.len()
884            );
885        }
886        if !result.document_flows.o2c_chains.is_empty() {
887            info!(
888                "  O2C chains: {} (data exported via individual document files)",
889                result.document_flows.o2c_chains.len()
890            );
891        }
892    }
893
894    // ========================================================================
895    // Subledger
896    // ========================================================================
897    let sl_dir = output_dir.join("subledger");
898    if !result.subledger.ap_invoices.is_empty()
899        || !result.subledger.ar_invoices.is_empty()
900        || !result.subledger.fa_records.is_empty()
901        || !result.subledger.inventory_positions.is_empty()
902    {
903        std::fs::create_dir_all(&sl_dir)?;
904        info!("Writing subledger data...");
905
906        write_json_safe(
907            &result.subledger.ap_invoices,
908            &sl_dir.join("ap_invoices.json"),
909            "AP invoices",
910        );
911        write_json_safe(
912            &result.subledger.ar_invoices,
913            &sl_dir.join("ar_invoices.json"),
914            "AR invoices",
915        );
916        write_json_safe(
917            &result.subledger.fa_records,
918            &sl_dir.join("fa_records.json"),
919            "FA records",
920        );
921        write_json_safe(
922            &result.subledger.inventory_positions,
923            &sl_dir.join("inventory_positions.json"),
924            "Inventory positions",
925        );
926        write_json_safe(
927            &result.subledger.inventory_movements,
928            &sl_dir.join("inventory_movements.json"),
929            "Inventory movements",
930        );
931        write_json_safe(
932            &result.subledger.ar_aging_reports,
933            &sl_dir.join("ar_aging.json"),
934            "AR aging reports",
935        );
936        write_json_safe(
937            &result.subledger.ap_aging_reports,
938            &sl_dir.join("ap_aging.json"),
939            "AP aging reports",
940        );
941        write_json_safe(
942            &result.subledger.depreciation_runs,
943            &sl_dir.join("depreciation_runs.json"),
944            "Depreciation runs",
945        );
946        write_json_safe(
947            &result.subledger.inventory_valuations,
948            &sl_dir.join("inventory_valuation.json"),
949            "Inventory valuations",
950        );
951        // Dunning runs and letters (generated after AR aging)
952        write_json_safe(
953            &result.subledger.dunning_runs,
954            &sl_dir.join("dunning_runs.json"),
955            "Dunning runs",
956        );
957        write_json_safe(
958            &result.subledger.dunning_letters,
959            &sl_dir.join("dunning_letters.json"),
960            "Dunning letters",
961        );
962    }
963
964    // ========================================================================
965    // Audit
966    // ========================================================================
967    let audit_dir = output_dir.join("audit");
968    if !result.audit.engagements.is_empty() {
969        std::fs::create_dir_all(&audit_dir)?;
970        info!("Writing audit data...");
971
972        write_json_safe(
973            &result.audit.engagements,
974            &audit_dir.join("audit_engagements.json"),
975            "Audit engagements",
976        );
977        write_json_safe(
978            &result.audit.audit_scopes,
979            &audit_dir.join("audit_scopes.json"),
980            "Audit scopes (ISA 220 / ISA 300)",
981        );
982        write_json_safe(
983            &result.audit.workpapers,
984            &audit_dir.join("audit_workpapers.json"),
985            "Audit workpapers",
986        );
987        write_json_safe(
988            &result.audit.evidence,
989            &audit_dir.join("audit_evidence.json"),
990            "Audit evidence",
991        );
992        write_json_safe(
993            &result.audit.risk_assessments,
994            &audit_dir.join("audit_risk_assessments.json"),
995            "Audit risk assessments",
996        );
997        write_json_safe(
998            &result.audit.findings,
999            &audit_dir.join("audit_findings.json"),
1000            "Audit findings",
1001        );
1002        write_json_safe(
1003            &result.audit.judgments,
1004            &audit_dir.join("audit_judgments.json"),
1005            "Audit judgments",
1006        );
1007        write_json_safe(
1008            &result.audit.confirmations,
1009            &audit_dir.join("audit_confirmations.json"),
1010            "Audit confirmations",
1011        );
1012        write_json_safe(
1013            &result.audit.confirmation_responses,
1014            &audit_dir.join("audit_confirmation_responses.json"),
1015            "Audit confirmation responses",
1016        );
1017        write_json_safe(
1018            &result.audit.procedure_steps,
1019            &audit_dir.join("audit_procedure_steps.json"),
1020            "Audit procedure steps",
1021        );
1022        write_json_safe(
1023            &result.audit.samples,
1024            &audit_dir.join("audit_samples.json"),
1025            "Audit samples",
1026        );
1027        write_json_safe(
1028            &result.audit.analytical_results,
1029            &audit_dir.join("audit_analytical_results.json"),
1030            "Audit analytical results",
1031        );
1032        write_json_safe(
1033            &result.audit.ia_functions,
1034            &audit_dir.join("audit_ia_functions.json"),
1035            "Audit IA functions",
1036        );
1037        write_json_safe(
1038            &result.audit.ia_reports,
1039            &audit_dir.join("audit_ia_reports.json"),
1040            "Audit IA reports",
1041        );
1042        write_json_safe(
1043            &result.audit.related_parties,
1044            &audit_dir.join("audit_related_parties.json"),
1045            "Audit related parties",
1046        );
1047        write_json_safe(
1048            &result.audit.related_party_transactions,
1049            &audit_dir.join("audit_related_party_transactions.json"),
1050            "Audit related party transactions",
1051        );
1052        // ISA 600: Group audit artefacts
1053        if !result.audit.component_auditors.is_empty() {
1054            write_json_safe(
1055                &result.audit.component_auditors,
1056                &audit_dir.join("component_auditors.json"),
1057                "Component auditors (ISA 600)",
1058            );
1059            if let Some(plan) = &result.audit.group_audit_plan {
1060                write_json_single_safe(
1061                    plan,
1062                    &audit_dir.join("group_audit_plan.json"),
1063                    "Group audit plan (ISA 600)",
1064                );
1065            }
1066            write_json_safe(
1067                &result.audit.component_instructions,
1068                &audit_dir.join("component_instructions.json"),
1069                "Component instructions (ISA 600)",
1070            );
1071            write_json_safe(
1072                &result.audit.component_reports,
1073                &audit_dir.join("component_reports.json"),
1074                "Component auditor reports (ISA 600)",
1075            );
1076        }
1077        // ISA 210: Engagement letters
1078        write_json_safe(
1079            &result.audit.engagement_letters,
1080            &audit_dir.join("engagement_letters.json"),
1081            "Engagement letters (ISA 210)",
1082        );
1083        // ISA 560 / IAS 10: Subsequent events
1084        write_json_safe(
1085            &result.audit.subsequent_events,
1086            &audit_dir.join("subsequent_events.json"),
1087            "Subsequent events (ISA 560 / IAS 10)",
1088        );
1089        // ISA 402: Service organization controls
1090        write_json_safe(
1091            &result.audit.service_organizations,
1092            &audit_dir.join("service_organizations.json"),
1093            "Service organizations (ISA 402)",
1094        );
1095        write_json_safe(
1096            &result.audit.soc_reports,
1097            &audit_dir.join("soc_reports.json"),
1098            "SOC reports (ISA 402)",
1099        );
1100        write_json_safe(
1101            &result.audit.user_entity_controls,
1102            &audit_dir.join("user_entity_controls.json"),
1103            "User entity controls (ISA 402)",
1104        );
1105
1106        // ISA 570: Going concern assessments
1107        write_json_safe(
1108            &result.audit.going_concern_assessments,
1109            &audit_dir.join("going_concern_assessments.json"),
1110            "Going concern assessments (ISA 570)",
1111        );
1112
1113        // ISA 540: Accounting estimates
1114        write_json_safe(
1115            &result.audit.accounting_estimates,
1116            &audit_dir.join("accounting_estimates.json"),
1117            "Accounting estimates (ISA 540)",
1118        );
1119
1120        // ISA 700/701/705/706: Audit opinions and Key Audit Matters.
1121        // Always write even if the vec is empty; see the always-emit block
1122        // below (outside the `engagements.is_empty()` guard) for the case
1123        // where audit is entirely disabled — the files still appear in the
1124        // archive with `[]` so SDK consumers don't get 404s on the manifest.
1125        write_json_always(
1126            &result.audit.audit_opinions,
1127            &audit_dir.join("audit_opinions.json"),
1128            "Audit opinions (ISA 700/705/706)",
1129        );
1130        write_json_always(
1131            &result.audit.key_audit_matters,
1132            &audit_dir.join("key_audit_matters.json"),
1133            "Key Audit Matters (ISA 701)",
1134        );
1135
1136        // SOX 302 / 404
1137        if !result.audit.sox_302_certifications.is_empty() {
1138            write_json_safe(
1139                &result.audit.sox_302_certifications,
1140                &audit_dir.join("sox_302_certifications.json"),
1141                "SOX 302 certifications",
1142            );
1143            write_json_safe(
1144                &result.audit.sox_404_assessments,
1145                &audit_dir.join("sox_404_assessments.json"),
1146                "SOX 404 ICFR assessments",
1147            );
1148        }
1149
1150        // ISA 320: Materiality calculations
1151        if !result.audit.materiality_calculations.is_empty() {
1152            write_json_safe(
1153                &result.audit.materiality_calculations,
1154                &audit_dir.join("materiality_calculations.json"),
1155                "Materiality calculations (ISA 320)",
1156            );
1157        }
1158
1159        // ISA 315: Combined Risk Assessments
1160        if !result.audit.combined_risk_assessments.is_empty() {
1161            write_json_safe(
1162                &result.audit.combined_risk_assessments,
1163                &audit_dir.join("combined_risk_assessments.json"),
1164                "Combined Risk Assessments (ISA 315)",
1165            );
1166        }
1167
1168        // ISA 530: Sampling Plans and Sampled Items
1169        if !result.audit.sampling_plans.is_empty() {
1170            write_json_safe(
1171                &result.audit.sampling_plans,
1172                &audit_dir.join("sampling_plans.json"),
1173                "Sampling plans (ISA 530)",
1174            );
1175            write_json_safe(
1176                &result.audit.sampled_items,
1177                &audit_dir.join("sampled_items.json"),
1178                "Sampled items (ISA 530)",
1179            );
1180        }
1181
1182        // ISA 315: Significant Classes of Transactions (SCOTS)
1183        if !result.audit.significant_transaction_classes.is_empty() {
1184            write_json_safe(
1185                &result.audit.significant_transaction_classes,
1186                &audit_dir.join("significant_transaction_classes.json"),
1187                "Significant Classes of Transactions / SCOTS (ISA 315)",
1188            );
1189        }
1190
1191        // ISA 520: Unusual Item Markers
1192        if !result.audit.unusual_items.is_empty() {
1193            write_json_safe(
1194                &result.audit.unusual_items,
1195                &audit_dir.join("unusual_items.json"),
1196                "Unusual item flags (ISA 520)",
1197            );
1198        }
1199
1200        // ISA 520: Analytical Relationships
1201        if !result.audit.analytical_relationships.is_empty() {
1202            write_json_safe(
1203                &result.audit.analytical_relationships,
1204                &audit_dir.join("analytical_relationships.json"),
1205                "Analytical relationships (ISA 520)",
1206            );
1207        }
1208
1209        // PCAOB-ISA cross-reference mappings
1210        if !result.audit.isa_pcaob_mappings.is_empty() {
1211            write_json_safe(
1212                &result.audit.isa_pcaob_mappings,
1213                &audit_dir.join("isa_pcaob_mappings.json"),
1214                "PCAOB-ISA standard mappings",
1215            );
1216        }
1217
1218        // ISA standard reference (number, title, series for all 34 ISA standards)
1219        if !result.audit.isa_mappings.is_empty() {
1220            write_json_safe(
1221                &result.audit.isa_mappings,
1222                &audit_dir.join("isa_mappings.json"),
1223                "ISA standard reference mappings",
1224            );
1225        }
1226
1227        // FSM event trail (when audit.fsm.enabled: true)
1228        if let Some(ref event_trail) = result.audit.fsm_event_trail {
1229            if !event_trail.is_empty() {
1230                write_json_safe(
1231                    event_trail,
1232                    &audit_dir.join("fsm_event_trail.json"),
1233                    "FSM audit event trail",
1234                );
1235            }
1236        }
1237
1238        // v3.3.0: legal documents (when compliance_regulations.legal_documents.enabled)
1239        write_json_safe(
1240            &result.audit.legal_documents,
1241            &audit_dir.join("legal_documents.json"),
1242            "Legal documents (v3.3.0)",
1243        );
1244
1245        // v3.3.0: IT general controls — access logs + change records
1246        write_json_safe(
1247            &result.audit.it_controls_access_logs,
1248            &audit_dir.join("it_controls_access_logs.json"),
1249            "IT general controls — access logs (v3.3.0)",
1250        );
1251        write_json_safe(
1252            &result.audit.it_controls_change_records,
1253            &audit_dir.join("it_controls_change_records.json"),
1254            "IT general controls — change management records (v3.3.0)",
1255        );
1256    } else {
1257        // Audit phase disabled or ran with no engagements — still emit
1258        // audit_opinions.json + key_audit_matters.json so the archive
1259        // structure is consistent and SDK consumers can rely on these
1260        // files always existing. v3.1 announced these as archive-shipping
1261        // files; v3.1.1 guarantees it regardless of audit.enabled.
1262        std::fs::create_dir_all(&audit_dir)?;
1263        write_json_always(
1264            &result.audit.audit_opinions,
1265            &audit_dir.join("audit_opinions.json"),
1266            "Audit opinions (ISA 700/705/706) — empty (audit phase disabled)",
1267        );
1268        write_json_always(
1269            &result.audit.key_audit_matters,
1270            &audit_dir.join("key_audit_matters.json"),
1271            "Key Audit Matters (ISA 701) — empty (audit phase disabled)",
1272        );
1273    }
1274
1275    // ========================================================================
1276    // Banking (JSON - keep existing format for backward compat)
1277    // ========================================================================
1278    let banking_dir = output_dir.join("banking");
1279    if !result.banking.customers.is_empty() {
1280        std::fs::create_dir_all(&banking_dir)?;
1281        info!("Writing banking data...");
1282
1283        // v4.4.2: dual-key risk tier. SDK consumers inspect `risk_level`;
1284        // the struct stores it as `risk_tier` for historical reasons.
1285        // Serialize through a `serde_json::Value` so we can inject the
1286        // `risk_level` alias key on every customer row without touching
1287        // the `BankingCustomer` Serialize impl (which has 40+ fields).
1288        match serde_json::to_value(&result.banking.customers) {
1289            Ok(mut v) => {
1290                if let Some(arr) = v.as_array_mut() {
1291                    for c in arr.iter_mut() {
1292                        if let Some(obj) = c.as_object_mut() {
1293                            if let Some(rt) = obj.get("risk_tier").cloned() {
1294                                obj.entry("risk_level").or_insert(rt);
1295                            }
1296                        }
1297                    }
1298                }
1299                match serde_json::to_string_pretty(&v) {
1300                    Ok(json) => {
1301                        let path = banking_dir.join("banking_customers.json");
1302                        if let Err(e) = std::fs::write(&path, json) {
1303                            warn!("Failed to write banking_customers.json: {}", e);
1304                        } else {
1305                            info!(
1306                                "  Banking customers written: {} records -> {}",
1307                                result.banking.customers.len(),
1308                                path.display()
1309                            );
1310                        }
1311                    }
1312                    Err(e) => warn!("Failed to serialize banking customers: {}", e),
1313                }
1314            }
1315            Err(e) => warn!("Failed to build banking customers Value: {}", e),
1316        }
1317        write_json_safe(
1318            &result.banking.accounts,
1319            &banking_dir.join("banking_accounts.json"),
1320            "Banking accounts",
1321        );
1322        write_json_safe(
1323            &result.banking.transactions,
1324            &banking_dir.join("banking_transactions.json"),
1325            "Banking transactions",
1326        );
1327        write_json_safe(
1328            &result.banking.transaction_labels,
1329            &banking_dir.join("aml_transaction_labels.json"),
1330            "AML transaction labels",
1331        );
1332        write_json_safe(
1333            &result.banking.customer_labels,
1334            &banking_dir.join("aml_customer_labels.json"),
1335            "AML customer labels",
1336        );
1337        write_json_safe(
1338            &result.banking.account_labels,
1339            &banking_dir.join("aml_account_labels.json"),
1340            "AML account labels",
1341        );
1342        write_json_safe(
1343            &result.banking.relationship_labels,
1344            &banking_dir.join("aml_relationship_labels.json"),
1345            "AML relationship labels",
1346        );
1347        write_json_safe(
1348            &result.banking.narratives,
1349            &banking_dir.join("aml_narratives.json"),
1350            "AML narratives",
1351        );
1352    }
1353
1354    // ========================================================================
1355    // Sourcing (S2C)
1356    // ========================================================================
1357    let s2c_dir = output_dir.join("sourcing");
1358    if !result.sourcing.spend_analyses.is_empty() || !result.sourcing.sourcing_projects.is_empty() {
1359        std::fs::create_dir_all(&s2c_dir)?;
1360        info!("Writing sourcing (S2C) data...");
1361
1362        write_json_safe(
1363            &result.sourcing.spend_analyses,
1364            &s2c_dir.join("spend_analyses.json"),
1365            "Spend analyses",
1366        );
1367        write_json_safe(
1368            &result.sourcing.sourcing_projects,
1369            &s2c_dir.join("sourcing_projects.json"),
1370            "Sourcing projects",
1371        );
1372        write_json_safe(
1373            &result.sourcing.qualifications,
1374            &s2c_dir.join("supplier_qualifications.json"),
1375            "Supplier qualifications",
1376        );
1377        write_json_safe(
1378            &result.sourcing.rfx_events,
1379            &s2c_dir.join("rfx_events.json"),
1380            "RFx events",
1381        );
1382        write_json_safe(
1383            &result.sourcing.bids,
1384            &s2c_dir.join("supplier_bids.json"),
1385            "Supplier bids",
1386        );
1387        write_json_safe(
1388            &result.sourcing.bid_evaluations,
1389            &s2c_dir.join("bid_evaluations.json"),
1390            "Bid evaluations",
1391        );
1392        write_json_safe(
1393            &result.sourcing.contracts,
1394            &s2c_dir.join("procurement_contracts.json"),
1395            "Procurement contracts",
1396        );
1397        write_json_safe(
1398            &result.sourcing.catalog_items,
1399            &s2c_dir.join("catalog_items.json"),
1400            "Catalog items",
1401        );
1402        write_json_safe(
1403            &result.sourcing.scorecards,
1404            &s2c_dir.join("supplier_scorecards.json"),
1405            "Supplier scorecards",
1406        );
1407    }
1408
1409    // ========================================================================
1410    // Intercompany
1411    // ========================================================================
1412    let ic_dir = output_dir.join("intercompany");
1413    if result.intercompany.group_structure.is_some()
1414        || !result.intercompany.matched_pairs.is_empty()
1415    {
1416        std::fs::create_dir_all(&ic_dir)?;
1417        info!("Writing intercompany data...");
1418
1419        // Always write group structure when present (independent of IC transactions).
1420        if let Some(gs) = &result.intercompany.group_structure {
1421            write_json_single_safe(gs, &ic_dir.join("group_structure.json"), "Group structure");
1422        }
1423
1424        write_json_safe(
1425            &result.intercompany.matched_pairs,
1426            &ic_dir.join("ic_matched_pairs.json"),
1427            "IC matched pairs",
1428        );
1429        write_json_safe(
1430            &result.intercompany.seller_journal_entries,
1431            &ic_dir.join("ic_seller_journal_entries.json"),
1432            "IC seller journal entries",
1433        );
1434        write_json_safe(
1435            &result.intercompany.buyer_journal_entries,
1436            &ic_dir.join("ic_buyer_journal_entries.json"),
1437            "IC buyer journal entries",
1438        );
1439        write_json_safe(
1440            &result.intercompany.elimination_entries,
1441            &ic_dir.join("ic_elimination_entries.json"),
1442            "IC elimination entries",
1443        );
1444
1445        // NCI measurements from group structure ownership percentages
1446        if !result.intercompany.nci_measurements.is_empty() {
1447            write_json_safe(
1448                &result.intercompany.nci_measurements,
1449                &ic_dir.join("nci_measurements.json"),
1450                "NCI measurements",
1451            );
1452        }
1453    }
1454
1455    // ========================================================================
1456    // Financial Reporting
1457    // ========================================================================
1458    let fin_dir = output_dir.join("financial_reporting");
1459    if !result.financial_reporting.financial_statements.is_empty()
1460        || !result.financial_reporting.bank_reconciliations.is_empty()
1461        || !result
1462            .financial_reporting
1463            .consolidated_statements
1464            .is_empty()
1465    {
1466        std::fs::create_dir_all(&fin_dir)?;
1467        info!("Writing financial reporting data...");
1468
1469        // Legacy flat file (all standalone statements combined)
1470        write_json_safe(
1471            &result.financial_reporting.financial_statements,
1472            &fin_dir.join("financial_statements.json"),
1473            "Financial statements",
1474        );
1475
1476        // Per-entity standalone statements
1477        if !result.financial_reporting.standalone_statements.is_empty() {
1478            let standalone_dir = fin_dir.join("standalone");
1479            std::fs::create_dir_all(&standalone_dir)?;
1480            for (entity_code, stmts) in &result.financial_reporting.standalone_statements {
1481                let file_name = format!("{}_financial_statements.json", entity_code);
1482                write_json_safe(
1483                    stmts,
1484                    &standalone_dir.join(&file_name),
1485                    &format!("Standalone statements for {}", entity_code),
1486                );
1487            }
1488        }
1489
1490        // Consolidated statements + schedule
1491        if !result
1492            .financial_reporting
1493            .consolidated_statements
1494            .is_empty()
1495            || !result
1496                .financial_reporting
1497                .consolidation_schedules
1498                .is_empty()
1499        {
1500            let consolidated_dir = fin_dir.join("consolidated");
1501            std::fs::create_dir_all(&consolidated_dir)?;
1502            write_json_safe(
1503                &result.financial_reporting.consolidated_statements,
1504                &consolidated_dir.join("consolidated_financial_statements.json"),
1505                "Consolidated financial statements",
1506            );
1507            write_json_safe(
1508                &result.financial_reporting.consolidation_schedules,
1509                &consolidated_dir.join("consolidation_schedule.json"),
1510                "Consolidation schedule",
1511            );
1512        }
1513
1514        write_json_safe(
1515            &result.financial_reporting.bank_reconciliations,
1516            &fin_dir.join("bank_reconciliations.json"),
1517            "Bank reconciliations",
1518        );
1519
1520        // IFRS 8 / ASC 280 Segment Reporting
1521        if !result.financial_reporting.segment_reports.is_empty()
1522            || !result
1523                .financial_reporting
1524                .segment_reconciliations
1525                .is_empty()
1526        {
1527            let seg_dir = fin_dir.join("segment_reporting");
1528            std::fs::create_dir_all(&seg_dir)?;
1529            write_json_safe(
1530                &result.financial_reporting.segment_reports,
1531                &seg_dir.join("segment_reports.json"),
1532                "Segment reports",
1533            );
1534            write_json_safe(
1535                &result.financial_reporting.segment_reconciliations,
1536                &seg_dir.join("segment_reconciliations.json"),
1537                "Segment reconciliations",
1538            );
1539        }
1540
1541        // IAS 1 / ASC 235: Notes to financial statements
1542        write_json_safe(
1543            &result.financial_reporting.notes_to_financial_statements,
1544            &fin_dir.join("notes_to_financial_statements.json"),
1545            "Notes to financial statements",
1546        );
1547    }
1548
1549    // ========================================================================
1550    // Period-Close Trial Balances
1551    // ========================================================================
1552    //
1553    // v5.1: convert each in-memory `PeriodTrialBalance` to the
1554    // canonical `datasynth_core::models::balance::TrialBalance` before
1555    // writing.  The on-disk shape is now identical to what the group
1556    // aggregate phase loads via `tb_loader::load_entity_trial_balance`,
1557    // so the loader's v5.0 dual-shape detection (`PeriodTrialBalanceOnDisk`
1558    // → `TrialBalance` synthesis) is no longer required.
1559    if !result.financial_reporting.trial_balances.is_empty() {
1560        let pc_dir = output_dir.join("period_close");
1561        std::fs::create_dir_all(&pc_dir)?;
1562        info!(
1563            "Writing {} period-close trial balances...",
1564            result.financial_reporting.trial_balances.len()
1565        );
1566        // Pick the first JE's company_code + currency as the
1567        // canonical identifiers; the orchestrator only emits one TB
1568        // per period (gated by `if company_idx == 0` at the push
1569        // site), so all trial-balance entries belong to that company.
1570        // Fall back to safe defaults when the JE list is empty
1571        // (effectively only test fixtures).
1572        let (company_code, currency) = result
1573            .journal_entries
1574            .first()
1575            .map(|je| (je.header.company_code.as_str(), je.header.currency.as_str()))
1576            .unwrap_or(("UNKNOWN", "USD"));
1577        let canonical: Vec<datasynth_core::models::balance::TrialBalance> = result
1578            .financial_reporting
1579            .trial_balances
1580            .iter()
1581            .cloned()
1582            .map(|tb| tb.into_canonical(company_code, currency))
1583            .collect();
1584        write_json_safe(
1585            &canonical,
1586            &pc_dir.join("trial_balances.json"),
1587            "Period-close trial balances (canonical)",
1588        );
1589    }
1590
1591    // ========================================================================
1592    // Balance: Opening Balances + GL-Subledger Reconciliation
1593    // ========================================================================
1594    if !result.opening_balances.is_empty() || !result.subledger_reconciliation.is_empty() {
1595        let balance_dir = output_dir.join("balance");
1596        std::fs::create_dir_all(&balance_dir)?;
1597        info!("Writing balance data...");
1598
1599        write_json_safe(
1600            &result.opening_balances,
1601            &balance_dir.join("opening_balances.json"),
1602            "Opening balances",
1603        );
1604        write_json_safe(
1605            &result.subledger_reconciliation,
1606            &balance_dir.join("subledger_reconciliation.json"),
1607            "Subledger reconciliation",
1608        );
1609    }
1610
1611    // ========================================================================
1612    // HR (Payroll, Time Entries, Expense Reports, Benefit Enrollments, Pensions, Stock Compensation, Employee Change History)
1613    // ========================================================================
1614    let hr_dir = output_dir.join("hr");
1615    if !result.hr.payroll_runs.is_empty()
1616        || !result.hr.time_entries.is_empty()
1617        || !result.hr.expense_reports.is_empty()
1618        || !result.hr.benefit_enrollments.is_empty()
1619        || !result.hr.pension_plans.is_empty()
1620        || !result.hr.stock_grants.is_empty()
1621        || !result.master_data.employee_change_history.is_empty()
1622    {
1623        std::fs::create_dir_all(&hr_dir)?;
1624        info!("Writing HR data...");
1625
1626        write_json_safe(
1627            &result.hr.payroll_runs,
1628            &hr_dir.join("payroll_runs.json"),
1629            "Payroll runs",
1630        );
1631        write_json_safe(
1632            &result.hr.payroll_line_items,
1633            &hr_dir.join("payroll_line_items.json"),
1634            "Payroll line items",
1635        );
1636        write_json_safe(
1637            &result.hr.time_entries,
1638            &hr_dir.join("time_entries.json"),
1639            "Time entries",
1640        );
1641        write_json_safe(
1642            &result.hr.expense_reports,
1643            &hr_dir.join("expense_reports.json"),
1644            "Expense reports",
1645        );
1646        write_json_safe(
1647            &result.hr.benefit_enrollments,
1648            &hr_dir.join("benefit_enrollments.json"),
1649            "Benefit enrollments",
1650        );
1651        write_json_safe(
1652            &result.hr.pension_plans,
1653            &hr_dir.join("pension_plans.json"),
1654            "Pension plans",
1655        );
1656        write_json_safe(
1657            &result.hr.pension_obligations,
1658            &hr_dir.join("pension_obligations.json"),
1659            "Pension obligations",
1660        );
1661        write_json_safe(
1662            &result.hr.pension_plan_assets,
1663            &hr_dir.join("plan_assets.json"),
1664            "Plan assets",
1665        );
1666        write_json_safe(
1667            &result.hr.pension_disclosures,
1668            &hr_dir.join("pension_disclosures.json"),
1669            "Pension disclosures",
1670        );
1671        write_json_safe(
1672            &result.hr.stock_grants,
1673            &hr_dir.join("stock_grants.json"),
1674            "Stock grants",
1675        );
1676        write_json_safe(
1677            &result.hr.stock_comp_expenses,
1678            &hr_dir.join("stock_comp_expense.json"),
1679            "Stock comp expense",
1680        );
1681        write_json_safe(
1682            &result.master_data.employee_change_history,
1683            &hr_dir.join("employee_change_history.json"),
1684            "Employee change history",
1685        );
1686    }
1687
1688    // ========================================================================
1689    // Manufacturing
1690    // ========================================================================
1691    let mfg_dir = output_dir.join("manufacturing");
1692    if !result.manufacturing.production_orders.is_empty()
1693        || !result.manufacturing.quality_inspections.is_empty()
1694        || !result.manufacturing.cycle_counts.is_empty()
1695        || !result.manufacturing.bom_components.is_empty()
1696        || !result.manufacturing.inventory_movements.is_empty()
1697    {
1698        std::fs::create_dir_all(&mfg_dir)?;
1699        info!("Writing manufacturing data...");
1700
1701        write_json_safe(
1702            &result.manufacturing.production_orders,
1703            &mfg_dir.join("production_orders.json"),
1704            "Production orders",
1705        );
1706        write_json_safe(
1707            &result.manufacturing.quality_inspections,
1708            &mfg_dir.join("quality_inspections.json"),
1709            "Quality inspections",
1710        );
1711        write_json_safe(
1712            &result.manufacturing.cycle_counts,
1713            &mfg_dir.join("cycle_counts.json"),
1714            "Cycle counts",
1715        );
1716        write_json_safe(
1717            &result.manufacturing.bom_components,
1718            &mfg_dir.join("bom_components.json"),
1719            "BOM components",
1720        );
1721        write_json_safe(
1722            &result.manufacturing.inventory_movements,
1723            &mfg_dir.join("inventory_movements.json"),
1724            "Inventory movements",
1725        );
1726    }
1727
1728    // ========================================================================
1729    // Sales, KPIs, Budgets
1730    // ========================================================================
1731    let sales_dir = output_dir.join("sales_kpi_budgets");
1732    if !result.sales_kpi_budgets.sales_quotes.is_empty()
1733        || !result.sales_kpi_budgets.kpis.is_empty()
1734        || !result.sales_kpi_budgets.budgets.is_empty()
1735        || !result.sales_kpi_budgets.external_expectations.is_empty()
1736        || !result.sales_kpi_budgets.evidence_anchors.is_empty()
1737    {
1738        std::fs::create_dir_all(&sales_dir)?;
1739        info!("Writing sales, KPI, and budget data...");
1740
1741        write_json_safe(
1742            &result.sales_kpi_budgets.sales_quotes,
1743            &sales_dir.join("sales_quotes.json"),
1744            "Sales quotes",
1745        );
1746        write_json_safe(
1747            &result.sales_kpi_budgets.kpis,
1748            &sales_dir.join("management_kpis.json"),
1749            "Management KPIs",
1750        );
1751        write_json_safe(
1752            &result.sales_kpi_budgets.budgets,
1753            &sales_dir.join("budgets.json"),
1754            "Budgets",
1755        );
1756        write_json_safe(
1757            &result.sales_kpi_budgets.external_expectations,
1758            &sales_dir.join("external_expectations.json"),
1759            "External expectations",
1760        );
1761        write_json_safe(
1762            &result.sales_kpi_budgets.evidence_anchors,
1763            &sales_dir.join("evidence_anchors.json"),
1764            "Evidence anchors",
1765        );
1766    }
1767
1768    // ========================================================================
1769    // Tax
1770    // ========================================================================
1771    let tax_dir = output_dir.join("tax");
1772    if !result.tax.jurisdictions.is_empty()
1773        || !result.tax.codes.is_empty()
1774        || !result.tax.tax_provisions.is_empty()
1775    {
1776        std::fs::create_dir_all(&tax_dir)?;
1777        info!("Writing tax data...");
1778
1779        write_json_safe(
1780            &result.tax.jurisdictions,
1781            &tax_dir.join("tax_jurisdictions.json"),
1782            "Tax jurisdictions",
1783        );
1784        write_json_safe(
1785            &result.tax.codes,
1786            &tax_dir.join("tax_codes.json"),
1787            "Tax codes",
1788        );
1789        write_json_safe(
1790            &result.tax.tax_provisions,
1791            &tax_dir.join("tax_provisions.json"),
1792            "Tax provisions",
1793        );
1794        write_json_safe(
1795            &result.tax.tax_lines,
1796            &tax_dir.join("tax_lines.json"),
1797            "Tax lines",
1798        );
1799        write_json_safe(
1800            &result.tax.tax_returns,
1801            &tax_dir.join("tax_returns.json"),
1802            "Tax returns",
1803        );
1804        write_json_safe(
1805            &result.tax.withholding_records,
1806            &tax_dir.join("withholding_records.json"),
1807            "Withholding tax records",
1808        );
1809        if !result.tax.tax_anomaly_labels.is_empty() {
1810            write_json_safe(
1811                &result.tax.tax_anomaly_labels,
1812                &tax_dir.join("tax_anomaly_labels.json"),
1813                "Tax anomaly labels",
1814            );
1815        }
1816        // Deferred tax engine output (IAS 12 / ASC 740)
1817        if !result.tax.deferred_tax.temporary_differences.is_empty() {
1818            write_json_safe(
1819                &result.tax.deferred_tax.temporary_differences,
1820                &tax_dir.join("temporary_differences.json"),
1821                "Temporary differences",
1822            );
1823            write_json_safe(
1824                &result.tax.deferred_tax.etr_reconciliations,
1825                &tax_dir.join("etr_reconciliation.json"),
1826                "ETR reconciliation",
1827            );
1828            write_json_safe(
1829                &result.tax.deferred_tax.rollforwards,
1830                &tax_dir.join("deferred_tax_rollforward.json"),
1831                "Deferred tax rollforward",
1832            );
1833            write_json_safe(
1834                &result.tax.deferred_tax.journal_entries,
1835                &tax_dir.join("deferred_tax_journal_entries.json"),
1836                "Deferred tax journal entries",
1837            );
1838        }
1839    }
1840
1841    // ========================================================================
1842    // ESG
1843    // ========================================================================
1844    let esg_dir = output_dir.join("esg");
1845    if !result.esg.emissions.is_empty()
1846        || !result.esg.energy.is_empty()
1847        || !result.esg.diversity.is_empty()
1848        || !result.esg.governance.is_empty()
1849    {
1850        std::fs::create_dir_all(&esg_dir)?;
1851        info!("Writing ESG data...");
1852
1853        write_json_safe(
1854            &result.esg.emissions,
1855            &esg_dir.join("emission_records.json"),
1856            "Emission records",
1857        );
1858        write_json_safe(
1859            &result.esg.energy,
1860            &esg_dir.join("energy_consumption.json"),
1861            "Energy consumption",
1862        );
1863        write_json_safe(
1864            &result.esg.water,
1865            &esg_dir.join("water_usage.json"),
1866            "Water usage",
1867        );
1868        write_json_safe(
1869            &result.esg.waste,
1870            &esg_dir.join("waste_records.json"),
1871            "Waste records",
1872        );
1873        write_json_safe(
1874            &result.esg.diversity,
1875            &esg_dir.join("workforce_diversity.json"),
1876            "Workforce diversity",
1877        );
1878        write_json_safe(
1879            &result.esg.pay_equity,
1880            &esg_dir.join("pay_equity.json"),
1881            "Pay equity",
1882        );
1883        write_json_safe(
1884            &result.esg.safety_incidents,
1885            &esg_dir.join("safety_incidents.json"),
1886            "Safety incidents",
1887        );
1888        write_json_safe(
1889            &result.esg.safety_metrics,
1890            &esg_dir.join("safety_metrics.json"),
1891            "Safety metrics",
1892        );
1893        write_json_safe(
1894            &result.esg.governance,
1895            &esg_dir.join("governance_metrics.json"),
1896            "Governance metrics",
1897        );
1898        write_json_safe(
1899            &result.esg.supplier_assessments,
1900            &esg_dir.join("supplier_esg_assessments.json"),
1901            "Supplier ESG assessments",
1902        );
1903        write_json_safe(
1904            &result.esg.materiality,
1905            &esg_dir.join("materiality_assessments.json"),
1906            "Materiality assessments",
1907        );
1908        write_json_safe(
1909            &result.esg.disclosures,
1910            &esg_dir.join("esg_disclosures.json"),
1911            "ESG disclosures",
1912        );
1913        write_json_safe(
1914            &result.esg.climate_scenarios,
1915            &esg_dir.join("climate_scenarios.json"),
1916            "Climate scenarios",
1917        );
1918        write_json_safe(
1919            &result.esg.anomaly_labels,
1920            &esg_dir.join("esg_anomaly_labels.json"),
1921            "ESG anomaly labels",
1922        );
1923    }
1924
1925    // ========================================================================
1926    // Process Mining (OCPM)
1927    // ========================================================================
1928    if let Some(ref event_log) = result.ocpm.event_log {
1929        if !event_log.events.is_empty() || !event_log.objects.is_empty() {
1930            let pm_dir = output_dir.join("process_mining");
1931            std::fs::create_dir_all(&pm_dir)?;
1932            info!("Writing process mining (OCPM) data...");
1933
1934            // Write the full OCEL 2.0 event log. v4.4.2+ patches every
1935            // `object_refs[*].object_type_id` with a companion
1936            // `object_type` key, matching the OCEL 2.0 spec and SDK
1937            // consumer expectations that previously saw `object_type`
1938            // arrive as null. See `add_ocel_object_type_alias` below.
1939            match serde_json::to_value(event_log) {
1940                Ok(mut v) => {
1941                    add_ocel_object_type_alias(&mut v);
1942                    match serde_json::to_string_pretty(&v) {
1943                        Ok(json) => {
1944                            if let Err(e) = std::fs::write(pm_dir.join("event_log.json"), json) {
1945                                warn!("Failed to write OCPM event log: {}", e);
1946                            } else {
1947                                info!(
1948                                    "  Event log written: {} events, {} objects",
1949                                    result.ocpm.event_count, result.ocpm.object_count
1950                                );
1951                            }
1952                        }
1953                        Err(e) => warn!("Failed to serialize OCPM event log: {}", e),
1954                    }
1955                }
1956                Err(e) => warn!("Failed to build OCPM event log Value: {}", e),
1957            }
1958
1959            // Write events separately for easy consumption
1960            if !event_log.events.is_empty() {
1961                match serde_json::to_string_pretty(&event_log.events) {
1962                    Ok(json) => {
1963                        if let Err(e) = std::fs::write(pm_dir.join("events.json"), json) {
1964                            warn!("Failed to write OCPM events: {}", e);
1965                        } else {
1966                            info!("  Events written: {} records", event_log.events.len());
1967                        }
1968                    }
1969                    Err(e) => warn!("Failed to serialize OCPM events: {}", e),
1970                }
1971            }
1972
1973            // Write objects separately for easy consumption
1974            if !event_log.objects.is_empty() {
1975                let objects: Vec<&_> = event_log.objects.iter().collect();
1976                match serde_json::to_string_pretty(&objects) {
1977                    Ok(json) => {
1978                        if let Err(e) = std::fs::write(pm_dir.join("objects.json"), json) {
1979                            warn!("Failed to write OCPM objects: {}", e);
1980                        } else {
1981                            info!("  Objects written: {} records", event_log.objects.len());
1982                        }
1983                    }
1984                    Err(e) => warn!("Failed to serialize OCPM objects: {}", e),
1985                }
1986            }
1987
1988            // Write process variants if any were computed
1989            if !event_log.variants.is_empty() {
1990                let variants: Vec<&_> = event_log.variants.values().collect();
1991                match serde_json::to_string_pretty(&variants) {
1992                    Ok(json) => {
1993                        if let Err(e) = std::fs::write(pm_dir.join("process_variants.json"), json) {
1994                            warn!("Failed to write process variants: {}", e);
1995                        } else {
1996                            info!(
1997                                "  Process variants written: {} variants",
1998                                event_log.variants.len()
1999                            );
2000                        }
2001                    }
2002                    Err(e) => warn!("Failed to serialize process variants: {}", e),
2003                }
2004            }
2005        }
2006    }
2007
2008    // ========================================================================
2009    // Chart of Accounts
2010    // ========================================================================
2011    // Primary file: flat array of accounts (shape stable since v3.x —
2012    // consumers iterate over it).
2013    match serde_json::to_string_pretty(&result.chart_of_accounts.accounts) {
2014        Ok(json) => {
2015            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts.json"), json) {
2016                warn!("Failed to write chart of accounts: {}", e);
2017            } else {
2018                info!("  Chart of accounts written");
2019            }
2020        }
2021        Err(e) => warn!("Failed to serialize chart of accounts: {}", e),
2022    }
2023    // v4.4.1 — companion metadata file so SDK consumers can read the
2024    // accounting framework + complexity + ID without having to infer
2025    // them from each account row. The SDK team flagged
2026    // `CoA.accounting_framework` arriving as null in v4.1.x; the field
2027    // didn't exist at all until v4.4.1.
2028    let coa_meta = serde_json::json!({
2029        "coa_id": result.chart_of_accounts.coa_id,
2030        "name": result.chart_of_accounts.name,
2031        "country": result.chart_of_accounts.country,
2032        "industry": result.chart_of_accounts.industry,
2033        "complexity": result.chart_of_accounts.complexity,
2034        "account_format": result.chart_of_accounts.account_format,
2035        "accounting_framework": result.chart_of_accounts.accounting_framework,
2036        "account_count": result.chart_of_accounts.accounts.len(),
2037    });
2038    match serde_json::to_string_pretty(&coa_meta) {
2039        Ok(json) => {
2040            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts_meta.json"), json) {
2041                warn!("Failed to write CoA metadata: {}", e);
2042            } else {
2043                info!(
2044                    "  Chart of accounts metadata written (accounting_framework: {:?})",
2045                    result.chart_of_accounts.accounting_framework
2046                );
2047            }
2048        }
2049        Err(e) => warn!("Failed to serialize CoA metadata: {}", e),
2050    }
2051
2052    // ========================================================================
2053    // Balance Validation Summary
2054    // ========================================================================
2055    if result.balance_validation.validated {
2056        match serde_json::to_string_pretty(&BalanceValidationSummary::from(
2057            &result.balance_validation,
2058        )) {
2059            Ok(json) => {
2060                if let Err(e) = std::fs::write(output_dir.join("balance_validation.json"), json) {
2061                    warn!("Failed to write balance validation: {}", e);
2062                } else {
2063                    info!("  Balance validation summary written");
2064                }
2065            }
2066            Err(e) => warn!("Failed to serialize balance validation: {}", e),
2067        }
2068    }
2069
2070    // ========================================================================
2071    // Data Quality Statistics (now serializable directly via Serialize derives)
2072    // ========================================================================
2073    {
2074        match serde_json::to_string_pretty(&result.data_quality_stats) {
2075            Ok(json) => {
2076                if let Err(e) = std::fs::write(output_dir.join("data_quality_stats.json"), json) {
2077                    warn!("Failed to write data quality stats: {}", e);
2078                } else {
2079                    info!("  Data quality stats written (full detail)");
2080                }
2081            }
2082            Err(e) => warn!("Failed to serialize data quality stats: {}", e),
2083        }
2084    }
2085
2086    // ========================================================================
2087    // v3.3.0: Analytics-metadata phase outputs (prior year, industry
2088    // benchmarks, management reports, drift events).
2089    // ========================================================================
2090    {
2091        let am = &result.analytics_metadata;
2092        if !am.prior_year_comparatives.is_empty()
2093            || !am.industry_benchmarks.is_empty()
2094            || !am.management_reports.is_empty()
2095            || !am.drift_events.is_empty()
2096        {
2097            let analytics_dir = output_dir.join("analytics");
2098            std::fs::create_dir_all(&analytics_dir)?;
2099            write_json_safe(
2100                &am.prior_year_comparatives,
2101                &analytics_dir.join("prior_year_comparatives.json"),
2102                "Prior-year comparatives (v3.3.0)",
2103            );
2104            write_json_safe(
2105                &am.industry_benchmarks,
2106                &analytics_dir.join("industry_benchmarks.json"),
2107                "Industry benchmarks (v3.3.0)",
2108            );
2109            write_json_safe(
2110                &am.management_reports,
2111                &analytics_dir.join("management_reports.json"),
2112                "Management reports (v3.3.0)",
2113            );
2114            write_json_safe(
2115                &am.drift_events,
2116                &analytics_dir.join("drift_events.json"),
2117                "Drift event labels (v3.3.0)",
2118            );
2119        }
2120    }
2121
2122    // ========================================================================
2123    // Pre-built Analytics (Benford, amount distribution, process variants)
2124    // ========================================================================
2125    {
2126        let analytics_dir = output_dir.join("analytics");
2127
2128        // Collect non-zero amounts from journal entry lines
2129        let amounts: Vec<_> = result
2130            .journal_entries
2131            .iter()
2132            .flat_map(|je| je.lines.iter())
2133            .flat_map(|line| {
2134                let d = (!line.debit_amount.is_zero()).then_some(line.debit_amount);
2135                let c = (!line.credit_amount.is_zero()).then_some(line.credit_amount);
2136                d.into_iter().chain(c)
2137            })
2138            .collect();
2139
2140        if amounts.len() >= 10 {
2141            std::fs::create_dir_all(&analytics_dir)?;
2142            info!("Writing pre-built analytics ({} amounts)...", amounts.len());
2143
2144            // Benford's Law analysis
2145            let benford_analyzer = datasynth_eval::BenfordAnalyzer::default();
2146            match benford_analyzer.analyze(&amounts) {
2147                Ok(ref benford_result) => {
2148                    if let Ok(json) = serde_json::to_string_pretty(benford_result) {
2149                        if let Err(e) =
2150                            std::fs::write(analytics_dir.join("benford_analysis.json"), json)
2151                        {
2152                            warn!("Failed to write Benford analysis: {}", e);
2153                        } else {
2154                            info!(
2155                                "  Benford analysis written (conformity: {:?}, MAD: {:.4})",
2156                                benford_result.conformity, benford_result.mad
2157                            );
2158                        }
2159                    }
2160                }
2161                Err(e) => warn!("Benford analysis skipped: {}", e),
2162            }
2163
2164            // Amount distribution analysis
2165            let amount_analyzer = datasynth_eval::AmountDistributionAnalyzer::new();
2166            match amount_analyzer.analyze(&amounts) {
2167                Ok(ref dist_result) => {
2168                    if let Ok(json) = serde_json::to_string_pretty(dist_result) {
2169                        if let Err(e) =
2170                            std::fs::write(analytics_dir.join("amount_distribution.json"), json)
2171                        {
2172                            warn!("Failed to write amount distribution: {}", e);
2173                        } else {
2174                            info!(
2175                                "  Amount distribution written (skewness: {:.2}, kurtosis: {:.2})",
2176                                dist_result.skewness, dist_result.kurtosis
2177                            );
2178                        }
2179                    }
2180                }
2181                Err(e) => warn!("Amount distribution analysis skipped: {}", e),
2182            }
2183        }
2184
2185        // Process variant summary (from OCPM event log).
2186        //
2187        // v3.1.1 — always emit the file when an event_log exists. When the
2188        // event_log has no pre-computed `variants` map (older OCPM phases
2189        // didn't populate it), derive variants on the fly from the raw
2190        // events so SDK consumers see `analytics/process_variant_summary.json`
2191        // in every archive rather than `null`. Without this, the v3.1
2192        // claim that the file exists was only true when OCPM happened to
2193        // populate its variants map.
2194        if let Some(ref event_log) = result.ocpm.event_log {
2195            std::fs::create_dir_all(&analytics_dir)?;
2196            let variant_data: Vec<datasynth_eval::VariantData> = if !event_log.variants.is_empty() {
2197                event_log
2198                    .variants
2199                    .values()
2200                    .map(|v| datasynth_eval::VariantData {
2201                        variant_id: v.variant_id.clone(),
2202                        case_count: v.frequency as usize,
2203                        is_happy_path: v.is_happy_path,
2204                    })
2205                    .collect()
2206            } else {
2207                // Fallback: derive variants from raw events by case_id.
2208                // Each case's activity sequence (by activity_id) defines a
2209                // variant; cases with the same sequence collapse into one
2210                // variant. Events without a case_id are skipped since they
2211                // can't be grouped into a process instance.
2212                use std::collections::HashMap;
2213                // Key by case_id's string form to avoid pulling the uuid
2214                // crate into the output writer's dependency graph.
2215                let mut per_case: HashMap<String, Vec<String>> = HashMap::new();
2216                for ev in &event_log.events {
2217                    if let Some(case_id) = ev.case_id {
2218                        per_case
2219                            .entry(case_id.to_string())
2220                            .or_default()
2221                            .push(ev.activity_id.clone());
2222                    }
2223                }
2224                let mut variant_counts: HashMap<Vec<String>, usize> = HashMap::new();
2225                for activities in per_case.into_values() {
2226                    *variant_counts.entry(activities).or_insert(0) += 1;
2227                }
2228                // Happy path heuristic: the highest-frequency variant.
2229                let max_count = variant_counts.values().copied().max().unwrap_or(0);
2230                variant_counts
2231                    .into_iter()
2232                    .enumerate()
2233                    .map(|(i, (seq, count))| datasynth_eval::VariantData {
2234                        variant_id: format!("V{i:04}:{}", seq.join("->")),
2235                        case_count: count,
2236                        is_happy_path: count == max_count && max_count > 0,
2237                    })
2238                    .collect()
2239            };
2240
2241            let variant_analyzer = datasynth_eval::VariantAnalyzer::new();
2242            match variant_analyzer.analyze(&variant_data) {
2243                Ok(ref variant_result) => {
2244                    if let Ok(json) = serde_json::to_string_pretty(variant_result) {
2245                        if let Err(e) =
2246                            std::fs::write(analytics_dir.join("process_variant_summary.json"), json)
2247                        {
2248                            warn!("Failed to write variant summary: {}", e);
2249                        } else {
2250                            info!(
2251                                "  Process variant summary written ({} variants, entropy: {:.2})",
2252                                variant_result.variant_count, variant_result.variant_entropy
2253                            );
2254                        }
2255                    }
2256                }
2257                Err(e) => {
2258                    // Even on analyzer error, emit a minimal JSON placeholder
2259                    // so the file always exists in the archive.
2260                    warn!("Variant analysis failed: {}; emitting empty summary", e);
2261                    let placeholder = serde_json::json!({
2262                        "variant_count": 0,
2263                        "variant_entropy": null,
2264                        "happy_path_concentration": null,
2265                        "top_variants": [],
2266                        "passes": false,
2267                        "issues": [format!("analyzer error: {e}")],
2268                    });
2269                    if let Ok(json) = serde_json::to_string_pretty(&placeholder) {
2270                        let _ = std::fs::write(
2271                            analytics_dir.join("process_variant_summary.json"),
2272                            json,
2273                        );
2274                    }
2275                }
2276            }
2277        }
2278
2279        // Banking evaluation (KYC completeness + AML detectability).
2280        // Matches the payload served by /v1/jobs/{id}/analytics so
2281        // archive-mode consumers see the same four files the endpoint returns.
2282        if !result.banking.customers.is_empty() {
2283            use datasynth_core::models::banking::BankingCustomerType;
2284            use datasynth_eval::banking::{
2285                AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
2286                KycCompletenessAnalyzer, KycProfileData, TypologyData,
2287            };
2288            use std::collections::HashMap;
2289            std::fs::create_dir_all(&analytics_dir)?;
2290
2291            let kyc_data: Vec<KycProfileData> = result
2292                .banking
2293                .customers
2294                .iter()
2295                .map(|c| KycProfileData {
2296                    profile_id: c.customer_id.to_string(),
2297                    has_name: true,
2298                    has_dob: c.date_of_birth.is_some(),
2299                    has_address: c.address_line1.is_some(),
2300                    has_id_document: c.national_id.is_some() || c.passport_number.is_some(),
2301                    has_risk_rating: true,
2302                    has_beneficial_owner: !c.beneficial_owners.is_empty(),
2303                    is_entity: c.customer_type == BankingCustomerType::Business,
2304                    is_verified: c.kyc_truthful,
2305                })
2306                .collect();
2307
2308            let mut banking_eval = BankingEvaluation::new();
2309            if let Ok(kyc_res) = KycCompletenessAnalyzer::new().analyze(&kyc_data) {
2310                banking_eval.kyc = Some(kyc_res);
2311            }
2312
2313            let suspicious: Vec<&_> = result
2314                .banking
2315                .transactions
2316                .iter()
2317                .filter(|t| t.is_suspicious)
2318                .collect();
2319            if !suspicious.is_empty() {
2320                // Use AmlTypology::canonical_name() so the evaluator's
2321                // exact-string match against EXPECTED_TYPOLOGIES succeeds.
2322                // Prior to v3.1.1 we used `format!("{:?}", r)` (Debug /
2323                // PascalCase) which never matched the lowercase expected
2324                // names and produced "typology_coverage = 0.000" in every
2325                // run regardless of actual typology injection.
2326                let aml_data: Vec<AmlTransactionData> = suspicious
2327                    .iter()
2328                    .map(|t| AmlTransactionData {
2329                        transaction_id: t.transaction_id.to_string(),
2330                        typology: t
2331                            .suspicion_reason
2332                            .as_ref()
2333                            .map(|r| r.canonical_name().to_string())
2334                            .unwrap_or_default(),
2335                        case_id: t.case_id.clone().unwrap_or_default(),
2336                        amount: t.amount.try_into().unwrap_or(0.0),
2337                        is_flagged: t.is_suspicious,
2338                    })
2339                    .collect();
2340
2341                let mut typology_map: HashMap<String, (usize, HashMap<String, bool>)> =
2342                    HashMap::new();
2343                for txn in &aml_data {
2344                    if !txn.typology.is_empty() {
2345                        let entry = typology_map
2346                            .entry(txn.typology.clone())
2347                            .or_insert_with(|| (0, HashMap::new()));
2348                        entry.0 += 1;
2349                        entry.1.insert(txn.case_id.clone(), true);
2350                    }
2351                }
2352                let typology_data: Vec<TypologyData> = typology_map
2353                    .iter()
2354                    .map(|(name, (count, cases))| TypologyData {
2355                        name: name.clone(),
2356                        scenario_count: *count,
2357                        case_ids_consistent: cases.len() <= *count,
2358                    })
2359                    .collect();
2360
2361                if let Ok(aml_res) =
2362                    AmlDetectabilityAnalyzer::new().analyze(&aml_data, &typology_data)
2363                {
2364                    banking_eval.aml = Some(aml_res);
2365                }
2366            }
2367            banking_eval.check_thresholds();
2368
2369            match serde_json::to_string_pretty(&banking_eval) {
2370                Ok(json) => {
2371                    if let Err(e) =
2372                        std::fs::write(analytics_dir.join("banking_evaluation.json"), json)
2373                    {
2374                        warn!("Failed to write banking evaluation: {}", e);
2375                    } else {
2376                        info!(
2377                            "  Banking evaluation written ({} profiles, {} issues, passes={})",
2378                            result.banking.customers.len(),
2379                            banking_eval.issues.len(),
2380                            banking_eval.passes
2381                        );
2382                    }
2383                }
2384                Err(e) => warn!("Failed to serialize banking evaluation: {}", e),
2385            }
2386        }
2387    }
2388
2389    // ========================================================================
2390    // Data Quality Issue Records + Quality Labels
2391    // ========================================================================
2392    if !result.quality_issues.is_empty() {
2393        let labels_dir = output_dir.join("labels");
2394        std::fs::create_dir_all(&labels_dir)?;
2395        info!("Writing data quality issue records...");
2396        write_json_safe(
2397            &result.quality_issues,
2398            &labels_dir.join("quality_issues.json"),
2399            "Data quality issues",
2400        );
2401
2402        // Derive quality_labels.json from quality_issues: maps each QualityIssue
2403        // to a QualityIssueLabel with the corresponding LabeledIssueType and severity.
2404        use datasynth_generators::{
2405            LabeledIssueType, QualityIssueLabel, QualityIssueType, QualityLabels,
2406        };
2407        let mut quality_labels = QualityLabels::with_capacity(result.quality_issues.len());
2408        for issue in &result.quality_issues {
2409            let labeled_type = match issue.issue_type {
2410                QualityIssueType::MissingValue => LabeledIssueType::MissingValue,
2411                QualityIssueType::Typo => LabeledIssueType::Typo,
2412                QualityIssueType::DateFormatVariation
2413                | QualityIssueType::AmountFormatVariation
2414                | QualityIssueType::IdentifierFormatVariation
2415                | QualityIssueType::TextFormatVariation => LabeledIssueType::FormatVariation,
2416                QualityIssueType::ExactDuplicate
2417                | QualityIssueType::NearDuplicate
2418                | QualityIssueType::FuzzyDuplicate => LabeledIssueType::Duplicate,
2419                QualityIssueType::EncodingIssue => LabeledIssueType::EncodingIssue,
2420            };
2421            let mut label = QualityIssueLabel::new(
2422                labeled_type,
2423                issue.record_id.clone(),
2424                issue.field.clone().unwrap_or_else(|| "_record".to_string()),
2425                "data_quality_injector",
2426            );
2427            if let Some(ref orig) = issue.original_value {
2428                label = label.with_original(orig.clone());
2429            }
2430            if let Some(ref modified) = issue.modified_value {
2431                label = label.with_modified(modified.clone());
2432            }
2433            quality_labels.add(label);
2434        }
2435        if let Ok(json) = serde_json::to_string_pretty(&quality_labels) {
2436            if let Err(e) = std::fs::write(labels_dir.join("quality_labels.json"), json.as_bytes())
2437            {
2438                warn!("Failed to write quality labels: {}", e);
2439            } else {
2440                info!(
2441                    "  Quality labels written: {} labels -> labels/quality_labels.json",
2442                    quality_labels.len()
2443                );
2444            }
2445        }
2446    }
2447
2448    // ========================================================================
2449    // Internal Controls
2450    // ========================================================================
2451    if !result.internal_controls.is_empty() || !result.sod_violations.is_empty() {
2452        let ctrl_dir = output_dir.join("internal_controls");
2453        std::fs::create_dir_all(&ctrl_dir)?;
2454        info!("Writing internal controls data...");
2455
2456        write_json_safe(
2457            &result.internal_controls,
2458            &ctrl_dir.join("internal_controls.json"),
2459            "Internal controls",
2460        );
2461        // SoD violations extracted from control-annotated journal entries
2462        write_json_safe(
2463            &result.sod_violations,
2464            &ctrl_dir.join("sod_violations.json"),
2465            "SoD violations",
2466        );
2467
2468        // SoD conflict pairs, SoD rules, control mappings, and COSO control mapping
2469        // are static reference data — export via ControlExporter regardless of whether
2470        // individual violations were generated so the master catalog is always present.
2471        let exporter = datasynth_output::ControlExporter::new(&ctrl_dir);
2472        match exporter.export_standard() {
2473            Ok(summary) => {
2474                info!(
2475                    "  Control master data written: {} controls, {} SoD conflicts, {} SoD rules, {} COSO mappings, {} account mappings",
2476                    summary.controls_count,
2477                    summary.sod_conflicts_count,
2478                    summary.sod_rules_count,
2479                    summary.coso_mappings_count,
2480                    summary.account_mappings_count,
2481                );
2482            }
2483            Err(e) => warn!("Failed to write control master data: {}", e),
2484        }
2485    }
2486
2487    // ========================================================================
2488    // Accounting Standards
2489    // ========================================================================
2490    if !result.accounting_standards.contracts.is_empty()
2491        || !result.accounting_standards.impairment_tests.is_empty()
2492        || !result.accounting_standards.business_combinations.is_empty()
2493        || !result.accounting_standards.ecl_models.is_empty()
2494        || !result.accounting_standards.provisions.is_empty()
2495        || !result
2496            .accounting_standards
2497            .currency_translation_results
2498            .is_empty()
2499    {
2500        let acct_dir = output_dir.join("accounting_standards");
2501        std::fs::create_dir_all(&acct_dir)?;
2502        info!("Writing accounting standards data...");
2503
2504        write_json_safe(
2505            &result.accounting_standards.contracts,
2506            &acct_dir.join("customer_contracts.json"),
2507            "Customer contracts",
2508        );
2509        write_json_safe(
2510            &result.accounting_standards.impairment_tests,
2511            &acct_dir.join("impairment_tests.json"),
2512            "Impairment tests",
2513        );
2514        write_json_safe(
2515            &result.accounting_standards.business_combinations,
2516            &acct_dir.join("business_combinations.json"),
2517            "Business combinations",
2518        );
2519        write_json_safe(
2520            &result
2521                .accounting_standards
2522                .business_combination_journal_entries,
2523            &acct_dir.join("business_combination_journal_entries.json"),
2524            "Business combination journal entries",
2525        );
2526        write_json_safe(
2527            &result.accounting_standards.ecl_models,
2528            &acct_dir.join("ecl_models.json"),
2529            "ECL models",
2530        );
2531        write_json_safe(
2532            &result.accounting_standards.ecl_provision_movements,
2533            &acct_dir.join("ecl_provision_movements.json"),
2534            "ECL provision movements",
2535        );
2536        write_json_safe(
2537            &result.accounting_standards.ecl_journal_entries,
2538            &acct_dir.join("ecl_journal_entries.json"),
2539            "ECL journal entries",
2540        );
2541        write_json_safe(
2542            &result.accounting_standards.provisions,
2543            &acct_dir.join("provisions.json"),
2544            "Provisions (IAS 37 / ASC 450)",
2545        );
2546        write_json_safe(
2547            &result.accounting_standards.provision_movements,
2548            &acct_dir.join("provision_movements.json"),
2549            "Provision movements",
2550        );
2551        write_json_safe(
2552            &result.accounting_standards.contingent_liabilities,
2553            &acct_dir.join("contingent_liabilities.json"),
2554            "Contingent liabilities",
2555        );
2556        write_json_safe(
2557            &result.accounting_standards.provision_journal_entries,
2558            &acct_dir.join("provision_journal_entries.json"),
2559            "Provision journal entries",
2560        );
2561
2562        // IAS 21 — write under accounting_standards/fx/
2563        if !result
2564            .accounting_standards
2565            .currency_translation_results
2566            .is_empty()
2567        {
2568            let fx_dir = acct_dir.join("fx");
2569            std::fs::create_dir_all(&fx_dir)?;
2570            write_json_safe(
2571                &result.accounting_standards.currency_translation_results,
2572                &fx_dir.join("currency_translation_results.json"),
2573                "IAS 21 currency translation results",
2574            );
2575        }
2576
2577        // v3.3.1: Leases (IFRS 16 / ASC 842)
2578        if !result.accounting_standards.leases.is_empty() {
2579            let leases_dir = acct_dir.join("leases");
2580            std::fs::create_dir_all(&leases_dir)?;
2581            write_json_safe(
2582                &result.accounting_standards.leases,
2583                &leases_dir.join("leases.json"),
2584                "Leases (IFRS 16 / ASC 842) — v3.3.1",
2585            );
2586        }
2587
2588        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
2589        if !result
2590            .accounting_standards
2591            .fair_value_measurements
2592            .is_empty()
2593        {
2594            let fv_dir = acct_dir.join("fair_value");
2595            std::fs::create_dir_all(&fv_dir)?;
2596            write_json_safe(
2597                &result.accounting_standards.fair_value_measurements,
2598                &fv_dir.join("fair_value_measurements.json"),
2599                "Fair value measurements (IFRS 13 / ASC 820) — v3.3.1",
2600            );
2601        }
2602
2603        // v3.3.1: Framework reconciliation (dual reporting)
2604        if !result.accounting_standards.framework_differences.is_empty() {
2605            let diff_dir = acct_dir.join("framework_differences");
2606            std::fs::create_dir_all(&diff_dir)?;
2607            write_json_safe(
2608                &result.accounting_standards.framework_differences,
2609                &diff_dir.join("framework_differences.json"),
2610                "Framework differences (US GAAP vs IFRS) — v3.3.1",
2611            );
2612            write_json_safe(
2613                &result.accounting_standards.framework_reconciliations,
2614                &diff_dir.join("framework_reconciliations.json"),
2615                "Per-entity framework reconciliation — v3.3.1",
2616            );
2617        }
2618    }
2619
2620    // ========================================================================
2621    // Quality Gate Results
2622    // ========================================================================
2623    if let Some(ref gate_result) = result.gate_result {
2624        match serde_json::to_string_pretty(gate_result) {
2625            Ok(json) => {
2626                if let Err(e) = std::fs::write(output_dir.join("quality_gate_result.json"), json) {
2627                    warn!("Failed to write quality gate result: {}", e);
2628                } else {
2629                    info!(
2630                        "  Quality gate result written (passed={})",
2631                        gate_result.passed
2632                    );
2633                }
2634            }
2635            Err(e) => warn!("Failed to serialize quality gate result: {}", e),
2636        }
2637    }
2638
2639    // ========================================================================
2640    // Treasury
2641    // ========================================================================
2642    if !result.treasury.debt_instruments.is_empty()
2643        || !result.treasury.cash_positions.is_empty()
2644        || !result.treasury.hedging_instruments.is_empty()
2645    {
2646        let treasury_dir = output_dir.join("treasury");
2647        std::fs::create_dir_all(&treasury_dir)?;
2648        info!("Writing treasury data...");
2649
2650        write_json_safe(
2651            &result.treasury.debt_instruments,
2652            &treasury_dir.join("debt_instruments.json"),
2653            "Debt instruments",
2654        );
2655        write_json_safe(
2656            &result.treasury.hedging_instruments,
2657            &treasury_dir.join("hedging_instruments.json"),
2658            "Hedging instruments",
2659        );
2660        write_json_safe(
2661            &result.treasury.hedge_relationships,
2662            &treasury_dir.join("hedge_relationships.json"),
2663            "Hedge relationships",
2664        );
2665        write_json_safe(
2666            &result.treasury.cash_positions,
2667            &treasury_dir.join("cash_positions.json"),
2668            "Cash positions",
2669        );
2670        write_json_safe(
2671            &result.treasury.cash_forecasts,
2672            &treasury_dir.join("cash_forecasts.json"),
2673            "Cash forecasts",
2674        );
2675        write_json_safe(
2676            &result.treasury.cash_pools,
2677            &treasury_dir.join("cash_pools.json"),
2678            "Cash pools",
2679        );
2680        write_json_safe(
2681            &result.treasury.cash_pool_sweeps,
2682            &treasury_dir.join("cash_pool_sweeps.json"),
2683            "Cash pool sweeps",
2684        );
2685        write_json_safe(
2686            &result.treasury.bank_guarantees,
2687            &treasury_dir.join("bank_guarantees.json"),
2688            "Bank guarantees",
2689        );
2690        write_json_safe(
2691            &result.treasury.netting_runs,
2692            &treasury_dir.join("netting_runs.json"),
2693            "Netting runs",
2694        );
2695        if !result.treasury.treasury_anomaly_labels.is_empty() {
2696            write_json_safe(
2697                &result.treasury.treasury_anomaly_labels,
2698                &treasury_dir.join("treasury_anomaly_labels.json"),
2699                "Treasury anomaly labels",
2700            );
2701        }
2702    }
2703
2704    // ========================================================================
2705    // Project Accounting
2706    // ========================================================================
2707    if !result.project_accounting.projects.is_empty() {
2708        let pa_dir = output_dir.join("project_accounting");
2709        std::fs::create_dir_all(&pa_dir)?;
2710        info!("Writing project accounting data...");
2711
2712        write_json_safe(
2713            &result.project_accounting.projects,
2714            &pa_dir.join("projects.json"),
2715            "Projects",
2716        );
2717        write_json_safe(
2718            &result.project_accounting.cost_lines,
2719            &pa_dir.join("cost_lines.json"),
2720            "Project cost lines",
2721        );
2722        write_json_safe(
2723            &result.project_accounting.revenue_records,
2724            &pa_dir.join("revenue_records.json"),
2725            "Project revenue records",
2726        );
2727        write_json_safe(
2728            &result.project_accounting.earned_value_metrics,
2729            &pa_dir.join("earned_value_metrics.json"),
2730            "Earned value metrics",
2731        );
2732        write_json_safe(
2733            &result.project_accounting.change_orders,
2734            &pa_dir.join("change_orders.json"),
2735            "Change orders",
2736        );
2737        write_json_safe(
2738            &result.project_accounting.milestones,
2739            &pa_dir.join("milestones.json"),
2740            "Project milestones",
2741        );
2742    }
2743
2744    // ========================================================================
2745    // Evolution Events (Process Evolution + Organizational Events)
2746    // ========================================================================
2747    if !result.process_evolution.is_empty()
2748        || !result.organizational_events.is_empty()
2749        || !result.disruption_events.is_empty()
2750    {
2751        let events_dir = output_dir.join("events");
2752        std::fs::create_dir_all(&events_dir)?;
2753        info!("Writing evolution events...");
2754
2755        write_json_safe(
2756            &result.process_evolution,
2757            &events_dir.join("process_evolution_events.json"),
2758            "Process evolution events",
2759        );
2760        write_json_safe(
2761            &result.organizational_events,
2762            &events_dir.join("organizational_events.json"),
2763            "Organizational events",
2764        );
2765        write_json_safe(
2766            &result.disruption_events,
2767            &events_dir.join("disruption_events.json"),
2768            "Disruption events",
2769        );
2770    }
2771
2772    // ========================================================================
2773    // ML Training: Counterfactual Pairs
2774    // ========================================================================
2775    if !result.counterfactual_pairs.is_empty() {
2776        let ml_dir = output_dir.join("ml_training");
2777        std::fs::create_dir_all(&ml_dir)?;
2778        info!("Writing ML training data...");
2779
2780        write_json_safe(
2781            &result.counterfactual_pairs,
2782            &ml_dir.join("counterfactual_pairs.json"),
2783            "Counterfactual pairs",
2784        );
2785    }
2786
2787    // ========================================================================
2788    // Fraud Red-Flag Indicators
2789    // ========================================================================
2790    if !result.red_flags.is_empty() {
2791        let labels_dir = output_dir.join("labels");
2792        std::fs::create_dir_all(&labels_dir)?;
2793        info!("Writing fraud red-flag indicators...");
2794
2795        write_json_safe(
2796            &result.red_flags,
2797            &labels_dir.join("fraud_red_flags.json"),
2798            "Fraud red flags",
2799        );
2800    }
2801
2802    // ========================================================================
2803    // Collusion Rings
2804    // ========================================================================
2805    if !result.collusion_rings.is_empty() {
2806        let labels_dir = output_dir.join("labels");
2807        std::fs::create_dir_all(&labels_dir)?;
2808        info!("Writing collusion rings...");
2809
2810        write_json_safe(
2811            &result.collusion_rings,
2812            &labels_dir.join("collusion_rings.json"),
2813            "Collusion rings",
2814        );
2815    }
2816
2817    // ========================================================================
2818    // Temporal Vendor Version Chains
2819    // ========================================================================
2820    if !result.temporal_vendor_chains.is_empty() {
2821        let temporal_dir = output_dir.join("temporal");
2822        std::fs::create_dir_all(&temporal_dir)?;
2823        info!("Writing temporal vendor version chains...");
2824
2825        write_json_safe(
2826            &result.temporal_vendor_chains,
2827            &temporal_dir.join("vendor_version_chains.json"),
2828            "Vendor version chains",
2829        );
2830    }
2831
2832    // ========================================================================
2833    // Entity Relationship Graph + Cross-Process Links
2834    // ========================================================================
2835    if result.entity_relationship_graph.is_some() || !result.cross_process_links.is_empty() {
2836        let rel_dir = output_dir.join("relationships");
2837        std::fs::create_dir_all(&rel_dir)?;
2838        info!("Writing entity relationship data...");
2839
2840        if let Some(ref graph) = result.entity_relationship_graph {
2841            match serde_json::to_string_pretty(graph) {
2842                Ok(json) => {
2843                    let path = rel_dir.join("entity_relationship_graph.json");
2844                    if let Err(e) = std::fs::write(&path, json) {
2845                        warn!("Failed to write entity relationship graph: {}", e);
2846                    } else {
2847                        info!(
2848                            "  Entity relationship graph written: {} nodes, {} edges -> {}",
2849                            graph.nodes.len(),
2850                            graph.edges.len(),
2851                            path.display()
2852                        );
2853                    }
2854                }
2855                Err(e) => warn!("Failed to serialize entity relationship graph: {}", e),
2856            }
2857        }
2858
2859        write_json_safe(
2860            &result.cross_process_links,
2861            &rel_dir.join("cross_process_links.json"),
2862            "Cross-process links",
2863        );
2864    }
2865
2866    // ========================================================================
2867    // Industry-Specific Data
2868    // ========================================================================
2869    if let Some(ref industry_output) = result.industry_output {
2870        if !industry_output.gl_accounts.is_empty() {
2871            let industry_dir = output_dir.join("industry");
2872            std::fs::create_dir_all(&industry_dir).ok();
2873            info!("Writing industry-specific data...");
2874            match serde_json::to_string_pretty(industry_output) {
2875                Ok(json) => {
2876                    if let Err(e) = std::fs::write(industry_dir.join("industry_data.json"), json) {
2877                        warn!("Failed to write industry data: {}", e);
2878                    } else {
2879                        info!(
2880                            "  Industry data written: {} GL accounts for {}",
2881                            industry_output.gl_accounts.len(),
2882                            industry_output.industry
2883                        );
2884                    }
2885                }
2886                Err(e) => warn!("Failed to serialize industry data: {}", e),
2887            }
2888        }
2889    }
2890
2891    // ========================================================================
2892    // Graph Export Summary
2893    // ========================================================================
2894    if result.graph_export.exported {
2895        let graph_dir = output_dir.join("graph_export");
2896        std::fs::create_dir_all(&graph_dir).ok();
2897        match serde_json::to_string_pretty(&result.graph_export) {
2898            Ok(json) => {
2899                if let Err(e) = std::fs::write(graph_dir.join("graph_export_summary.json"), json) {
2900                    warn!("Failed to write graph export summary: {}", e);
2901                } else {
2902                    info!("  Graph export summary written");
2903                }
2904            }
2905            Err(e) => warn!("Failed to serialize graph export summary: {}", e),
2906        }
2907    }
2908
2909    // ========================================================================
2910    // Compliance Regulations
2911    // ========================================================================
2912    let cr = &result.compliance_regulations;
2913    let has_compliance_data = !cr.standard_records.is_empty()
2914        || !cr.audit_procedures.is_empty()
2915        || !cr.findings.is_empty()
2916        || !cr.filings.is_empty();
2917    if has_compliance_data {
2918        let cr_dir = output_dir.join("compliance_regulations");
2919        std::fs::create_dir_all(&cr_dir)?;
2920        info!("Writing compliance regulations data...");
2921
2922        write_json_safe(
2923            &cr.standard_records,
2924            &cr_dir.join("compliance_standards.json"),
2925            "Compliance standards",
2926        );
2927        write_json_safe(
2928            &cr.cross_reference_records,
2929            &cr_dir.join("cross_references.json"),
2930            "Cross-references",
2931        );
2932        write_json_safe(
2933            &cr.jurisdiction_records,
2934            &cr_dir.join("jurisdiction_profiles.json"),
2935            "Jurisdiction profiles",
2936        );
2937        write_json_safe(
2938            &cr.audit_procedures,
2939            &cr_dir.join("audit_procedures.json"),
2940            "Audit procedures",
2941        );
2942        write_json_safe(
2943            &cr.findings,
2944            &cr_dir.join("compliance_findings.json"),
2945            "Compliance findings",
2946        );
2947        write_json_safe(
2948            &cr.filings,
2949            &cr_dir.join("regulatory_filings.json"),
2950            "Regulatory filings",
2951        );
2952
2953        if let Some(ref graph) = cr.compliance_graph {
2954            match serde_json::to_string_pretty(graph) {
2955                Ok(json) => {
2956                    if let Err(e) = std::fs::write(cr_dir.join("compliance_graph.json"), json) {
2957                        warn!("Failed to write compliance graph: {}", e);
2958                    } else {
2959                        info!(
2960                            "  Compliance graph written: {} nodes, {} edges",
2961                            graph.nodes.len(),
2962                            graph.edges.len()
2963                        );
2964                    }
2965                }
2966                Err(e) => warn!("Failed to serialize compliance graph: {}", e),
2967            }
2968        }
2969    }
2970
2971    // ========================================================================
2972    // Generation Statistics
2973    // ========================================================================
2974    match serde_json::to_string_pretty(&result.statistics) {
2975        Ok(json) => {
2976            if let Err(e) = std::fs::write(output_dir.join("generation_statistics.json"), json) {
2977                warn!("Failed to write generation statistics: {}", e);
2978            } else {
2979                info!("  Generation statistics written");
2980            }
2981        }
2982        Err(e) => warn!("Failed to serialize generation statistics: {}", e),
2983    }
2984
2985    info!("Output writing complete.");
2986    Ok(())
2987}
2988
2989/// Write JSON with error handling - logs a warning on failure but does not abort.
2990///
2991/// When the `FLAT_LAYOUT_ACTIVE` thread-local is true (set by
2992/// `write_all_output_with_layout` when `export_layout: flat`), this routes
2993/// through `write_json_flat` so nested `{header, lines|items|allocations}`
2994/// shapes are automatically flattened. For structures without that shape,
2995/// `write_json_flat` passes through unchanged.
2996fn write_json_safe<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2997    // Skip JSON entirely when not in requested output formats
2998    if SKIP_JSON.with(|c| c.get()) {
2999        return;
3000    }
3001    if FLAT_LAYOUT_ACTIVE.with(|c| c.get()) {
3002        write_json_flat(data, path, label);
3003    } else if let Err(e) = write_json(data, path, label) {
3004        warn!("Failed to write {}: {}", label, e);
3005    }
3006}
3007
3008/// Write JSON, choosing flat or nested layout based on the flag.
3009fn write_json_auto<T: serde::Serialize>(data: &[T], path: &Path, label: &str, flat: bool) {
3010    if flat {
3011        write_json_flat(data, path, label);
3012    } else {
3013        write_json_safe(data, path, label);
3014    }
3015}
3016
3017/// Write a JSON file ALWAYS, even when the slice is empty (writes `[]`).
3018///
3019/// Use for files that must exist in the archive for SDK consumers
3020/// (e.g., `audit_opinions.json`) regardless of whether the phase that
3021/// populates them ran. `write_json_safe` / `write_json` short-circuit
3022/// on empty slices, which would break manifest-driven clients that
3023/// expect the file to be present.
3024fn write_json_always<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
3025    if SKIP_JSON.with(|c| c.get()) {
3026        return;
3027    }
3028    match std::fs::File::create(path) {
3029        Ok(file) => {
3030            let mut writer = std::io::BufWriter::with_capacity(64 * 1024, file);
3031            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
3032                writer.write_all(b"[\n")?;
3033                for (i, item) in data.iter().enumerate() {
3034                    if i > 0 {
3035                        writer.write_all(b",\n")?;
3036                    }
3037                    serde_json::to_writer_pretty(&mut writer, item)?;
3038                }
3039                if !data.is_empty() {
3040                    writer.write_all(b"\n")?;
3041                }
3042                writer.write_all(b"]\n")?;
3043                writer.flush()?;
3044                Ok(())
3045            })() {
3046                warn!("Failed to write {}: {}", label, e);
3047            } else {
3048                info!(
3049                    "  {} written: {} records -> {}",
3050                    label,
3051                    data.len(),
3052                    path.display()
3053                );
3054            }
3055        }
3056        Err(e) => {
3057            warn!("Failed to create {}: {}", path.display(), e);
3058        }
3059    }
3060}
3061
3062/// Write a flat JSON file by expanding a primary items array and merging the
3063/// surrounding context onto each line.
3064///
3065/// Flattens any record that contains a recognised items array
3066/// (`items`, `lines`, `line_items`, or `allocations`) into one row per line,
3067/// carrying over both the optional `header` sub-object and all other
3068/// top-level fields. Records without a recognised items array are emitted
3069/// as-is, except that an optional nested `header` sub-object is unwrapped
3070/// onto the top level so consumers see a uniformly flat shape.
3071///
3072/// Flow-style documents (`{header, items}`) and subledger-style documents
3073/// (`{..top-level scalars.., lines}`, e.g. AP/AR invoices, inventory
3074/// valuation runs) are both handled — fixing the SDK-team-reported gap
3075/// where subledger invoices were left with `lines` nested in flat mode.
3076///
3077/// Uses heap-allocated intermediates to avoid stack overflow with large
3078/// records in constrained environments (e.g., distroless containers with
3079/// glibc 2.36). Fixes #116.
3080fn write_json_flat<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
3081    if data.is_empty() {
3082        return;
3083    }
3084
3085    // Pre-allocate on heap — avoid flat_map closure accumulating on the stack
3086    let mut flat: Vec<serde_json::Value> = Vec::with_capacity(data.len());
3087
3088    for item in data {
3089        let val = match serde_json::to_value(item) {
3090            Ok(v) => v,
3091            Err(e) => {
3092                warn!("Failed to serialize record for flat export: {}", e);
3093                continue;
3094            }
3095        };
3096
3097        let serde_json::Value::Object(map) = val else {
3098            flat.push(val);
3099            continue;
3100        };
3101
3102        // Find the primary items array key (first match wins).
3103        let items_key = ["items", "lines", "allocations", "line_items"]
3104            .iter()
3105            .find(|k| map.contains_key(**k))
3106            .copied();
3107
3108        // Optional nested header sub-object (used by document flows).
3109        let header_map = match map.get("header") {
3110            Some(serde_json::Value::Object(h)) => Some(h),
3111            _ => None,
3112        };
3113
3114        let Some(items_key) = items_key else {
3115            // No items array. Emit one row, unwrapping the optional header
3116            // sub-object so consumers see a flat shape regardless of model
3117            // layout (e.g. Payments have `header` but no items/allocations
3118            // when allocations are empty).
3119            if let Some(header_map) = header_map {
3120                let mut merged = map.clone();
3121                merged.remove("header");
3122                for (k, v) in header_map {
3123                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3124                }
3125                flat.push(serde_json::Value::Object(merged));
3126            } else {
3127                flat.push(serde_json::Value::Object(map));
3128            }
3129            continue;
3130        };
3131
3132        let Some(serde_json::Value::Array(items)) = map.get(items_key) else {
3133            // `items_key` present but not an array — passthrough.
3134            flat.push(serde_json::Value::Object(map));
3135            continue;
3136        };
3137
3138        // Empty items array: emit one row with the (unwrapped) header
3139        // context so downstream consumers can still find the parent
3140        // record — prevents silently dropping empty-lines invoices.
3141        if items.is_empty() {
3142            let mut merged = map.clone();
3143            merged.remove(items_key);
3144            if let Some(header_map) = header_map {
3145                merged.remove("header");
3146                for (k, v) in header_map {
3147                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3148                }
3149            }
3150            flat.push(serde_json::Value::Object(merged));
3151            continue;
3152        }
3153
3154        // Collect all other top-level fields (scalars, objects, arrays)
3155        // so they carry over onto every flattened line — matching pandas
3156        // `explode()` semantics. This is the behaviour SDK consumers
3157        // expect: header context is repeated per line, nested objects
3158        // like `net_amount: {amount, currency}` come along for the ride.
3159        let top_fields: Vec<(&String, &serde_json::Value)> = map
3160            .iter()
3161            .filter(|(k, _)| k.as_str() != "header" && k.as_str() != items_key)
3162            .collect();
3163
3164        flat.reserve(items.len());
3165        for item_val in items {
3166            let mut merged = serde_json::Map::new();
3167            // Line/item fields first (take precedence on collisions).
3168            if let serde_json::Value::Object(m) = item_val {
3169                merged.extend(m.iter().map(|(k, v)| (k.clone(), v.clone())));
3170            }
3171            // Header sub-object (when present) — don't overwrite line fields.
3172            if let Some(header_map) = header_map {
3173                for (k, v) in header_map {
3174                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3175                }
3176            }
3177            // All other top-level fields.
3178            for &(k, v) in &top_fields {
3179                merged.entry(k.clone()).or_insert_with(|| v.clone());
3180            }
3181            flat.push(serde_json::Value::Object(merged));
3182        }
3183    }
3184
3185    if flat.is_empty() {
3186        return;
3187    }
3188
3189    // Stream-write each flattened record instead of serializing the whole Vec
3190    let count = flat.len();
3191    match std::fs::File::create(path) {
3192        Ok(file) => {
3193            use std::io::Write;
3194            let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
3195            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
3196                writer.write_all(b"[\n")?;
3197                for (i, item) in flat.iter().enumerate() {
3198                    if i > 0 {
3199                        writer.write_all(b",\n")?;
3200                    }
3201                    serde_json::to_writer_pretty(&mut writer, item)?;
3202                }
3203                writer.write_all(b"\n]\n")?;
3204                writer.flush()?;
3205                Ok(())
3206            })() {
3207                warn!("Failed to write {}: {}", label, e);
3208            } else {
3209                info!(
3210                    "  {} written (flat): {} records -> {}",
3211                    label,
3212                    count,
3213                    path.display()
3214                );
3215            }
3216        }
3217        Err(e) => warn!("Failed to create {}: {}", label, e),
3218    }
3219}
3220
3221/// Write a single serializable value as a JSON file.
3222fn write_json_single<T: serde::Serialize>(
3223    data: &T,
3224    path: &Path,
3225    label: &str,
3226) -> Result<(), Box<dyn std::error::Error>> {
3227    let file = std::fs::File::create(path)?;
3228    let writer = std::io::BufWriter::with_capacity(256 * 1024, file);
3229    serde_json::to_writer_pretty(writer, data)?;
3230    info!("  {} written -> {}", label, path.display());
3231    Ok(())
3232}
3233
3234/// Write a single serializable value as a JSON file, logging a warning on failure.
3235fn write_json_single_safe<T: serde::Serialize>(data: &T, path: &Path, label: &str) {
3236    if SKIP_JSON.with(|c| c.get()) {
3237        return;
3238    }
3239    if let Err(e) = write_json_single(data, path, label) {
3240        warn!("Failed to write {}: {}", label, e);
3241    }
3242}
3243
3244/// Serializable summary of balance validation (avoids serializing the full
3245/// `BalanceValidationResult` which has non-Serialize validation error types).
3246#[derive(serde::Serialize)]
3247struct BalanceValidationSummary {
3248    validated: bool,
3249    is_balanced: bool,
3250    entries_processed: u64,
3251    total_debits: String,
3252    total_credits: String,
3253    accounts_tracked: usize,
3254    companies_tracked: usize,
3255    has_unbalanced_entries: bool,
3256    validation_error_count: usize,
3257}
3258
3259impl BalanceValidationSummary {
3260    fn from(v: &crate::enhanced_orchestrator::BalanceValidationResult) -> Self {
3261        Self {
3262            validated: v.validated,
3263            is_balanced: v.is_balanced,
3264            entries_processed: v.entries_processed,
3265            total_debits: v.total_debits.to_string(),
3266            total_credits: v.total_credits.to_string(),
3267            accounts_tracked: v.accounts_tracked,
3268            companies_tracked: v.companies_tracked,
3269            has_unbalanced_entries: v.has_unbalanced_entries,
3270            validation_error_count: v.validation_errors.len(),
3271        }
3272    }
3273}
3274
#[cfg(test)]
mod tests {
    /// v5.17.0 — verify the journal_entries.csv header has exactly 48 columns
    /// (46 earlier columns + fraud_type + anomaly_type appended last).
    ///
    /// NOTE(review): the header below is a literal copy, so this test only
    /// guards the copy; keep it in sync with the header string used by the
    /// CSV writer.
    #[test]
    fn journal_entries_csv_header_has_48_columns() {
        let header =
            "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
                      document_type,currency,exchange_rate,reference,header_text,created_by,source,\
                      business_process,ledger,is_fraud,is_anomaly,\
                      line_number,gl_account,debit_amount,credit_amount,local_amount,transaction_amount,\
                      cost_center,profit_center,business_unit,line_text,\
                      auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
                      is_manual,is_post_close,source_system,\
                      account_description,financial_statement_category,\
                      assignment,value_date,tax_code,transaction_id,\
                      account_class,account_class_name,account_sub_class,account_sub_class_name,\
                      predecessor_line_id,trading_partner,fraud_type,anomaly_type";
        // Strip any embedded whitespace from line continuations before counting.
        let normalized: String = header.chars().filter(|c| !c.is_whitespace()).collect();
        let n_cols = normalized.split(',').count();
        assert_eq!(
            n_cols, 48,
            "expected 48 columns in journal_entries.csv header, got {n_cols}"
        );
    }

    /// v5.17.0 — fraud_type column emits the FraudType variant name via Debug.
    #[test]
    fn journal_entries_csv_fraud_type_column_populated() {
        use datasynth_core::models::FraudType;
        use datasynth_core::models::{JournalEntry, JournalEntryHeader};

        // Build a minimal JE with fraud_type = GhostEmployee.
        let posting_date = chrono::NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let mut header = JournalEntryHeader::new("DE10".to_string(), posting_date);
        header.is_fraud = true;
        header.fraud_type = Some(FraudType::GhostEmployee);
        let je = JournalEntry::new(header);

        // Reproduce the fraud_type / anomaly_type extraction exactly as in
        // write_journal_entries_csv so we can unit-test the helper logic
        // without spinning up a full EnhancedGenerationResult.
        let h = &je.header;
        let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
        let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();

        // fraud_type column must be "GhostEmployee".
        assert_eq!(
            fraud_type_str, "GhostEmployee",
            "expected 'GhostEmployee' for FraudType::GhostEmployee; got: {fraud_type_str}"
        );
        // anomaly_type is None by default → empty string.
        assert!(
            anomaly_type_str.is_empty(),
            "expected empty anomaly_type when None; got: {anomaly_type_str}"
        );
    }

    /// v5.17.0 — fraud_type and anomaly_type columns emit empty strings when None.
    #[test]
    fn journal_entries_csv_fraud_type_none_is_empty() {
        use datasynth_core::models::{JournalEntry, JournalEntryHeader};

        let posting_date = chrono::NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let header = JournalEntryHeader::new("DE10".to_string(), posting_date);
        let je = JournalEntry::new(header);

        let h = &je.header;
        // fraud_type is None by default.
        let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
        // anomaly_type is None by default.
        let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();

        assert!(
            fraud_type_str.is_empty(),
            "expected empty fraud_type for None; got: {fraud_type_str}"
        );
        assert!(
            anomaly_type_str.is_empty(),
            "expected empty anomaly_type for None; got: {anomaly_type_str}"
        );
    }
}