Skip to main content

datasynth_runtime/
output_writer.rs

1//! Comprehensive output writer for all generated data.
2//!
3//! Writes all generated data from the EnhancedGenerationResult to files
4//! in the output directory. Uses CSV for flat tabular data (journal entry
5//! lines) and JSON for types with nested structures (Vecs, sub-structs).
6
7use std::cell::Cell;
8use std::io::Write;
9use std::path::Path;
10
11use crate::enhanced_orchestrator::EnhancedGenerationResult;
12use datasynth_core::documents::PaymentType;
13use datasynth_output::OutputRootConfig;
14use tracing::{info, warn};
15
thread_local! {
    /// Per-thread switch for the flat export layout.
    ///
    /// `write_all_output_with_layout` sets it to `true` when the requested
    /// layout is `ExportLayout::Flat` and a drop guard puts it back to
    /// `false` when that function returns. While it is `true`,
    /// `write_json_safe` is expected to route through `write_json_flat` so
    /// nested `{header, lines}` shapes are flattened.
    static FLAT_LAYOUT_ACTIVE: Cell<bool> = const { Cell::new(false) };
}

thread_local! {
    /// Per-thread switch that turns JSON output off.
    ///
    /// `write_all_output_with_layout` sets it to `true` when the requested
    /// formats contain no JSON variant and a drop guard resets it on return.
    /// While it is `true`, `write_json_safe` is expected to be a no-op, which
    /// spares the many call sites an explicit `if write_json` check.
    static SKIP_JSON: Cell<bool> = const { Cell::new(false) };
}
28
29/// Write a JSON file for any serializable slice. Skips empty slices.
30///
31/// Streams JSON directly to a buffered file writer instead of allocating
32/// the entire JSON string in memory (Phase 3 I/O optimization).
33/// Write a JSON array by streaming one record at a time.
34///
35/// Instead of serializing the entire `&[T]` in one `to_writer_pretty` call
36/// (which builds a massive in-memory serde state for large arrays), this
37/// writes `[\n` + per-record pretty-printed JSON with commas + `\n]`.
38///
39/// For 200K+ records this reduces peak memory and improves write throughput
40/// by avoiding serde's internal buffering of the full array structure.
41fn write_json<T: serde::Serialize>(
42    data: &[T],
43    path: &Path,
44    label: &str,
45) -> Result<(), Box<dyn std::error::Error>> {
46    use std::io::Write;
47
48    if data.is_empty() {
49        return Ok(());
50    }
51
52    let file = std::fs::File::create(path)?;
53    let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
54
55    // Stream records one at a time into a JSON array
56    writer.write_all(b"[\n")?;
57    for (i, item) in data.iter().enumerate() {
58        if i > 0 {
59            writer.write_all(b",\n")?;
60        }
61        serde_json::to_writer_pretty(&mut writer, item)?;
62    }
63    writer.write_all(b"\n]\n")?;
64    writer.flush()?;
65
66    info!(
67        "  {} written: {} records -> {}",
68        label,
69        data.len(),
70        path.display()
71    );
72    Ok(())
73}
74
75/// Write journal entry lines as a flat CSV file.
76///
77/// This extracts the key fields from both the header and each line item to
78/// produce a single flat CSV that can be loaded directly into dataframes.
79fn write_journal_entries_csv(
80    result: &EnhancedGenerationResult,
81    output_dir: &Path,
82) -> Result<(), Box<dyn std::error::Error>> {
83    if result.journal_entries.is_empty() {
84        return Ok(());
85    }
86
87    let path = output_dir.join("journal_entries.csv");
88    let file = std::fs::File::create(&path)?;
89    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
90
91    // Write header.
92    //
93    // Schema note: each release that widens the schema appends new
94    // columns at the end so existing column-positional consumers keep
95    // working.
96    //   v5.5.1 added:
97    //     is_manual, is_post_close, source_system     (audit / ETL provenance)
98    //     account_description                         (joined from CoA)
99    //     financial_statement_category                (asset/liability/...)
100    //     assignment, value_date, tax_code            (already-populated line fields)
101    //     transaction_id                              (stable per-line id)
102    //   v5.6.0 added (ISO 21378 Audit Data Collection classification):
103    //     account_class, account_class_name           (Level-2 e.g. "A.B" / "Trade Receivables")
104    //     account_sub_class, account_sub_class_name   (Level-3 e.g. "A.B.A" / "Trade Accounts Receivable")
105    //   v5.8.0 added:
106    //     predecessor_line_id                         (UUID v5 of preceding line in document chain;
107    //                                                  populated by document_flow_je_generator for
108    //                                                  P2P / O2C chains, empty for chain heads and
109    //                                                  for purely-GL adjustments)
110    //   v5.16.1 (SP3.8a) added:
111    //     trading_partner                             (counterparty company code; populated from
112    //                                                  SP3.7 per-source conditional when priors are
113    //                                                  loaded; empty for chain heads / GL adjustments
114    //                                                  when priors are absent)
115    //   v5.17.0 (HF community request) added:
116    //     fraud_type                                  (FraudType enum variant via Debug format, e.g.
117    //                                                  "GhostEmployee"; empty string when None)
118    //     anomaly_type                                (AnomalyType serialized name, e.g.
119    //                                                  "DuplicateEntry"; empty string when None)
120    writeln!(
121        w,
122        "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
123         document_type,currency,exchange_rate,reference,header_text,created_by,source,\
124         business_process,ledger,is_fraud,is_anomaly,\
125         line_number,gl_account,debit_amount,credit_amount,local_amount,transaction_amount,\
126         cost_center,profit_center,business_unit,line_text,\
127         auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
128         is_manual,is_post_close,source_system,\
129         account_description,financial_statement_category,\
130         assignment,value_date,tax_code,transaction_id,\
131         account_class,account_class_name,account_sub_class,account_sub_class_name,\
132         predecessor_line_id,trading_partner,fraud_type,anomaly_type"
133    )?;
134
135    // Build a CoA → (short_description, ISO class, ISO sub-class) lookup.
136    // Empty when no CoA was generated (e.g. some smoke tests); resolution
137    // falls back to the line's already-populated `account_description`
138    // and to empty ISO codes.
139    let coa_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> = result
140        .chart_of_accounts
141        .accounts
142        .iter()
143        .map(|a| {
144            (
145                a.account_number.as_str(),
146                (
147                    a.short_description.as_str(),
148                    a.account_class.as_str(),
149                    a.account_class_name.as_str(),
150                    a.account_sub_class.as_str(),
151                    a.account_sub_class_name.as_str(),
152                ),
153            )
154        })
155        .collect();
156
157    // SP5.2 — Secondary index built from the CoA semantic prior (when loaded).
158    // The per-source attribute conditional (SP3.7) draws corpus GL account
159    // numbers (e.g. `0000105000`) that typically are NOT present in the synthetic
160    // CoA master table, so the primary `coa_index` misses ~85% of lines.  This
161    // fallback index covers 3,123 corpus accounts sourced from the `.dsf`
162    // bundle, resolving `account_description` and ISO 21378 class codes for any
163    // account number the prior knows about.
164    //
165    // When no prior is loaded the map is empty and the existing behaviour is
166    // byte-identical to earlier releases.
167    let coa_semantic_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> =
168        result
169            .coa_semantic_prior
170            .as_ref()
171            .map(|prior| {
172                prior
173                    .accounts
174                    .iter()
175                    .map(|(account_number, sem)| {
176                        (
177                            account_number.as_str(),
178                            (
179                                sem.description.as_str(),
180                                sem.account_class.as_deref().unwrap_or(""),
181                                sem.account_class_name.as_deref().unwrap_or(""),
182                                sem.account_sub_class.as_deref().unwrap_or(""),
183                                sem.account_sub_class_name.as_deref().unwrap_or(""),
184                            ),
185                        )
186                    })
187                    .collect()
188            })
189            .unwrap_or_default();
190
191    for je in &result.journal_entries {
192        let h = &je.header;
193        // SP3.6 — when priors are loaded, `sap_source_code` holds a canonical
194        // SAP source code (`KR`, `RV`, …); fall back to the TransactionSource
195        // Display label for the priors-disabled path.
196        let source_label: std::borrow::Cow<str> = match &h.sap_source_code {
197            Some(code) => std::borrow::Cow::Borrowed(code.as_str()),
198            None => std::borrow::Cow::Owned(h.source.to_string()),
199        };
200        for line in &je.lines {
201            let lettrage_date_str = line
202                .lettrage_date
203                .map(|d| d.to_string())
204                .unwrap_or_default();
205            let value_date_str = line.value_date.map(|d| d.to_string()).unwrap_or_default();
206            // Look up CoA-joined fields in one shot.
207            // SP5.2 — try primary (synthetic CoA) then fall through to the
208            // corpus prior secondary index when the primary misses.
209            let coa_hit = coa_index
210                .get(line.gl_account.as_str())
211                .copied()
212                .or_else(|| coa_semantic_index.get(line.gl_account.as_str()).copied());
213            let coa_short_desc = coa_hit.map(|t| t.0).unwrap_or("");
214            let coa_class = coa_hit.map(|t| t.1).unwrap_or("");
215            let coa_class_name = coa_hit.map(|t| t.2).unwrap_or("");
216            let coa_sub_class = coa_hit.map(|t| t.3).unwrap_or("");
217            let coa_sub_class_name = coa_hit.map(|t| t.4).unwrap_or("");
218            // Prefer the line's own account_description; fall back to the CoA
219            // lookup so consumers always get a name even when the generator
220            // forgot to populate the field.
221            let account_description: &str = line
222                .account_description
223                .as_deref()
224                .filter(|s| !s.is_empty())
225                .unwrap_or(coa_short_desc);
226            // Derive the FSA category from the gl_account prefix (1xxx=asset,
227            // 2xxx=liability, ...). Cheap, deterministic, no CoA dependency.
228            let fsa_category =
229                datasynth_core::accounts::AccountCategory::from_account(line.gl_account.as_str())
230                    .as_label();
231            // Stable per-line identifier (UUID v5 of document_id+line_number).
232            let transaction_id = line.transaction_id.clone().unwrap_or_else(|| {
233                datasynth_core::models::JournalEntryLine::derive_transaction_id(
234                    line.document_id,
235                    line.line_number,
236                )
237            });
238            // v5.17.0 — fraud_type and anomaly_type category columns (cols 45-46).
239            // fraud_type: Option<FraudType> → Debug format, empty when None.
240            // anomaly_type: Option<String> → already serialized, empty when None.
241            let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
242            let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();
243            writeln!(
244                w,
245                "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
246                h.document_id,
247                csv_escape(&h.company_code),
248                h.fiscal_year,
249                h.fiscal_period,
250                h.posting_date,
251                h.document_date,
252                csv_escape(&h.document_type),
253                csv_escape(&h.currency),
254                h.exchange_rate,
255                csv_opt_str(&h.reference),
256                csv_opt_str(&h.header_text),
257                csv_escape(&h.created_by),
258                source_label,
259                h.business_process
260                    .map(|bp| format!("{bp:?}"))
261                    .unwrap_or_default(),
262                csv_escape(&h.ledger),
263                h.is_fraud,
264                h.is_anomaly,
265                line.line_number,
266                csv_escape(&line.gl_account),
267                line.debit_amount,
268                line.credit_amount,
269                line.local_amount,
270                line.transaction_amount.map(|d| d.to_string()).unwrap_or_default(),
271                csv_opt_str(&line.cost_center),
272                csv_opt_str(&line.profit_center),
273                csv_opt_str(&line.business_unit),
274                csv_opt_str(&line.line_text),
275                csv_opt_str(&line.auxiliary_account_number),
276                csv_opt_str(&line.auxiliary_account_label),
277                csv_opt_str(&line.lettrage),
278                lettrage_date_str,
279                h.is_manual,
280                h.is_post_close,
281                csv_escape(&h.source_system),
282                csv_escape(account_description),
283                fsa_category,
284                csv_opt_str(&line.assignment),
285                value_date_str,
286                csv_opt_str(&line.tax_code),
287                csv_escape(&transaction_id),
288                csv_escape(coa_class),
289                csv_escape(coa_class_name),
290                csv_escape(coa_sub_class),
291                csv_escape(coa_sub_class_name),
292                csv_opt_str(&line.predecessor_line_id),
293                // SP3.8a — trading_partner appended as column 44.
294                csv_opt_str(&line.trading_partner),
295                // v5.17.0 — fraud_type (col 45) and anomaly_type (col 46).
296                csv_escape(&fraud_type_str),
297                csv_escape(&anomaly_type_str),
298            )?;
299        }
300    }
301
302    w.flush()?;
303    let total_lines: usize = result.journal_entries.iter().map(|je| je.lines.len()).sum();
304    info!(
305        "  Journal entries CSV written: {} entries, {} line items -> {}",
306        result.journal_entries.len(),
307        total_lines,
308        path.display()
309    );
310    Ok(())
311}
312
313/// **v5.8.0** — write `graphs/je_network.csv`: a flat edge-list of the
314/// accounting network derived from journal entries.
315///
316/// Each row represents one debit↔credit flow within a single JE,
317/// formed via the cartesian product of debit lines × credit lines (the
318/// approach in `datasynth-graph::TransactionGraphBuilder`). For a
319/// 2-line JE this is exactly the bijective Method-A flow from
320/// Ivertowski et al. (2024); for larger JEs it is a Method-B/C
321/// approximation with proportional amount allocation.
322///
323/// v5.10: edge construction has been extracted into
324/// [`crate::je_network::build_je_network_edges`] so the same logic is
325/// reused by the `datasynth-group` aggregate emitter.  The CSV format
326/// is unchanged — this writer keeps the v5.8.0 13-column schema.
327///
328/// Joins back to `journal_entries.csv` via:
329///   - `document_id` → JE-level header
330///   - `from_line_id` / `to_line_id` → per-line `transaction_id`
331///   - `predecessor_edge_id` → previous flow in a document chain
332fn write_je_network_csv(
333    result: &EnhancedGenerationResult,
334    output_dir: &Path,
335    method: datasynth_config::JeNetworkMethod,
336) -> Result<(), Box<dyn std::error::Error>> {
337    if result.journal_entries.is_empty() {
338        return Ok(());
339    }
340    let graphs_dir = output_dir.join("graphs");
341    std::fs::create_dir_all(&graphs_dir)?;
342    let path = graphs_dir.join("je_network.csv");
343    let file = std::fs::File::create(&path)?;
344    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
345
346    writeln!(
347        w,
348        "edge_id,document_id,posting_date,from_account,to_account,\
349         from_line_id,to_line_id,amount,confidence,\
350         predecessor_edge_id,business_process,is_fraud,is_anomaly,fraud_type"
351    )?;
352
353    let edges = crate::je_network::build_je_network_edges(&result.journal_entries, method);
354
355    for e in &edges {
356        writeln!(
357            w,
358            "{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
359            csv_escape(&e.edge_id),
360            csv_escape(&e.document_id.to_string()),
361            csv_escape(&e.posting_date.to_string()),
362            csv_escape(&e.from_account),
363            csv_escape(&e.to_account),
364            csv_escape(&e.from_line_id),
365            csv_escape(&e.to_line_id),
366            e.amount,
367            e.confidence,
368            csv_escape(&e.predecessor_edge_id),
369            csv_escape(&e.business_process),
370            e.is_fraud,
371            e.is_anomaly,
372            csv_escape(e.fraud_type.as_deref().unwrap_or("")),
373        )?;
374    }
375
376    w.flush()?;
377    info!(
378        "  JE network CSV written: {} edges from {} entries -> {}",
379        edges.len(),
380        result.journal_entries.len(),
381        path.display()
382    );
383    Ok(())
384}
385
386/// Write journal entries as flat JSON (header fields merged onto each line).
387///
388/// Each object in the output array contains all header fields plus all line fields,
389/// with no nesting. This is the analytics-friendly format.
390fn write_journal_entries_flat_json(
391    result: &EnhancedGenerationResult,
392    output_dir: &Path,
393) -> Result<(), Box<dyn std::error::Error>> {
394    if result.journal_entries.is_empty() {
395        return Ok(());
396    }
397
398    let path = output_dir.join("journal_entries.json");
399    let file = std::fs::File::create(&path)?;
400    let mut writer = std::io::BufWriter::with_capacity(256 * 1024, file);
401
402    // Write opening bracket
403    writer.write_all(b"[\n")?;
404
405    let mut first = true;
406    let mut total_lines = 0usize;
407    for je in &result.journal_entries {
408        // Serialize header to a JSON map
409        let header_value = serde_json::to_value(&je.header)?;
410
411        for line in &je.lines {
412            if !first {
413                writer.write_all(b",\n")?;
414            }
415            first = false;
416            total_lines += 1;
417
418            // Serialize line to a JSON map, then merge header fields in
419            let mut line_value = serde_json::to_value(line)?;
420
421            if let serde_json::Value::Object(ref header_map) = header_value {
422                if let serde_json::Value::Object(ref mut line_map) = line_value {
423                    for (key, val) in header_map {
424                        // Line fields take precedence for shared keys (e.g. document_id)
425                        if !line_map.contains_key(key) {
426                            line_map.insert(key.clone(), val.clone());
427                        }
428                    }
429                }
430            }
431
432            serde_json::to_writer_pretty(&mut writer, &line_value)?;
433        }
434    }
435
436    writer.write_all(b"\n]\n")?;
437    writer.flush()?;
438    info!(
439        "  Journal entries (flat JSON) written: {} line items -> {}",
440        total_lines,
441        path.display()
442    );
443    Ok(())
444}
445
446/// v4.4.2 helper — walk a serialized OCEL event-log `Value` tree and
447/// mirror `object_type_id` into `object_type` on every
448/// `object_refs[*]` entry. The canonical OCEL 2.0 field name is
449/// `object_type`; DataSynth's internal model carries it as
450/// `object_type_id` for historical reasons. Emitting both keys lets
451/// OCEL-spec-compliant consumers (pm4py, Celonis, etc.) see the type
452/// without a rename step.
453fn add_ocel_object_type_alias(value: &mut serde_json::Value) {
454    if let Some(events) = value.get_mut("events").and_then(|v| v.as_array_mut()) {
455        for event in events.iter_mut() {
456            if let Some(refs) = event.get_mut("object_refs").and_then(|r| r.as_array_mut()) {
457                for oref in refs.iter_mut() {
458                    if let Some(obj) = oref.as_object_mut() {
459                        if let Some(oti) = obj.get("object_type_id").cloned() {
460                            obj.entry("object_type").or_insert(oti);
461                        }
462                    }
463                }
464            }
465        }
466    }
467}
468
/// Escape a string for use as a single CSV field.
///
/// A field containing a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes, with embedded double quotes doubled
/// (`"` → `""`). Any other field is returned unchanged.
///
/// Carriage returns are included because a bare `\r` inside an unquoted
/// field is read as a record terminator by RFC 4180-style parsers.
fn csv_escape(s: &str) -> String {
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
477
478/// Format an Option<String> for CSV output (empty string for None).
479fn csv_opt_str(opt: &Option<String>) -> String {
480    match opt {
481        Some(s) => csv_escape(s),
482        None => String::new(),
483    }
484}
485
486/// Write all generated data to the output directory.
487///
488/// This function exports every non-empty dataset from the generation result.
489/// Journal entries are written as a flat CSV file (one row per line item)
490/// and as a nested JSON file. Other data is written as JSON files since
491/// many model types contain nested structures.
492#[allow(dead_code)]
493pub fn write_all_output(
494    result: &EnhancedGenerationResult,
495    output_dir: &Path,
496) -> Result<(), Box<dyn std::error::Error>> {
497    write_all_output_with_layout(
498        result,
499        output_dir,
500        datasynth_config::ExportLayout::Nested,
501        &[
502            datasynth_config::FileFormat::Csv,
503            datasynth_config::FileFormat::Json,
504        ],
505        datasynth_config::JeNetworkMethod::default(),
506    )
507}
508
509/// Variant of [`write_all_output_with_layout`] that routes output through
510/// an [`OutputRootConfig`] instead of a raw `&Path`.
511///
512/// Per-entity subtree mode is used by the group-audit shard runner
513/// (v5.0+): the runner sets `per_entity_subtree: true` and
514/// `entity_code: Some(code)` on `root`, and this helper drops each
515/// entity's archive under `{root_dir}/entities/{code}/` so group-wide
516/// artifacts can still live at `{root_dir}/`.
517///
518/// In flat mode (the default for single-entity runs) this is exactly
519/// equivalent to calling [`write_all_output_with_layout`] with
520/// `output_dir = root.root_dir`, so the signature and behavior of the
521/// existing single-entity entrypoints are unchanged.
522#[allow(dead_code)]
523pub fn write_all_output_with_root(
524    result: &EnhancedGenerationResult,
525    root: &OutputRootConfig,
526    export_layout: datasynth_config::ExportLayout,
527    formats: &[datasynth_config::FileFormat],
528) -> Result<(), Box<dyn std::error::Error>> {
529    let effective = root.effective_dir();
530    write_all_output_with_layout(
531        result,
532        &effective,
533        export_layout,
534        formats,
535        datasynth_config::JeNetworkMethod::default(),
536    )
537}
538
539/// Write all generated data with a configurable export layout and format set.
540///
541/// Only writes files for formats present in `formats`. If `formats` is empty,
542/// writes both CSV and JSON (backward compatible). This allows skipping JSON
543/// when only CSV is needed, which halves output time for large datasets.
544pub fn write_all_output_with_layout(
545    result: &EnhancedGenerationResult,
546    output_dir: &Path,
547    export_layout: datasynth_config::ExportLayout,
548    formats: &[datasynth_config::FileFormat],
549    je_network_method: datasynth_config::JeNetworkMethod,
550) -> Result<(), Box<dyn std::error::Error>> {
551    let csv_enabled = formats.is_empty()
552        || formats.contains(&datasynth_config::FileFormat::Csv)
553        || formats.contains(&datasynth_config::FileFormat::Parquet);
554    let json_enabled = formats.is_empty()
555        || formats.contains(&datasynth_config::FileFormat::Json)
556        || formats.contains(&datasynth_config::FileFormat::JsonLines);
557    std::fs::create_dir_all(output_dir)?;
558    info!("Writing comprehensive output to: {}", output_dir.display());
559
560    // Set flat-layout flag for all `write_json_safe` calls in this pass.
561    // Scope guard ensures we reset on return (including error paths).
562    struct FlatLayoutGuard;
563    impl Drop for FlatLayoutGuard {
564        fn drop(&mut self) {
565            FLAT_LAYOUT_ACTIVE.with(|c| c.set(false));
566        }
567    }
568    let _flat_guard = if export_layout == datasynth_config::ExportLayout::Flat {
569        FLAT_LAYOUT_ACTIVE.with(|c| c.set(true));
570        Some(FlatLayoutGuard)
571    } else {
572        None
573    };
574
575    // Set JSON skip flag so `write_json_safe` becomes a no-op when JSON not requested.
576    struct SkipJsonGuard;
577    impl Drop for SkipJsonGuard {
578        fn drop(&mut self) {
579            SKIP_JSON.with(|c| c.set(false));
580        }
581    }
582    let _skip_json_guard = if !json_enabled {
583        SKIP_JSON.with(|c| c.set(true));
584        info!("JSON output skipped (not in requested formats)");
585        Some(SkipJsonGuard)
586    } else {
587        None
588    };
589
590    // ========================================================================
591    // Journal Entries (CSV + JSON in parallel when both enabled)
592    // ========================================================================
593    if !result.journal_entries.is_empty() {
594        let do_csv = csv_enabled;
595        let do_json = json_enabled;
596        let is_flat = export_layout == datasynth_config::ExportLayout::Flat;
597
598        std::thread::scope(|s| {
599            if do_csv {
600                s.spawn(|| {
601                    if let Err(e) = write_journal_entries_csv(result, output_dir) {
602                        warn!("Failed to write journal_entries.csv: {}", e);
603                    }
604                });
605                // v5.8.0 — flat edge-list for accounting-network construction.
606                // Always emit when CSV is requested; cheap relative to the
607                // main JE table.
608                s.spawn(|| {
609                    if let Err(e) = write_je_network_csv(result, output_dir, je_network_method) {
610                        warn!("Failed to write graphs/je_network.csv: {}", e);
611                    }
612                });
613            }
614            if do_json {
615                s.spawn(|| {
616                    if is_flat {
617                        if let Err(e) = write_journal_entries_flat_json(result, output_dir) {
618                            warn!("Failed to write flat journal_entries.json: {}", e);
619                        }
620                    } else if let Err(e) = write_json(
621                        &result.journal_entries,
622                        &output_dir.join("journal_entries.json"),
623                        "Journal entries (JSON)",
624                    ) {
625                        warn!("Failed to write journal_entries.json: {}", e);
626                    }
627                });
628            }
629        });
630    }
631
632    // ========================================================================
633    // Master Data
634    // ========================================================================
635    let md_dir = output_dir.join("master_data");
636    if !result.master_data.vendors.is_empty()
637        || !result.master_data.customers.is_empty()
638        || !result.master_data.materials.is_empty()
639        || !result.master_data.assets.is_empty()
640        || !result.master_data.employees.is_empty()
641        || !result.master_data.cost_centers.is_empty()
642        || !result.master_data.profit_centers.is_empty()
643    {
644        std::fs::create_dir_all(&md_dir)?;
645        info!("Writing master data...");
646
647        write_json_safe(
648            &result.master_data.vendors,
649            &md_dir.join("vendors.json"),
650            "Vendors",
651        );
652        write_json_safe(
653            &result.master_data.customers,
654            &md_dir.join("customers.json"),
655            "Customers",
656        );
657        write_json_safe(
658            &result.master_data.materials,
659            &md_dir.join("materials.json"),
660            "Materials",
661        );
662        write_json_safe(
663            &result.master_data.assets,
664            &md_dir.join("fixed_assets.json"),
665            "Fixed assets",
666        );
667        write_json_safe(
668            &result.master_data.employees,
669            &md_dir.join("employees.json"),
670            "Employees",
671        );
672        write_json_safe(
673            &result.master_data.cost_centers,
674            &md_dir.join("cost_centers.json"),
675            "Cost centers",
676        );
677        // v5.1: profit-centre hierarchy (segments + sub-units).
678        write_json_safe(
679            &result.master_data.profit_centers,
680            &md_dir.join("profit_centers.json"),
681            "Profit centres",
682        );
683        // v3.3.0: organizational profiles (one per company)
684        write_json_safe(
685            &result.master_data.organizational_profiles,
686            &md_dir.join("organizational_profiles.json"),
687            "Organizational profiles (v3.3.0)",
688        );
689    }
690
691    // ========================================================================
692    // Document Flows
693    // ========================================================================
694    let df_dir = output_dir.join("document_flows");
695    let flat_mode = export_layout == datasynth_config::ExportLayout::Flat;
696    if !result.document_flows.purchase_orders.is_empty()
697        || !result.document_flows.sales_orders.is_empty()
698    {
699        std::fs::create_dir_all(&df_dir)?;
700        info!("Writing document flows...");
701
702        write_json_auto(
703            &result.document_flows.purchase_orders,
704            &df_dir.join("purchase_orders.json"),
705            "Purchase orders",
706            flat_mode,
707        );
708        write_json_auto(
709            &result.document_flows.goods_receipts,
710            &df_dir.join("goods_receipts.json"),
711            "Goods receipts",
712            flat_mode,
713        );
714        write_json_auto(
715            &result.document_flows.vendor_invoices,
716            &df_dir.join("vendor_invoices.json"),
717            "Vendor invoices",
718            flat_mode,
719        );
720        write_json_auto(
721            &result.document_flows.payments,
722            &df_dir.join("payments.json"),
723            "Payments",
724            flat_mode,
725        );
726        let customer_receipts: Vec<_> = result
727            .document_flows
728            .payments
729            .iter()
730            .filter(|p| p.payment_type == PaymentType::ArReceipt)
731            .collect();
732        write_json_auto(
733            &customer_receipts,
734            &df_dir.join("customer_receipts.json"),
735            "Customer receipts",
736            flat_mode,
737        );
738        write_json_auto(
739            &result.document_flows.sales_orders,
740            &df_dir.join("sales_orders.json"),
741            "Sales orders",
742            flat_mode,
743        );
744        write_json_auto(
745            &result.document_flows.deliveries,
746            &df_dir.join("deliveries.json"),
747            "Deliveries",
748            flat_mode,
749        );
750        write_json_auto(
751            &result.document_flows.customer_invoices,
752            &df_dir.join("customer_invoices.json"),
753            "Customer invoices",
754            flat_mode,
755        );
756
757        // Document cross-references (PO→GR, GR→Invoice, Invoice→Payment, etc.).
758        // v4.4.2+: inject SDK-friendly `from_type`/`from_id`/`to_type`/`to_id`
759        // aliases so consumers that follow the graph convention see the
760        // types populated. The canonical `source_doc_*`/`target_doc_*`
761        // keys continue to emit unchanged for backwards compatibility.
762        match serde_json::to_value(&result.document_flows.document_references) {
763            Ok(mut v) => {
764                if let Some(arr) = v.as_array_mut() {
765                    for r in arr.iter_mut() {
766                        if let Some(obj) = r.as_object_mut() {
767                            if let Some(st) = obj.get("source_doc_type").cloned() {
768                                obj.entry("from_type").or_insert(st);
769                            }
770                            if let Some(si) = obj.get("source_doc_id").cloned() {
771                                obj.entry("from_id").or_insert(si);
772                            }
773                            if let Some(tt) = obj.get("target_doc_type").cloned() {
774                                obj.entry("to_type").or_insert(tt);
775                            }
776                            if let Some(ti) = obj.get("target_doc_id").cloned() {
777                                obj.entry("to_id").or_insert(ti);
778                            }
779                        }
780                    }
781                }
782                match serde_json::to_string_pretty(&v) {
783                    Ok(json) => {
784                        let path = df_dir.join("document_references.json");
785                        if let Err(e) = std::fs::write(&path, json) {
786                            warn!("Failed to write document references: {}", e);
787                        } else {
788                            info!(
789                                "  Document references written: {} records -> {}",
790                                result.document_flows.document_references.len(),
791                                path.display()
792                            );
793                        }
794                    }
795                    Err(e) => warn!("Failed to serialize document references: {}", e),
796                }
797            }
798            Err(e) => warn!("Failed to build document references Value: {}", e),
799        }
800
801        // Note: P2P/O2C chain types do not implement Serialize, so we log
802        // their counts instead. The individual documents above capture all data.
803        if !result.document_flows.p2p_chains.is_empty() {
804            info!(
805                "  P2P chains: {} (data exported via individual document files)",
806                result.document_flows.p2p_chains.len()
807            );
808        }
809        if !result.document_flows.o2c_chains.is_empty() {
810            info!(
811                "  O2C chains: {} (data exported via individual document files)",
812                result.document_flows.o2c_chains.len()
813            );
814        }
815    }
816
817    // ========================================================================
818    // Subledger
819    // ========================================================================
820    let sl_dir = output_dir.join("subledger");
821    if !result.subledger.ap_invoices.is_empty()
822        || !result.subledger.ar_invoices.is_empty()
823        || !result.subledger.fa_records.is_empty()
824        || !result.subledger.inventory_positions.is_empty()
825    {
826        std::fs::create_dir_all(&sl_dir)?;
827        info!("Writing subledger data...");
828
829        write_json_safe(
830            &result.subledger.ap_invoices,
831            &sl_dir.join("ap_invoices.json"),
832            "AP invoices",
833        );
834        write_json_safe(
835            &result.subledger.ar_invoices,
836            &sl_dir.join("ar_invoices.json"),
837            "AR invoices",
838        );
839        write_json_safe(
840            &result.subledger.fa_records,
841            &sl_dir.join("fa_records.json"),
842            "FA records",
843        );
844        write_json_safe(
845            &result.subledger.inventory_positions,
846            &sl_dir.join("inventory_positions.json"),
847            "Inventory positions",
848        );
849        write_json_safe(
850            &result.subledger.inventory_movements,
851            &sl_dir.join("inventory_movements.json"),
852            "Inventory movements",
853        );
854        write_json_safe(
855            &result.subledger.ar_aging_reports,
856            &sl_dir.join("ar_aging.json"),
857            "AR aging reports",
858        );
859        write_json_safe(
860            &result.subledger.ap_aging_reports,
861            &sl_dir.join("ap_aging.json"),
862            "AP aging reports",
863        );
864        write_json_safe(
865            &result.subledger.depreciation_runs,
866            &sl_dir.join("depreciation_runs.json"),
867            "Depreciation runs",
868        );
869        write_json_safe(
870            &result.subledger.inventory_valuations,
871            &sl_dir.join("inventory_valuation.json"),
872            "Inventory valuations",
873        );
874        // Dunning runs and letters (generated after AR aging)
875        write_json_safe(
876            &result.subledger.dunning_runs,
877            &sl_dir.join("dunning_runs.json"),
878            "Dunning runs",
879        );
880        write_json_safe(
881            &result.subledger.dunning_letters,
882            &sl_dir.join("dunning_letters.json"),
883            "Dunning letters",
884        );
885    }
886
887    // ========================================================================
888    // Audit
889    // ========================================================================
890    let audit_dir = output_dir.join("audit");
891    if !result.audit.engagements.is_empty() {
892        std::fs::create_dir_all(&audit_dir)?;
893        info!("Writing audit data...");
894
895        write_json_safe(
896            &result.audit.engagements,
897            &audit_dir.join("audit_engagements.json"),
898            "Audit engagements",
899        );
900        write_json_safe(
901            &result.audit.audit_scopes,
902            &audit_dir.join("audit_scopes.json"),
903            "Audit scopes (ISA 220 / ISA 300)",
904        );
905        write_json_safe(
906            &result.audit.workpapers,
907            &audit_dir.join("audit_workpapers.json"),
908            "Audit workpapers",
909        );
910        write_json_safe(
911            &result.audit.evidence,
912            &audit_dir.join("audit_evidence.json"),
913            "Audit evidence",
914        );
915        write_json_safe(
916            &result.audit.risk_assessments,
917            &audit_dir.join("audit_risk_assessments.json"),
918            "Audit risk assessments",
919        );
920        write_json_safe(
921            &result.audit.findings,
922            &audit_dir.join("audit_findings.json"),
923            "Audit findings",
924        );
925        write_json_safe(
926            &result.audit.judgments,
927            &audit_dir.join("audit_judgments.json"),
928            "Audit judgments",
929        );
930        write_json_safe(
931            &result.audit.confirmations,
932            &audit_dir.join("audit_confirmations.json"),
933            "Audit confirmations",
934        );
935        write_json_safe(
936            &result.audit.confirmation_responses,
937            &audit_dir.join("audit_confirmation_responses.json"),
938            "Audit confirmation responses",
939        );
940        write_json_safe(
941            &result.audit.procedure_steps,
942            &audit_dir.join("audit_procedure_steps.json"),
943            "Audit procedure steps",
944        );
945        write_json_safe(
946            &result.audit.samples,
947            &audit_dir.join("audit_samples.json"),
948            "Audit samples",
949        );
950        write_json_safe(
951            &result.audit.analytical_results,
952            &audit_dir.join("audit_analytical_results.json"),
953            "Audit analytical results",
954        );
955        write_json_safe(
956            &result.audit.ia_functions,
957            &audit_dir.join("audit_ia_functions.json"),
958            "Audit IA functions",
959        );
960        write_json_safe(
961            &result.audit.ia_reports,
962            &audit_dir.join("audit_ia_reports.json"),
963            "Audit IA reports",
964        );
965        write_json_safe(
966            &result.audit.related_parties,
967            &audit_dir.join("audit_related_parties.json"),
968            "Audit related parties",
969        );
970        write_json_safe(
971            &result.audit.related_party_transactions,
972            &audit_dir.join("audit_related_party_transactions.json"),
973            "Audit related party transactions",
974        );
975        // ISA 600: Group audit artefacts
976        if !result.audit.component_auditors.is_empty() {
977            write_json_safe(
978                &result.audit.component_auditors,
979                &audit_dir.join("component_auditors.json"),
980                "Component auditors (ISA 600)",
981            );
982            if let Some(plan) = &result.audit.group_audit_plan {
983                write_json_single_safe(
984                    plan,
985                    &audit_dir.join("group_audit_plan.json"),
986                    "Group audit plan (ISA 600)",
987                );
988            }
989            write_json_safe(
990                &result.audit.component_instructions,
991                &audit_dir.join("component_instructions.json"),
992                "Component instructions (ISA 600)",
993            );
994            write_json_safe(
995                &result.audit.component_reports,
996                &audit_dir.join("component_reports.json"),
997                "Component auditor reports (ISA 600)",
998            );
999        }
1000        // ISA 210: Engagement letters
1001        write_json_safe(
1002            &result.audit.engagement_letters,
1003            &audit_dir.join("engagement_letters.json"),
1004            "Engagement letters (ISA 210)",
1005        );
1006        // ISA 560 / IAS 10: Subsequent events
1007        write_json_safe(
1008            &result.audit.subsequent_events,
1009            &audit_dir.join("subsequent_events.json"),
1010            "Subsequent events (ISA 560 / IAS 10)",
1011        );
1012        // ISA 402: Service organization controls
1013        write_json_safe(
1014            &result.audit.service_organizations,
1015            &audit_dir.join("service_organizations.json"),
1016            "Service organizations (ISA 402)",
1017        );
1018        write_json_safe(
1019            &result.audit.soc_reports,
1020            &audit_dir.join("soc_reports.json"),
1021            "SOC reports (ISA 402)",
1022        );
1023        write_json_safe(
1024            &result.audit.user_entity_controls,
1025            &audit_dir.join("user_entity_controls.json"),
1026            "User entity controls (ISA 402)",
1027        );
1028
1029        // ISA 570: Going concern assessments
1030        write_json_safe(
1031            &result.audit.going_concern_assessments,
1032            &audit_dir.join("going_concern_assessments.json"),
1033            "Going concern assessments (ISA 570)",
1034        );
1035
1036        // ISA 540: Accounting estimates
1037        write_json_safe(
1038            &result.audit.accounting_estimates,
1039            &audit_dir.join("accounting_estimates.json"),
1040            "Accounting estimates (ISA 540)",
1041        );
1042
1043        // ISA 700/701/705/706: Audit opinions and Key Audit Matters.
1044        // Always write even if the vec is empty; see the always-emit block
1045        // below (outside the `engagements.is_empty()` guard) for the case
1046        // where audit is entirely disabled — the files still appear in the
1047        // archive with `[]` so SDK consumers don't get 404s on the manifest.
1048        write_json_always(
1049            &result.audit.audit_opinions,
1050            &audit_dir.join("audit_opinions.json"),
1051            "Audit opinions (ISA 700/705/706)",
1052        );
1053        write_json_always(
1054            &result.audit.key_audit_matters,
1055            &audit_dir.join("key_audit_matters.json"),
1056            "Key Audit Matters (ISA 701)",
1057        );
1058
1059        // SOX 302 / 404
1060        if !result.audit.sox_302_certifications.is_empty() {
1061            write_json_safe(
1062                &result.audit.sox_302_certifications,
1063                &audit_dir.join("sox_302_certifications.json"),
1064                "SOX 302 certifications",
1065            );
1066            write_json_safe(
1067                &result.audit.sox_404_assessments,
1068                &audit_dir.join("sox_404_assessments.json"),
1069                "SOX 404 ICFR assessments",
1070            );
1071        }
1072
1073        // ISA 320: Materiality calculations
1074        if !result.audit.materiality_calculations.is_empty() {
1075            write_json_safe(
1076                &result.audit.materiality_calculations,
1077                &audit_dir.join("materiality_calculations.json"),
1078                "Materiality calculations (ISA 320)",
1079            );
1080        }
1081
1082        // ISA 315: Combined Risk Assessments
1083        if !result.audit.combined_risk_assessments.is_empty() {
1084            write_json_safe(
1085                &result.audit.combined_risk_assessments,
1086                &audit_dir.join("combined_risk_assessments.json"),
1087                "Combined Risk Assessments (ISA 315)",
1088            );
1089        }
1090
1091        // ISA 530: Sampling Plans and Sampled Items
1092        if !result.audit.sampling_plans.is_empty() {
1093            write_json_safe(
1094                &result.audit.sampling_plans,
1095                &audit_dir.join("sampling_plans.json"),
1096                "Sampling plans (ISA 530)",
1097            );
1098            write_json_safe(
1099                &result.audit.sampled_items,
1100                &audit_dir.join("sampled_items.json"),
1101                "Sampled items (ISA 530)",
1102            );
1103        }
1104
1105        // ISA 315: Significant Classes of Transactions (SCOTS)
1106        if !result.audit.significant_transaction_classes.is_empty() {
1107            write_json_safe(
1108                &result.audit.significant_transaction_classes,
1109                &audit_dir.join("significant_transaction_classes.json"),
1110                "Significant Classes of Transactions / SCOTS (ISA 315)",
1111            );
1112        }
1113
1114        // ISA 520: Unusual Item Markers
1115        if !result.audit.unusual_items.is_empty() {
1116            write_json_safe(
1117                &result.audit.unusual_items,
1118                &audit_dir.join("unusual_items.json"),
1119                "Unusual item flags (ISA 520)",
1120            );
1121        }
1122
1123        // ISA 520: Analytical Relationships
1124        if !result.audit.analytical_relationships.is_empty() {
1125            write_json_safe(
1126                &result.audit.analytical_relationships,
1127                &audit_dir.join("analytical_relationships.json"),
1128                "Analytical relationships (ISA 520)",
1129            );
1130        }
1131
1132        // PCAOB-ISA cross-reference mappings
1133        if !result.audit.isa_pcaob_mappings.is_empty() {
1134            write_json_safe(
1135                &result.audit.isa_pcaob_mappings,
1136                &audit_dir.join("isa_pcaob_mappings.json"),
1137                "PCAOB-ISA standard mappings",
1138            );
1139        }
1140
1141        // ISA standard reference (number, title, series for all 34 ISA standards)
1142        if !result.audit.isa_mappings.is_empty() {
1143            write_json_safe(
1144                &result.audit.isa_mappings,
1145                &audit_dir.join("isa_mappings.json"),
1146                "ISA standard reference mappings",
1147            );
1148        }
1149
1150        // FSM event trail (when audit.fsm.enabled: true)
1151        if let Some(ref event_trail) = result.audit.fsm_event_trail {
1152            if !event_trail.is_empty() {
1153                write_json_safe(
1154                    event_trail,
1155                    &audit_dir.join("fsm_event_trail.json"),
1156                    "FSM audit event trail",
1157                );
1158            }
1159        }
1160
1161        // v3.3.0: legal documents (when compliance_regulations.legal_documents.enabled)
1162        write_json_safe(
1163            &result.audit.legal_documents,
1164            &audit_dir.join("legal_documents.json"),
1165            "Legal documents (v3.3.0)",
1166        );
1167
1168        // v3.3.0: IT general controls — access logs + change records
1169        write_json_safe(
1170            &result.audit.it_controls_access_logs,
1171            &audit_dir.join("it_controls_access_logs.json"),
1172            "IT general controls — access logs (v3.3.0)",
1173        );
1174        write_json_safe(
1175            &result.audit.it_controls_change_records,
1176            &audit_dir.join("it_controls_change_records.json"),
1177            "IT general controls — change management records (v3.3.0)",
1178        );
1179    } else {
1180        // Audit phase disabled or ran with no engagements — still emit
1181        // audit_opinions.json + key_audit_matters.json so the archive
1182        // structure is consistent and SDK consumers can rely on these
1183        // files always existing. v3.1 announced these as archive-shipping
1184        // files; v3.1.1 guarantees it regardless of audit.enabled.
1185        std::fs::create_dir_all(&audit_dir)?;
1186        write_json_always(
1187            &result.audit.audit_opinions,
1188            &audit_dir.join("audit_opinions.json"),
1189            "Audit opinions (ISA 700/705/706) — empty (audit phase disabled)",
1190        );
1191        write_json_always(
1192            &result.audit.key_audit_matters,
1193            &audit_dir.join("key_audit_matters.json"),
1194            "Key Audit Matters (ISA 701) — empty (audit phase disabled)",
1195        );
1196    }
1197
1198    // ========================================================================
1199    // Banking (JSON - keep existing format for backward compat)
1200    // ========================================================================
1201    let banking_dir = output_dir.join("banking");
1202    if !result.banking.customers.is_empty() {
1203        std::fs::create_dir_all(&banking_dir)?;
1204        info!("Writing banking data...");
1205
1206        // v4.4.2: dual-key risk tier. SDK consumers inspect `risk_level`;
1207        // the struct stores it as `risk_tier` for historical reasons.
1208        // Serialize through a `serde_json::Value` so we can inject the
1209        // `risk_level` alias key on every customer row without touching
1210        // the `BankingCustomer` Serialize impl (which has 40+ fields).
1211        match serde_json::to_value(&result.banking.customers) {
1212            Ok(mut v) => {
1213                if let Some(arr) = v.as_array_mut() {
1214                    for c in arr.iter_mut() {
1215                        if let Some(obj) = c.as_object_mut() {
1216                            if let Some(rt) = obj.get("risk_tier").cloned() {
1217                                obj.entry("risk_level").or_insert(rt);
1218                            }
1219                        }
1220                    }
1221                }
1222                match serde_json::to_string_pretty(&v) {
1223                    Ok(json) => {
1224                        let path = banking_dir.join("banking_customers.json");
1225                        if let Err(e) = std::fs::write(&path, json) {
1226                            warn!("Failed to write banking_customers.json: {}", e);
1227                        } else {
1228                            info!(
1229                                "  Banking customers written: {} records -> {}",
1230                                result.banking.customers.len(),
1231                                path.display()
1232                            );
1233                        }
1234                    }
1235                    Err(e) => warn!("Failed to serialize banking customers: {}", e),
1236                }
1237            }
1238            Err(e) => warn!("Failed to build banking customers Value: {}", e),
1239        }
1240        write_json_safe(
1241            &result.banking.accounts,
1242            &banking_dir.join("banking_accounts.json"),
1243            "Banking accounts",
1244        );
1245        write_json_safe(
1246            &result.banking.transactions,
1247            &banking_dir.join("banking_transactions.json"),
1248            "Banking transactions",
1249        );
1250        write_json_safe(
1251            &result.banking.transaction_labels,
1252            &banking_dir.join("aml_transaction_labels.json"),
1253            "AML transaction labels",
1254        );
1255        write_json_safe(
1256            &result.banking.customer_labels,
1257            &banking_dir.join("aml_customer_labels.json"),
1258            "AML customer labels",
1259        );
1260        write_json_safe(
1261            &result.banking.account_labels,
1262            &banking_dir.join("aml_account_labels.json"),
1263            "AML account labels",
1264        );
1265        write_json_safe(
1266            &result.banking.relationship_labels,
1267            &banking_dir.join("aml_relationship_labels.json"),
1268            "AML relationship labels",
1269        );
1270        write_json_safe(
1271            &result.banking.narratives,
1272            &banking_dir.join("aml_narratives.json"),
1273            "AML narratives",
1274        );
1275    }
1276
1277    // ========================================================================
1278    // Sourcing (S2C)
1279    // ========================================================================
1280    let s2c_dir = output_dir.join("sourcing");
1281    if !result.sourcing.spend_analyses.is_empty() || !result.sourcing.sourcing_projects.is_empty() {
1282        std::fs::create_dir_all(&s2c_dir)?;
1283        info!("Writing sourcing (S2C) data...");
1284
1285        write_json_safe(
1286            &result.sourcing.spend_analyses,
1287            &s2c_dir.join("spend_analyses.json"),
1288            "Spend analyses",
1289        );
1290        write_json_safe(
1291            &result.sourcing.sourcing_projects,
1292            &s2c_dir.join("sourcing_projects.json"),
1293            "Sourcing projects",
1294        );
1295        write_json_safe(
1296            &result.sourcing.qualifications,
1297            &s2c_dir.join("supplier_qualifications.json"),
1298            "Supplier qualifications",
1299        );
1300        write_json_safe(
1301            &result.sourcing.rfx_events,
1302            &s2c_dir.join("rfx_events.json"),
1303            "RFx events",
1304        );
1305        write_json_safe(
1306            &result.sourcing.bids,
1307            &s2c_dir.join("supplier_bids.json"),
1308            "Supplier bids",
1309        );
1310        write_json_safe(
1311            &result.sourcing.bid_evaluations,
1312            &s2c_dir.join("bid_evaluations.json"),
1313            "Bid evaluations",
1314        );
1315        write_json_safe(
1316            &result.sourcing.contracts,
1317            &s2c_dir.join("procurement_contracts.json"),
1318            "Procurement contracts",
1319        );
1320        write_json_safe(
1321            &result.sourcing.catalog_items,
1322            &s2c_dir.join("catalog_items.json"),
1323            "Catalog items",
1324        );
1325        write_json_safe(
1326            &result.sourcing.scorecards,
1327            &s2c_dir.join("supplier_scorecards.json"),
1328            "Supplier scorecards",
1329        );
1330    }
1331
1332    // ========================================================================
1333    // Intercompany
1334    // ========================================================================
1335    let ic_dir = output_dir.join("intercompany");
1336    if result.intercompany.group_structure.is_some()
1337        || !result.intercompany.matched_pairs.is_empty()
1338    {
1339        std::fs::create_dir_all(&ic_dir)?;
1340        info!("Writing intercompany data...");
1341
1342        // Always write group structure when present (independent of IC transactions).
1343        if let Some(gs) = &result.intercompany.group_structure {
1344            write_json_single_safe(gs, &ic_dir.join("group_structure.json"), "Group structure");
1345        }
1346
1347        write_json_safe(
1348            &result.intercompany.matched_pairs,
1349            &ic_dir.join("ic_matched_pairs.json"),
1350            "IC matched pairs",
1351        );
1352        write_json_safe(
1353            &result.intercompany.seller_journal_entries,
1354            &ic_dir.join("ic_seller_journal_entries.json"),
1355            "IC seller journal entries",
1356        );
1357        write_json_safe(
1358            &result.intercompany.buyer_journal_entries,
1359            &ic_dir.join("ic_buyer_journal_entries.json"),
1360            "IC buyer journal entries",
1361        );
1362        write_json_safe(
1363            &result.intercompany.elimination_entries,
1364            &ic_dir.join("ic_elimination_entries.json"),
1365            "IC elimination entries",
1366        );
1367
1368        // NCI measurements from group structure ownership percentages
1369        if !result.intercompany.nci_measurements.is_empty() {
1370            write_json_safe(
1371                &result.intercompany.nci_measurements,
1372                &ic_dir.join("nci_measurements.json"),
1373                "NCI measurements",
1374            );
1375        }
1376    }
1377
1378    // ========================================================================
1379    // Financial Reporting
1380    // ========================================================================
1381    let fin_dir = output_dir.join("financial_reporting");
1382    if !result.financial_reporting.financial_statements.is_empty()
1383        || !result.financial_reporting.bank_reconciliations.is_empty()
1384        || !result
1385            .financial_reporting
1386            .consolidated_statements
1387            .is_empty()
1388    {
1389        std::fs::create_dir_all(&fin_dir)?;
1390        info!("Writing financial reporting data...");
1391
1392        // Legacy flat file (all standalone statements combined)
1393        write_json_safe(
1394            &result.financial_reporting.financial_statements,
1395            &fin_dir.join("financial_statements.json"),
1396            "Financial statements",
1397        );
1398
1399        // Per-entity standalone statements
1400        if !result.financial_reporting.standalone_statements.is_empty() {
1401            let standalone_dir = fin_dir.join("standalone");
1402            std::fs::create_dir_all(&standalone_dir)?;
1403            for (entity_code, stmts) in &result.financial_reporting.standalone_statements {
1404                let file_name = format!("{}_financial_statements.json", entity_code);
1405                write_json_safe(
1406                    stmts,
1407                    &standalone_dir.join(&file_name),
1408                    &format!("Standalone statements for {}", entity_code),
1409                );
1410            }
1411        }
1412
1413        // Consolidated statements + schedule
1414        if !result
1415            .financial_reporting
1416            .consolidated_statements
1417            .is_empty()
1418            || !result
1419                .financial_reporting
1420                .consolidation_schedules
1421                .is_empty()
1422        {
1423            let consolidated_dir = fin_dir.join("consolidated");
1424            std::fs::create_dir_all(&consolidated_dir)?;
1425            write_json_safe(
1426                &result.financial_reporting.consolidated_statements,
1427                &consolidated_dir.join("consolidated_financial_statements.json"),
1428                "Consolidated financial statements",
1429            );
1430            write_json_safe(
1431                &result.financial_reporting.consolidation_schedules,
1432                &consolidated_dir.join("consolidation_schedule.json"),
1433                "Consolidation schedule",
1434            );
1435        }
1436
1437        write_json_safe(
1438            &result.financial_reporting.bank_reconciliations,
1439            &fin_dir.join("bank_reconciliations.json"),
1440            "Bank reconciliations",
1441        );
1442
1443        // IFRS 8 / ASC 280 Segment Reporting
1444        if !result.financial_reporting.segment_reports.is_empty()
1445            || !result
1446                .financial_reporting
1447                .segment_reconciliations
1448                .is_empty()
1449        {
1450            let seg_dir = fin_dir.join("segment_reporting");
1451            std::fs::create_dir_all(&seg_dir)?;
1452            write_json_safe(
1453                &result.financial_reporting.segment_reports,
1454                &seg_dir.join("segment_reports.json"),
1455                "Segment reports",
1456            );
1457            write_json_safe(
1458                &result.financial_reporting.segment_reconciliations,
1459                &seg_dir.join("segment_reconciliations.json"),
1460                "Segment reconciliations",
1461            );
1462        }
1463
1464        // IAS 1 / ASC 235: Notes to financial statements
1465        write_json_safe(
1466            &result.financial_reporting.notes_to_financial_statements,
1467            &fin_dir.join("notes_to_financial_statements.json"),
1468            "Notes to financial statements",
1469        );
1470    }
1471
1472    // ========================================================================
1473    // Period-Close Trial Balances
1474    // ========================================================================
1475    //
1476    // v5.1: convert each in-memory `PeriodTrialBalance` to the
1477    // canonical `datasynth_core::models::balance::TrialBalance` before
1478    // writing.  The on-disk shape is now identical to what the group
1479    // aggregate phase loads via `tb_loader::load_entity_trial_balance`,
1480    // so the loader's v5.0 dual-shape detection (`PeriodTrialBalanceOnDisk`
1481    // → `TrialBalance` synthesis) is no longer required.
1482    if !result.financial_reporting.trial_balances.is_empty() {
1483        let pc_dir = output_dir.join("period_close");
1484        std::fs::create_dir_all(&pc_dir)?;
1485        info!(
1486            "Writing {} period-close trial balances...",
1487            result.financial_reporting.trial_balances.len()
1488        );
1489        // Pick the first JE's company_code + currency as the
1490        // canonical identifiers; the orchestrator only emits one TB
1491        // per period (gated by `if company_idx == 0` at the push
1492        // site), so all trial-balance entries belong to that company.
1493        // Fall back to safe defaults when the JE list is empty
1494        // (effectively only test fixtures).
1495        let (company_code, currency) = result
1496            .journal_entries
1497            .first()
1498            .map(|je| (je.header.company_code.as_str(), je.header.currency.as_str()))
1499            .unwrap_or(("UNKNOWN", "USD"));
1500        let canonical: Vec<datasynth_core::models::balance::TrialBalance> = result
1501            .financial_reporting
1502            .trial_balances
1503            .iter()
1504            .cloned()
1505            .map(|tb| tb.into_canonical(company_code, currency))
1506            .collect();
1507        write_json_safe(
1508            &canonical,
1509            &pc_dir.join("trial_balances.json"),
1510            "Period-close trial balances (canonical)",
1511        );
1512    }
1513
1514    // ========================================================================
1515    // Balance: Opening Balances + GL-Subledger Reconciliation
1516    // ========================================================================
1517    if !result.opening_balances.is_empty() || !result.subledger_reconciliation.is_empty() {
1518        let balance_dir = output_dir.join("balance");
1519        std::fs::create_dir_all(&balance_dir)?;
1520        info!("Writing balance data...");
1521
1522        write_json_safe(
1523            &result.opening_balances,
1524            &balance_dir.join("opening_balances.json"),
1525            "Opening balances",
1526        );
1527        write_json_safe(
1528            &result.subledger_reconciliation,
1529            &balance_dir.join("subledger_reconciliation.json"),
1530            "Subledger reconciliation",
1531        );
1532    }
1533
1534    // ========================================================================
1535    // HR (Payroll, Time, Expenses, Benefits, Pensions, Stock Compensation, Employee Changes)
1536    // ========================================================================
1537    let hr_dir = output_dir.join("hr");
1538    if !result.hr.payroll_runs.is_empty()
1539        || !result.hr.time_entries.is_empty()
1540        || !result.hr.expense_reports.is_empty()
1541        || !result.hr.benefit_enrollments.is_empty()
1542        || !result.hr.pension_plans.is_empty()
1543        || !result.hr.stock_grants.is_empty()
1544        || !result.master_data.employee_change_history.is_empty()
1545    {
1546        std::fs::create_dir_all(&hr_dir)?;
1547        info!("Writing HR data...");
1548
1549        write_json_safe(
1550            &result.hr.payroll_runs,
1551            &hr_dir.join("payroll_runs.json"),
1552            "Payroll runs",
1553        );
1554        write_json_safe(
1555            &result.hr.payroll_line_items,
1556            &hr_dir.join("payroll_line_items.json"),
1557            "Payroll line items",
1558        );
1559        write_json_safe(
1560            &result.hr.time_entries,
1561            &hr_dir.join("time_entries.json"),
1562            "Time entries",
1563        );
1564        write_json_safe(
1565            &result.hr.expense_reports,
1566            &hr_dir.join("expense_reports.json"),
1567            "Expense reports",
1568        );
1569        write_json_safe(
1570            &result.hr.benefit_enrollments,
1571            &hr_dir.join("benefit_enrollments.json"),
1572            "Benefit enrollments",
1573        );
1574        write_json_safe(
1575            &result.hr.pension_plans,
1576            &hr_dir.join("pension_plans.json"),
1577            "Pension plans",
1578        );
1579        write_json_safe(
1580            &result.hr.pension_obligations,
1581            &hr_dir.join("pension_obligations.json"),
1582            "Pension obligations",
1583        );
1584        write_json_safe(
1585            &result.hr.pension_plan_assets,
1586            &hr_dir.join("plan_assets.json"),
1587            "Plan assets",
1588        );
1589        write_json_safe(
1590            &result.hr.pension_disclosures,
1591            &hr_dir.join("pension_disclosures.json"),
1592            "Pension disclosures",
1593        );
1594        write_json_safe(
1595            &result.hr.stock_grants,
1596            &hr_dir.join("stock_grants.json"),
1597            "Stock grants",
1598        );
1599        write_json_safe(
1600            &result.hr.stock_comp_expenses,
1601            &hr_dir.join("stock_comp_expense.json"),
1602            "Stock comp expense",
1603        );
1604        write_json_safe(
1605            &result.master_data.employee_change_history,
1606            &hr_dir.join("employee_change_history.json"),
1607            "Employee change history",
1608        );
1609    }
1610
1611    // ========================================================================
1612    // Manufacturing
1613    // ========================================================================
1614    let mfg_dir = output_dir.join("manufacturing");
1615    if !result.manufacturing.production_orders.is_empty()
1616        || !result.manufacturing.quality_inspections.is_empty()
1617        || !result.manufacturing.cycle_counts.is_empty()
1618        || !result.manufacturing.bom_components.is_empty()
1619        || !result.manufacturing.inventory_movements.is_empty()
1620    {
1621        std::fs::create_dir_all(&mfg_dir)?;
1622        info!("Writing manufacturing data...");
1623
1624        write_json_safe(
1625            &result.manufacturing.production_orders,
1626            &mfg_dir.join("production_orders.json"),
1627            "Production orders",
1628        );
1629        write_json_safe(
1630            &result.manufacturing.quality_inspections,
1631            &mfg_dir.join("quality_inspections.json"),
1632            "Quality inspections",
1633        );
1634        write_json_safe(
1635            &result.manufacturing.cycle_counts,
1636            &mfg_dir.join("cycle_counts.json"),
1637            "Cycle counts",
1638        );
1639        write_json_safe(
1640            &result.manufacturing.bom_components,
1641            &mfg_dir.join("bom_components.json"),
1642            "BOM components",
1643        );
1644        write_json_safe(
1645            &result.manufacturing.inventory_movements,
1646            &mfg_dir.join("inventory_movements.json"),
1647            "Inventory movements",
1648        );
1649    }
1650
1651    // ========================================================================
1652    // Sales, KPIs, Budgets
1653    // ========================================================================
1654    let sales_dir = output_dir.join("sales_kpi_budgets");
1655    if !result.sales_kpi_budgets.sales_quotes.is_empty()
1656        || !result.sales_kpi_budgets.kpis.is_empty()
1657        || !result.sales_kpi_budgets.budgets.is_empty()
1658    {
1659        std::fs::create_dir_all(&sales_dir)?;
1660        info!("Writing sales, KPI, and budget data...");
1661
1662        write_json_safe(
1663            &result.sales_kpi_budgets.sales_quotes,
1664            &sales_dir.join("sales_quotes.json"),
1665            "Sales quotes",
1666        );
1667        write_json_safe(
1668            &result.sales_kpi_budgets.kpis,
1669            &sales_dir.join("management_kpis.json"),
1670            "Management KPIs",
1671        );
1672        write_json_safe(
1673            &result.sales_kpi_budgets.budgets,
1674            &sales_dir.join("budgets.json"),
1675            "Budgets",
1676        );
1677    }
1678
1679    // ========================================================================
1680    // Tax
1681    // ========================================================================
1682    let tax_dir = output_dir.join("tax");
1683    if !result.tax.jurisdictions.is_empty()
1684        || !result.tax.codes.is_empty()
1685        || !result.tax.tax_provisions.is_empty()
1686    {
1687        std::fs::create_dir_all(&tax_dir)?;
1688        info!("Writing tax data...");
1689
1690        write_json_safe(
1691            &result.tax.jurisdictions,
1692            &tax_dir.join("tax_jurisdictions.json"),
1693            "Tax jurisdictions",
1694        );
1695        write_json_safe(
1696            &result.tax.codes,
1697            &tax_dir.join("tax_codes.json"),
1698            "Tax codes",
1699        );
1700        write_json_safe(
1701            &result.tax.tax_provisions,
1702            &tax_dir.join("tax_provisions.json"),
1703            "Tax provisions",
1704        );
1705        write_json_safe(
1706            &result.tax.tax_lines,
1707            &tax_dir.join("tax_lines.json"),
1708            "Tax lines",
1709        );
1710        write_json_safe(
1711            &result.tax.tax_returns,
1712            &tax_dir.join("tax_returns.json"),
1713            "Tax returns",
1714        );
1715        write_json_safe(
1716            &result.tax.withholding_records,
1717            &tax_dir.join("withholding_records.json"),
1718            "Withholding tax records",
1719        );
1720        if !result.tax.tax_anomaly_labels.is_empty() {
1721            write_json_safe(
1722                &result.tax.tax_anomaly_labels,
1723                &tax_dir.join("tax_anomaly_labels.json"),
1724                "Tax anomaly labels",
1725            );
1726        }
1727        // Deferred tax engine output (IAS 12 / ASC 740)
1728        if !result.tax.deferred_tax.temporary_differences.is_empty() {
1729            write_json_safe(
1730                &result.tax.deferred_tax.temporary_differences,
1731                &tax_dir.join("temporary_differences.json"),
1732                "Temporary differences",
1733            );
1734            write_json_safe(
1735                &result.tax.deferred_tax.etr_reconciliations,
1736                &tax_dir.join("etr_reconciliation.json"),
1737                "ETR reconciliation",
1738            );
1739            write_json_safe(
1740                &result.tax.deferred_tax.rollforwards,
1741                &tax_dir.join("deferred_tax_rollforward.json"),
1742                "Deferred tax rollforward",
1743            );
1744            write_json_safe(
1745                &result.tax.deferred_tax.journal_entries,
1746                &tax_dir.join("deferred_tax_journal_entries.json"),
1747                "Deferred tax journal entries",
1748            );
1749        }
1750    }
1751
1752    // ========================================================================
1753    // ESG
1754    // ========================================================================
1755    let esg_dir = output_dir.join("esg");
1756    if !result.esg.emissions.is_empty()
1757        || !result.esg.energy.is_empty()
1758        || !result.esg.diversity.is_empty()
1759        || !result.esg.governance.is_empty()
1760    {
1761        std::fs::create_dir_all(&esg_dir)?;
1762        info!("Writing ESG data...");
1763
1764        write_json_safe(
1765            &result.esg.emissions,
1766            &esg_dir.join("emission_records.json"),
1767            "Emission records",
1768        );
1769        write_json_safe(
1770            &result.esg.energy,
1771            &esg_dir.join("energy_consumption.json"),
1772            "Energy consumption",
1773        );
1774        write_json_safe(
1775            &result.esg.water,
1776            &esg_dir.join("water_usage.json"),
1777            "Water usage",
1778        );
1779        write_json_safe(
1780            &result.esg.waste,
1781            &esg_dir.join("waste_records.json"),
1782            "Waste records",
1783        );
1784        write_json_safe(
1785            &result.esg.diversity,
1786            &esg_dir.join("workforce_diversity.json"),
1787            "Workforce diversity",
1788        );
1789        write_json_safe(
1790            &result.esg.pay_equity,
1791            &esg_dir.join("pay_equity.json"),
1792            "Pay equity",
1793        );
1794        write_json_safe(
1795            &result.esg.safety_incidents,
1796            &esg_dir.join("safety_incidents.json"),
1797            "Safety incidents",
1798        );
1799        write_json_safe(
1800            &result.esg.safety_metrics,
1801            &esg_dir.join("safety_metrics.json"),
1802            "Safety metrics",
1803        );
1804        write_json_safe(
1805            &result.esg.governance,
1806            &esg_dir.join("governance_metrics.json"),
1807            "Governance metrics",
1808        );
1809        write_json_safe(
1810            &result.esg.supplier_assessments,
1811            &esg_dir.join("supplier_esg_assessments.json"),
1812            "Supplier ESG assessments",
1813        );
1814        write_json_safe(
1815            &result.esg.materiality,
1816            &esg_dir.join("materiality_assessments.json"),
1817            "Materiality assessments",
1818        );
1819        write_json_safe(
1820            &result.esg.disclosures,
1821            &esg_dir.join("esg_disclosures.json"),
1822            "ESG disclosures",
1823        );
1824        write_json_safe(
1825            &result.esg.climate_scenarios,
1826            &esg_dir.join("climate_scenarios.json"),
1827            "Climate scenarios",
1828        );
1829        write_json_safe(
1830            &result.esg.anomaly_labels,
1831            &esg_dir.join("esg_anomaly_labels.json"),
1832            "ESG anomaly labels",
1833        );
1834    }
1835
1836    // ========================================================================
1837    // Process Mining (OCPM)
1838    // ========================================================================
1839    if let Some(ref event_log) = result.ocpm.event_log {
1840        if !event_log.events.is_empty() || !event_log.objects.is_empty() {
1841            let pm_dir = output_dir.join("process_mining");
1842            std::fs::create_dir_all(&pm_dir)?;
1843            info!("Writing process mining (OCPM) data...");
1844
1845            // Write the full OCEL 2.0 event log. v4.4.2+ patches every
1846            // `object_refs[*].object_type_id` with a companion
1847            // `object_type` key, matching the OCEL 2.0 spec and SDK
1848            // consumer expectations that previously saw `object_type`
1849            // arrive as null. See `add_ocel_object_type_alias` below.
1850            match serde_json::to_value(event_log) {
1851                Ok(mut v) => {
1852                    add_ocel_object_type_alias(&mut v);
1853                    match serde_json::to_string_pretty(&v) {
1854                        Ok(json) => {
1855                            if let Err(e) = std::fs::write(pm_dir.join("event_log.json"), json) {
1856                                warn!("Failed to write OCPM event log: {}", e);
1857                            } else {
1858                                info!(
1859                                    "  Event log written: {} events, {} objects",
1860                                    result.ocpm.event_count, result.ocpm.object_count
1861                                );
1862                            }
1863                        }
1864                        Err(e) => warn!("Failed to serialize OCPM event log: {}", e),
1865                    }
1866                }
1867                Err(e) => warn!("Failed to build OCPM event log Value: {}", e),
1868            }
1869
1870            // Write events separately for easy consumption
1871            if !event_log.events.is_empty() {
1872                match serde_json::to_string_pretty(&event_log.events) {
1873                    Ok(json) => {
1874                        if let Err(e) = std::fs::write(pm_dir.join("events.json"), json) {
1875                            warn!("Failed to write OCPM events: {}", e);
1876                        } else {
1877                            info!("  Events written: {} records", event_log.events.len());
1878                        }
1879                    }
1880                    Err(e) => warn!("Failed to serialize OCPM events: {}", e),
1881                }
1882            }
1883
1884            // Write objects separately for easy consumption
1885            if !event_log.objects.is_empty() {
1886                let objects: Vec<&_> = event_log.objects.iter().collect();
1887                match serde_json::to_string_pretty(&objects) {
1888                    Ok(json) => {
1889                        if let Err(e) = std::fs::write(pm_dir.join("objects.json"), json) {
1890                            warn!("Failed to write OCPM objects: {}", e);
1891                        } else {
1892                            info!("  Objects written: {} records", event_log.objects.len());
1893                        }
1894                    }
1895                    Err(e) => warn!("Failed to serialize OCPM objects: {}", e),
1896                }
1897            }
1898
1899            // Write process variants if any were computed
1900            if !event_log.variants.is_empty() {
1901                let variants: Vec<&_> = event_log.variants.values().collect();
1902                match serde_json::to_string_pretty(&variants) {
1903                    Ok(json) => {
1904                        if let Err(e) = std::fs::write(pm_dir.join("process_variants.json"), json) {
1905                            warn!("Failed to write process variants: {}", e);
1906                        } else {
1907                            info!(
1908                                "  Process variants written: {} variants",
1909                                event_log.variants.len()
1910                            );
1911                        }
1912                    }
1913                    Err(e) => warn!("Failed to serialize process variants: {}", e),
1914                }
1915            }
1916        }
1917    }
1918
1919    // ========================================================================
1920    // Chart of Accounts
1921    // ========================================================================
1922    // Primary file: flat array of accounts (shape stable since v3.x —
1923    // consumers iterate over it).
1924    match serde_json::to_string_pretty(&result.chart_of_accounts.accounts) {
1925        Ok(json) => {
1926            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts.json"), json) {
1927                warn!("Failed to write chart of accounts: {}", e);
1928            } else {
1929                info!("  Chart of accounts written");
1930            }
1931        }
1932        Err(e) => warn!("Failed to serialize chart of accounts: {}", e),
1933    }
1934    // v4.4.1 — companion metadata file so SDK consumers can read the
1935    // accounting framework + complexity + ID without having to infer
1936    // them from each account row. The SDK team flagged
1937    // `CoA.accounting_framework` arriving as null in v4.1.x; the field
1938    // didn't exist at all until v4.4.1.
1939    let coa_meta = serde_json::json!({
1940        "coa_id": result.chart_of_accounts.coa_id,
1941        "name": result.chart_of_accounts.name,
1942        "country": result.chart_of_accounts.country,
1943        "industry": result.chart_of_accounts.industry,
1944        "complexity": result.chart_of_accounts.complexity,
1945        "account_format": result.chart_of_accounts.account_format,
1946        "accounting_framework": result.chart_of_accounts.accounting_framework,
1947        "account_count": result.chart_of_accounts.accounts.len(),
1948    });
1949    match serde_json::to_string_pretty(&coa_meta) {
1950        Ok(json) => {
1951            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts_meta.json"), json) {
1952                warn!("Failed to write CoA metadata: {}", e);
1953            } else {
1954                info!(
1955                    "  Chart of accounts metadata written (accounting_framework: {:?})",
1956                    result.chart_of_accounts.accounting_framework
1957                );
1958            }
1959        }
1960        Err(e) => warn!("Failed to serialize CoA metadata: {}", e),
1961    }
1962
1963    // ========================================================================
1964    // Balance Validation Summary
1965    // ========================================================================
1966    if result.balance_validation.validated {
1967        match serde_json::to_string_pretty(&BalanceValidationSummary::from(
1968            &result.balance_validation,
1969        )) {
1970            Ok(json) => {
1971                if let Err(e) = std::fs::write(output_dir.join("balance_validation.json"), json) {
1972                    warn!("Failed to write balance validation: {}", e);
1973                } else {
1974                    info!("  Balance validation summary written");
1975                }
1976            }
1977            Err(e) => warn!("Failed to serialize balance validation: {}", e),
1978        }
1979    }
1980
1981    // ========================================================================
1982    // Data Quality Statistics (now serializable directly via Serialize derives)
1983    // ========================================================================
1984    {
1985        match serde_json::to_string_pretty(&result.data_quality_stats) {
1986            Ok(json) => {
1987                if let Err(e) = std::fs::write(output_dir.join("data_quality_stats.json"), json) {
1988                    warn!("Failed to write data quality stats: {}", e);
1989                } else {
1990                    info!("  Data quality stats written (full detail)");
1991                }
1992            }
1993            Err(e) => warn!("Failed to serialize data quality stats: {}", e),
1994        }
1995    }
1996
1997    // ========================================================================
1998    // v3.3.0: Analytics-metadata phase outputs (prior year, industry
1999    // benchmarks, management reports, drift events).
2000    // ========================================================================
2001    {
2002        let am = &result.analytics_metadata;
2003        if !am.prior_year_comparatives.is_empty()
2004            || !am.industry_benchmarks.is_empty()
2005            || !am.management_reports.is_empty()
2006            || !am.drift_events.is_empty()
2007        {
2008            let analytics_dir = output_dir.join("analytics");
2009            std::fs::create_dir_all(&analytics_dir)?;
2010            write_json_safe(
2011                &am.prior_year_comparatives,
2012                &analytics_dir.join("prior_year_comparatives.json"),
2013                "Prior-year comparatives (v3.3.0)",
2014            );
2015            write_json_safe(
2016                &am.industry_benchmarks,
2017                &analytics_dir.join("industry_benchmarks.json"),
2018                "Industry benchmarks (v3.3.0)",
2019            );
2020            write_json_safe(
2021                &am.management_reports,
2022                &analytics_dir.join("management_reports.json"),
2023                "Management reports (v3.3.0)",
2024            );
2025            write_json_safe(
2026                &am.drift_events,
2027                &analytics_dir.join("drift_events.json"),
2028                "Drift event labels (v3.3.0)",
2029            );
2030        }
2031    }
2032
2033    // ========================================================================
2034    // Pre-built Analytics (Benford, amount distribution, process variants, banking evaluation)
2035    // ========================================================================
2036    {
2037        let analytics_dir = output_dir.join("analytics");
2038
2039        // Collect non-zero amounts from journal entry lines
2040        let amounts: Vec<_> = result
2041            .journal_entries
2042            .iter()
2043            .flat_map(|je| je.lines.iter())
2044            .flat_map(|line| {
2045                let d = (!line.debit_amount.is_zero()).then_some(line.debit_amount);
2046                let c = (!line.credit_amount.is_zero()).then_some(line.credit_amount);
2047                d.into_iter().chain(c)
2048            })
2049            .collect();
2050
2051        if amounts.len() >= 10 {
2052            std::fs::create_dir_all(&analytics_dir)?;
2053            info!("Writing pre-built analytics ({} amounts)...", amounts.len());
2054
2055            // Benford's Law analysis
2056            let benford_analyzer = datasynth_eval::BenfordAnalyzer::default();
2057            match benford_analyzer.analyze(&amounts) {
2058                Ok(ref benford_result) => {
2059                    if let Ok(json) = serde_json::to_string_pretty(benford_result) {
2060                        if let Err(e) =
2061                            std::fs::write(analytics_dir.join("benford_analysis.json"), json)
2062                        {
2063                            warn!("Failed to write Benford analysis: {}", e);
2064                        } else {
2065                            info!(
2066                                "  Benford analysis written (conformity: {:?}, MAD: {:.4})",
2067                                benford_result.conformity, benford_result.mad
2068                            );
2069                        }
2070                    }
2071                }
2072                Err(e) => warn!("Benford analysis skipped: {}", e),
2073            }
2074
2075            // Amount distribution analysis
2076            let amount_analyzer = datasynth_eval::AmountDistributionAnalyzer::new();
2077            match amount_analyzer.analyze(&amounts) {
2078                Ok(ref dist_result) => {
2079                    if let Ok(json) = serde_json::to_string_pretty(dist_result) {
2080                        if let Err(e) =
2081                            std::fs::write(analytics_dir.join("amount_distribution.json"), json)
2082                        {
2083                            warn!("Failed to write amount distribution: {}", e);
2084                        } else {
2085                            info!(
2086                                "  Amount distribution written (skewness: {:.2}, kurtosis: {:.2})",
2087                                dist_result.skewness, dist_result.kurtosis
2088                            );
2089                        }
2090                    }
2091                }
2092                Err(e) => warn!("Amount distribution analysis skipped: {}", e),
2093            }
2094        }
2095
2096        // Process variant summary (from OCPM event log).
2097        //
2098        // v3.1.1 — always emit the file when an event_log exists. When the
2099        // event_log has no pre-computed `variants` map (older OCPM phases
2100        // didn't populate it), derive variants on the fly from the raw
2101        // events so SDK consumers see `analytics/process_variant_summary.json`
2102        // in every archive rather than `null`. Without this, the v3.1
2103        // claim that the file exists was only true when OCPM happened to
2104        // populate its variants map.
2105        if let Some(ref event_log) = result.ocpm.event_log {
2106            std::fs::create_dir_all(&analytics_dir)?;
2107            let variant_data: Vec<datasynth_eval::VariantData> = if !event_log.variants.is_empty() {
2108                event_log
2109                    .variants
2110                    .values()
2111                    .map(|v| datasynth_eval::VariantData {
2112                        variant_id: v.variant_id.clone(),
2113                        case_count: v.frequency as usize,
2114                        is_happy_path: v.is_happy_path,
2115                    })
2116                    .collect()
2117            } else {
2118                // Fallback: derive variants from raw events by case_id.
2119                // Each case's activity sequence (by activity_id) defines a
2120                // variant; cases with the same sequence collapse into one
2121                // variant. Events without a case_id are skipped since they
2122                // can't be grouped into a process instance.
2123                use std::collections::HashMap;
2124                // Key by case_id's string form to avoid pulling the uuid
2125                // crate into the output writer's dependency graph.
2126                let mut per_case: HashMap<String, Vec<String>> = HashMap::new();
2127                for ev in &event_log.events {
2128                    if let Some(case_id) = ev.case_id {
2129                        per_case
2130                            .entry(case_id.to_string())
2131                            .or_default()
2132                            .push(ev.activity_id.clone());
2133                    }
2134                }
2135                let mut variant_counts: HashMap<Vec<String>, usize> = HashMap::new();
2136                for activities in per_case.into_values() {
2137                    *variant_counts.entry(activities).or_insert(0) += 1;
2138                }
2139                // Happy path heuristic: every variant tied for the highest frequency.
2140                let max_count = variant_counts.values().copied().max().unwrap_or(0);
2141                variant_counts
2142                    .into_iter()
2143                    .enumerate()
2144                    .map(|(i, (seq, count))| datasynth_eval::VariantData {
2145                        variant_id: format!("V{i:04}:{}", seq.join("->")),
2146                        case_count: count,
2147                        is_happy_path: count == max_count && max_count > 0,
2148                    })
2149                    .collect()
2150            };
2151
2152            let variant_analyzer = datasynth_eval::VariantAnalyzer::new();
2153            match variant_analyzer.analyze(&variant_data) {
2154                Ok(ref variant_result) => {
2155                    if let Ok(json) = serde_json::to_string_pretty(variant_result) {
2156                        if let Err(e) =
2157                            std::fs::write(analytics_dir.join("process_variant_summary.json"), json)
2158                        {
2159                            warn!("Failed to write variant summary: {}", e);
2160                        } else {
2161                            info!(
2162                                "  Process variant summary written ({} variants, entropy: {:.2})",
2163                                variant_result.variant_count, variant_result.variant_entropy
2164                            );
2165                        }
2166                    }
2167                }
2168                Err(e) => {
2169                    // Even on analyzer error, emit a minimal JSON placeholder
2170                    // so the file always exists in the archive.
2171                    warn!("Variant analysis failed: {}; emitting empty summary", e);
2172                    let placeholder = serde_json::json!({
2173                        "variant_count": 0,
2174                        "variant_entropy": null,
2175                        "happy_path_concentration": null,
2176                        "top_variants": [],
2177                        "passes": false,
2178                        "issues": [format!("analyzer error: {e}")],
2179                    });
2180                    if let Ok(json) = serde_json::to_string_pretty(&placeholder) {
2181                        let _ = std::fs::write(
2182                            analytics_dir.join("process_variant_summary.json"),
2183                            json,
2184                        );
2185                    }
2186                }
2187            }
2188        }
2189
2190        // Banking evaluation (KYC completeness + AML detectability).
2191        // Matches the payload served by /v1/jobs/{id}/analytics so
2192        // archive-mode consumers see the same four files the endpoint returns.
2193        if !result.banking.customers.is_empty() {
2194            use datasynth_core::models::banking::BankingCustomerType;
2195            use datasynth_eval::banking::{
2196                AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
2197                KycCompletenessAnalyzer, KycProfileData, TypologyData,
2198            };
2199            use std::collections::HashMap;
2200            std::fs::create_dir_all(&analytics_dir)?;
2201
2202            let kyc_data: Vec<KycProfileData> = result
2203                .banking
2204                .customers
2205                .iter()
2206                .map(|c| KycProfileData {
2207                    profile_id: c.customer_id.to_string(),
2208                    has_name: true,
2209                    has_dob: c.date_of_birth.is_some(),
2210                    has_address: c.address_line1.is_some(),
2211                    has_id_document: c.national_id.is_some() || c.passport_number.is_some(),
2212                    has_risk_rating: true,
2213                    has_beneficial_owner: !c.beneficial_owners.is_empty(),
2214                    is_entity: c.customer_type == BankingCustomerType::Business,
2215                    is_verified: c.kyc_truthful,
2216                })
2217                .collect();
2218
2219            let mut banking_eval = BankingEvaluation::new();
2220            if let Ok(kyc_res) = KycCompletenessAnalyzer::new().analyze(&kyc_data) {
2221                banking_eval.kyc = Some(kyc_res);
2222            }
2223
2224            let suspicious: Vec<&_> = result
2225                .banking
2226                .transactions
2227                .iter()
2228                .filter(|t| t.is_suspicious)
2229                .collect();
2230            if !suspicious.is_empty() {
2231                // Use AmlTypology::canonical_name() so the evaluator's
2232                // exact-string match against EXPECTED_TYPOLOGIES succeeds.
2233                // Prior to v3.1.1 we used `format!("{:?}", r)` (Debug /
2234                // PascalCase) which never matched the lowercase expected
2235                // names and produced "typology_coverage = 0.000" in every
2236                // run regardless of actual typology injection.
2237                let aml_data: Vec<AmlTransactionData> = suspicious
2238                    .iter()
2239                    .map(|t| AmlTransactionData {
2240                        transaction_id: t.transaction_id.to_string(),
2241                        typology: t
2242                            .suspicion_reason
2243                            .as_ref()
2244                            .map(|r| r.canonical_name().to_string())
2245                            .unwrap_or_default(),
2246                        case_id: t.case_id.clone().unwrap_or_default(),
2247                        amount: t.amount.try_into().unwrap_or(0.0),
2248                        is_flagged: t.is_suspicious,
2249                    })
2250                    .collect();
2251
2252                let mut typology_map: HashMap<String, (usize, HashMap<String, bool>)> =
2253                    HashMap::new();
2254                for txn in &aml_data {
2255                    if !txn.typology.is_empty() {
2256                        let entry = typology_map
2257                            .entry(txn.typology.clone())
2258                            .or_insert_with(|| (0, HashMap::new()));
2259                        entry.0 += 1;
2260                        entry.1.insert(txn.case_id.clone(), true);
2261                    }
2262                }
2263                let typology_data: Vec<TypologyData> = typology_map
2264                    .iter()
2265                    .map(|(name, (count, cases))| TypologyData {
2266                        name: name.clone(),
2267                        scenario_count: *count,
2268                        case_ids_consistent: cases.len() <= *count,
2269                    })
2270                    .collect();
2271
2272                if let Ok(aml_res) =
2273                    AmlDetectabilityAnalyzer::new().analyze(&aml_data, &typology_data)
2274                {
2275                    banking_eval.aml = Some(aml_res);
2276                }
2277            }
2278            banking_eval.check_thresholds();
2279
2280            match serde_json::to_string_pretty(&banking_eval) {
2281                Ok(json) => {
2282                    if let Err(e) =
2283                        std::fs::write(analytics_dir.join("banking_evaluation.json"), json)
2284                    {
2285                        warn!("Failed to write banking evaluation: {}", e);
2286                    } else {
2287                        info!(
2288                            "  Banking evaluation written ({} profiles, {} issues, passes={})",
2289                            result.banking.customers.len(),
2290                            banking_eval.issues.len(),
2291                            banking_eval.passes
2292                        );
2293                    }
2294                }
2295                Err(e) => warn!("Failed to serialize banking evaluation: {}", e),
2296            }
2297        }
2298    }
2299
2300    // ========================================================================
2301    // Data Quality Issue Records + Quality Labels
2302    // ========================================================================
2303    if !result.quality_issues.is_empty() {
2304        let labels_dir = output_dir.join("labels");
2305        std::fs::create_dir_all(&labels_dir)?;
2306        info!("Writing data quality issue records...");
2307        write_json_safe(
2308            &result.quality_issues,
2309            &labels_dir.join("quality_issues.json"),
2310            "Data quality issues",
2311        );
2312
2313        // Derive quality_labels.json from quality_issues: maps each QualityIssue
2314        // to a QualityIssueLabel with the corresponding LabeledIssueType and severity.
2315        use datasynth_generators::{
2316            LabeledIssueType, QualityIssueLabel, QualityIssueType, QualityLabels,
2317        };
2318        let mut quality_labels = QualityLabels::with_capacity(result.quality_issues.len());
2319        for issue in &result.quality_issues {
2320            let labeled_type = match issue.issue_type {
2321                QualityIssueType::MissingValue => LabeledIssueType::MissingValue,
2322                QualityIssueType::Typo => LabeledIssueType::Typo,
2323                QualityIssueType::DateFormatVariation
2324                | QualityIssueType::AmountFormatVariation
2325                | QualityIssueType::IdentifierFormatVariation
2326                | QualityIssueType::TextFormatVariation => LabeledIssueType::FormatVariation,
2327                QualityIssueType::ExactDuplicate
2328                | QualityIssueType::NearDuplicate
2329                | QualityIssueType::FuzzyDuplicate => LabeledIssueType::Duplicate,
2330                QualityIssueType::EncodingIssue => LabeledIssueType::EncodingIssue,
2331            };
2332            let mut label = QualityIssueLabel::new(
2333                labeled_type,
2334                issue.record_id.clone(),
2335                issue.field.clone().unwrap_or_else(|| "_record".to_string()),
2336                "data_quality_injector",
2337            );
2338            if let Some(ref orig) = issue.original_value {
2339                label = label.with_original(orig.clone());
2340            }
2341            if let Some(ref modified) = issue.modified_value {
2342                label = label.with_modified(modified.clone());
2343            }
2344            quality_labels.add(label);
2345        }
2346        if let Ok(json) = serde_json::to_string_pretty(&quality_labels) {
2347            if let Err(e) = std::fs::write(labels_dir.join("quality_labels.json"), json.as_bytes())
2348            {
2349                warn!("Failed to write quality labels: {}", e);
2350            } else {
2351                info!(
2352                    "  Quality labels written: {} labels -> labels/quality_labels.json",
2353                    quality_labels.len()
2354                );
2355            }
2356        }
2357    }
2358
2359    // ========================================================================
2360    // Internal Controls
2361    // ========================================================================
2362    if !result.internal_controls.is_empty() || !result.sod_violations.is_empty() {
2363        let ctrl_dir = output_dir.join("internal_controls");
2364        std::fs::create_dir_all(&ctrl_dir)?;
2365        info!("Writing internal controls data...");
2366
2367        write_json_safe(
2368            &result.internal_controls,
2369            &ctrl_dir.join("internal_controls.json"),
2370            "Internal controls",
2371        );
2372        // SoD violations extracted from control-annotated journal entries
2373        write_json_safe(
2374            &result.sod_violations,
2375            &ctrl_dir.join("sod_violations.json"),
2376            "SoD violations",
2377        );
2378
2379        // SoD conflict pairs, SoD rules, control mappings, and COSO control mapping
2380        // are static reference data — export via ControlExporter regardless of whether
2381        // individual violations were generated so the master catalog is always present.
2382        let exporter = datasynth_output::ControlExporter::new(&ctrl_dir);
2383        match exporter.export_standard() {
2384            Ok(summary) => {
2385                info!(
2386                    "  Control master data written: {} controls, {} SoD conflicts, {} SoD rules, {} COSO mappings, {} account mappings",
2387                    summary.controls_count,
2388                    summary.sod_conflicts_count,
2389                    summary.sod_rules_count,
2390                    summary.coso_mappings_count,
2391                    summary.account_mappings_count,
2392                );
2393            }
2394            Err(e) => warn!("Failed to write control master data: {}", e),
2395        }
2396    }
2397
2398    // ========================================================================
2399    // Accounting Standards
2400    // ========================================================================
2401    if !result.accounting_standards.contracts.is_empty()
2402        || !result.accounting_standards.impairment_tests.is_empty()
2403        || !result.accounting_standards.business_combinations.is_empty()
2404        || !result.accounting_standards.ecl_models.is_empty()
2405        || !result.accounting_standards.provisions.is_empty()
2406        || !result
2407            .accounting_standards
2408            .currency_translation_results
2409            .is_empty()
2410    {
2411        let acct_dir = output_dir.join("accounting_standards");
2412        std::fs::create_dir_all(&acct_dir)?;
2413        info!("Writing accounting standards data...");
2414
2415        write_json_safe(
2416            &result.accounting_standards.contracts,
2417            &acct_dir.join("customer_contracts.json"),
2418            "Customer contracts",
2419        );
2420        write_json_safe(
2421            &result.accounting_standards.impairment_tests,
2422            &acct_dir.join("impairment_tests.json"),
2423            "Impairment tests",
2424        );
2425        write_json_safe(
2426            &result.accounting_standards.business_combinations,
2427            &acct_dir.join("business_combinations.json"),
2428            "Business combinations",
2429        );
2430        write_json_safe(
2431            &result
2432                .accounting_standards
2433                .business_combination_journal_entries,
2434            &acct_dir.join("business_combination_journal_entries.json"),
2435            "Business combination journal entries",
2436        );
2437        write_json_safe(
2438            &result.accounting_standards.ecl_models,
2439            &acct_dir.join("ecl_models.json"),
2440            "ECL models",
2441        );
2442        write_json_safe(
2443            &result.accounting_standards.ecl_provision_movements,
2444            &acct_dir.join("ecl_provision_movements.json"),
2445            "ECL provision movements",
2446        );
2447        write_json_safe(
2448            &result.accounting_standards.ecl_journal_entries,
2449            &acct_dir.join("ecl_journal_entries.json"),
2450            "ECL journal entries",
2451        );
2452        write_json_safe(
2453            &result.accounting_standards.provisions,
2454            &acct_dir.join("provisions.json"),
2455            "Provisions (IAS 37 / ASC 450)",
2456        );
2457        write_json_safe(
2458            &result.accounting_standards.provision_movements,
2459            &acct_dir.join("provision_movements.json"),
2460            "Provision movements",
2461        );
2462        write_json_safe(
2463            &result.accounting_standards.contingent_liabilities,
2464            &acct_dir.join("contingent_liabilities.json"),
2465            "Contingent liabilities",
2466        );
2467        write_json_safe(
2468            &result.accounting_standards.provision_journal_entries,
2469            &acct_dir.join("provision_journal_entries.json"),
2470            "Provision journal entries",
2471        );
2472
2473        // IAS 21 — write under accounting_standards/fx/
2474        if !result
2475            .accounting_standards
2476            .currency_translation_results
2477            .is_empty()
2478        {
2479            let fx_dir = acct_dir.join("fx");
2480            std::fs::create_dir_all(&fx_dir)?;
2481            write_json_safe(
2482                &result.accounting_standards.currency_translation_results,
2483                &fx_dir.join("currency_translation_results.json"),
2484                "IAS 21 currency translation results",
2485            );
2486        }
2487
2488        // v3.3.1: Leases (IFRS 16 / ASC 842)
2489        if !result.accounting_standards.leases.is_empty() {
2490            let leases_dir = acct_dir.join("leases");
2491            std::fs::create_dir_all(&leases_dir)?;
2492            write_json_safe(
2493                &result.accounting_standards.leases,
2494                &leases_dir.join("leases.json"),
2495                "Leases (IFRS 16 / ASC 842) — v3.3.1",
2496            );
2497        }
2498
2499        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
2500        if !result
2501            .accounting_standards
2502            .fair_value_measurements
2503            .is_empty()
2504        {
2505            let fv_dir = acct_dir.join("fair_value");
2506            std::fs::create_dir_all(&fv_dir)?;
2507            write_json_safe(
2508                &result.accounting_standards.fair_value_measurements,
2509                &fv_dir.join("fair_value_measurements.json"),
2510                "Fair value measurements (IFRS 13 / ASC 820) — v3.3.1",
2511            );
2512        }
2513
2514        // v3.3.1: Framework reconciliation (dual reporting)
2515        if !result.accounting_standards.framework_differences.is_empty() {
2516            let diff_dir = acct_dir.join("framework_differences");
2517            std::fs::create_dir_all(&diff_dir)?;
2518            write_json_safe(
2519                &result.accounting_standards.framework_differences,
2520                &diff_dir.join("framework_differences.json"),
2521                "Framework differences (US GAAP vs IFRS) — v3.3.1",
2522            );
2523            write_json_safe(
2524                &result.accounting_standards.framework_reconciliations,
2525                &diff_dir.join("framework_reconciliations.json"),
2526                "Per-entity framework reconciliation — v3.3.1",
2527            );
2528        }
2529    }
2530
2531    // ========================================================================
2532    // Quality Gate Results
2533    // ========================================================================
2534    if let Some(ref gate_result) = result.gate_result {
2535        match serde_json::to_string_pretty(gate_result) {
2536            Ok(json) => {
2537                if let Err(e) = std::fs::write(output_dir.join("quality_gate_result.json"), json) {
2538                    warn!("Failed to write quality gate result: {}", e);
2539                } else {
2540                    info!(
2541                        "  Quality gate result written (passed={})",
2542                        gate_result.passed
2543                    );
2544                }
2545            }
2546            Err(e) => warn!("Failed to serialize quality gate result: {}", e),
2547        }
2548    }
2549
2550    // ========================================================================
2551    // Treasury
2552    // ========================================================================
2553    if !result.treasury.debt_instruments.is_empty()
2554        || !result.treasury.cash_positions.is_empty()
2555        || !result.treasury.hedging_instruments.is_empty()
2556    {
2557        let treasury_dir = output_dir.join("treasury");
2558        std::fs::create_dir_all(&treasury_dir)?;
2559        info!("Writing treasury data...");
2560
2561        write_json_safe(
2562            &result.treasury.debt_instruments,
2563            &treasury_dir.join("debt_instruments.json"),
2564            "Debt instruments",
2565        );
2566        write_json_safe(
2567            &result.treasury.hedging_instruments,
2568            &treasury_dir.join("hedging_instruments.json"),
2569            "Hedging instruments",
2570        );
2571        write_json_safe(
2572            &result.treasury.hedge_relationships,
2573            &treasury_dir.join("hedge_relationships.json"),
2574            "Hedge relationships",
2575        );
2576        write_json_safe(
2577            &result.treasury.cash_positions,
2578            &treasury_dir.join("cash_positions.json"),
2579            "Cash positions",
2580        );
2581        write_json_safe(
2582            &result.treasury.cash_forecasts,
2583            &treasury_dir.join("cash_forecasts.json"),
2584            "Cash forecasts",
2585        );
2586        write_json_safe(
2587            &result.treasury.cash_pools,
2588            &treasury_dir.join("cash_pools.json"),
2589            "Cash pools",
2590        );
2591        write_json_safe(
2592            &result.treasury.cash_pool_sweeps,
2593            &treasury_dir.join("cash_pool_sweeps.json"),
2594            "Cash pool sweeps",
2595        );
2596        write_json_safe(
2597            &result.treasury.bank_guarantees,
2598            &treasury_dir.join("bank_guarantees.json"),
2599            "Bank guarantees",
2600        );
2601        write_json_safe(
2602            &result.treasury.netting_runs,
2603            &treasury_dir.join("netting_runs.json"),
2604            "Netting runs",
2605        );
2606        if !result.treasury.treasury_anomaly_labels.is_empty() {
2607            write_json_safe(
2608                &result.treasury.treasury_anomaly_labels,
2609                &treasury_dir.join("treasury_anomaly_labels.json"),
2610                "Treasury anomaly labels",
2611            );
2612        }
2613    }
2614
2615    // ========================================================================
2616    // Project Accounting
2617    // ========================================================================
2618    if !result.project_accounting.projects.is_empty() {
2619        let pa_dir = output_dir.join("project_accounting");
2620        std::fs::create_dir_all(&pa_dir)?;
2621        info!("Writing project accounting data...");
2622
2623        write_json_safe(
2624            &result.project_accounting.projects,
2625            &pa_dir.join("projects.json"),
2626            "Projects",
2627        );
2628        write_json_safe(
2629            &result.project_accounting.cost_lines,
2630            &pa_dir.join("cost_lines.json"),
2631            "Project cost lines",
2632        );
2633        write_json_safe(
2634            &result.project_accounting.revenue_records,
2635            &pa_dir.join("revenue_records.json"),
2636            "Project revenue records",
2637        );
2638        write_json_safe(
2639            &result.project_accounting.earned_value_metrics,
2640            &pa_dir.join("earned_value_metrics.json"),
2641            "Earned value metrics",
2642        );
2643        write_json_safe(
2644            &result.project_accounting.change_orders,
2645            &pa_dir.join("change_orders.json"),
2646            "Change orders",
2647        );
2648        write_json_safe(
2649            &result.project_accounting.milestones,
2650            &pa_dir.join("milestones.json"),
2651            "Project milestones",
2652        );
2653    }
2654
2655    // ========================================================================
2656    // Evolution Events (Process Evolution + Organizational Events)
2657    // ========================================================================
2658    if !result.process_evolution.is_empty()
2659        || !result.organizational_events.is_empty()
2660        || !result.disruption_events.is_empty()
2661    {
2662        let events_dir = output_dir.join("events");
2663        std::fs::create_dir_all(&events_dir)?;
2664        info!("Writing evolution events...");
2665
2666        write_json_safe(
2667            &result.process_evolution,
2668            &events_dir.join("process_evolution_events.json"),
2669            "Process evolution events",
2670        );
2671        write_json_safe(
2672            &result.organizational_events,
2673            &events_dir.join("organizational_events.json"),
2674            "Organizational events",
2675        );
2676        write_json_safe(
2677            &result.disruption_events,
2678            &events_dir.join("disruption_events.json"),
2679            "Disruption events",
2680        );
2681    }
2682
2683    // ========================================================================
2684    // ML Training: Counterfactual Pairs
2685    // ========================================================================
2686    if !result.counterfactual_pairs.is_empty() {
2687        let ml_dir = output_dir.join("ml_training");
2688        std::fs::create_dir_all(&ml_dir)?;
2689        info!("Writing ML training data...");
2690
2691        write_json_safe(
2692            &result.counterfactual_pairs,
2693            &ml_dir.join("counterfactual_pairs.json"),
2694            "Counterfactual pairs",
2695        );
2696    }
2697
2698    // ========================================================================
2699    // Fraud Red-Flag Indicators
2700    // ========================================================================
2701    if !result.red_flags.is_empty() {
2702        let labels_dir = output_dir.join("labels");
2703        std::fs::create_dir_all(&labels_dir)?;
2704        info!("Writing fraud red-flag indicators...");
2705
2706        write_json_safe(
2707            &result.red_flags,
2708            &labels_dir.join("fraud_red_flags.json"),
2709            "Fraud red flags",
2710        );
2711    }
2712
2713    // ========================================================================
2714    // Collusion Rings
2715    // ========================================================================
2716    if !result.collusion_rings.is_empty() {
2717        let labels_dir = output_dir.join("labels");
2718        std::fs::create_dir_all(&labels_dir)?;
2719        info!("Writing collusion rings...");
2720
2721        write_json_safe(
2722            &result.collusion_rings,
2723            &labels_dir.join("collusion_rings.json"),
2724            "Collusion rings",
2725        );
2726    }
2727
2728    // ========================================================================
2729    // Temporal Vendor Version Chains
2730    // ========================================================================
2731    if !result.temporal_vendor_chains.is_empty() {
2732        let temporal_dir = output_dir.join("temporal");
2733        std::fs::create_dir_all(&temporal_dir)?;
2734        info!("Writing temporal vendor version chains...");
2735
2736        write_json_safe(
2737            &result.temporal_vendor_chains,
2738            &temporal_dir.join("vendor_version_chains.json"),
2739            "Vendor version chains",
2740        );
2741    }
2742
2743    // ========================================================================
2744    // Entity Relationship Graph + Cross-Process Links
2745    // ========================================================================
2746    if result.entity_relationship_graph.is_some() || !result.cross_process_links.is_empty() {
2747        let rel_dir = output_dir.join("relationships");
2748        std::fs::create_dir_all(&rel_dir)?;
2749        info!("Writing entity relationship data...");
2750
2751        if let Some(ref graph) = result.entity_relationship_graph {
2752            match serde_json::to_string_pretty(graph) {
2753                Ok(json) => {
2754                    let path = rel_dir.join("entity_relationship_graph.json");
2755                    if let Err(e) = std::fs::write(&path, json) {
2756                        warn!("Failed to write entity relationship graph: {}", e);
2757                    } else {
2758                        info!(
2759                            "  Entity relationship graph written: {} nodes, {} edges -> {}",
2760                            graph.nodes.len(),
2761                            graph.edges.len(),
2762                            path.display()
2763                        );
2764                    }
2765                }
2766                Err(e) => warn!("Failed to serialize entity relationship graph: {}", e),
2767            }
2768        }
2769
2770        write_json_safe(
2771            &result.cross_process_links,
2772            &rel_dir.join("cross_process_links.json"),
2773            "Cross-process links",
2774        );
2775    }
2776
2777    // ========================================================================
2778    // Industry-Specific Data
2779    // ========================================================================
2780    if let Some(ref industry_output) = result.industry_output {
2781        if !industry_output.gl_accounts.is_empty() {
2782            let industry_dir = output_dir.join("industry");
2783            std::fs::create_dir_all(&industry_dir).ok();
2784            info!("Writing industry-specific data...");
2785            match serde_json::to_string_pretty(industry_output) {
2786                Ok(json) => {
2787                    if let Err(e) = std::fs::write(industry_dir.join("industry_data.json"), json) {
2788                        warn!("Failed to write industry data: {}", e);
2789                    } else {
2790                        info!(
2791                            "  Industry data written: {} GL accounts for {}",
2792                            industry_output.gl_accounts.len(),
2793                            industry_output.industry
2794                        );
2795                    }
2796                }
2797                Err(e) => warn!("Failed to serialize industry data: {}", e),
2798            }
2799        }
2800    }
2801
2802    // ========================================================================
2803    // Graph Export Summary
2804    // ========================================================================
2805    if result.graph_export.exported {
2806        let graph_dir = output_dir.join("graph_export");
2807        std::fs::create_dir_all(&graph_dir).ok();
2808        match serde_json::to_string_pretty(&result.graph_export) {
2809            Ok(json) => {
2810                if let Err(e) = std::fs::write(graph_dir.join("graph_export_summary.json"), json) {
2811                    warn!("Failed to write graph export summary: {}", e);
2812                } else {
2813                    info!("  Graph export summary written");
2814                }
2815            }
2816            Err(e) => warn!("Failed to serialize graph export summary: {}", e),
2817        }
2818    }
2819
2820    // ========================================================================
2821    // Compliance Regulations
2822    // ========================================================================
2823    let cr = &result.compliance_regulations;
2824    let has_compliance_data = !cr.standard_records.is_empty()
2825        || !cr.audit_procedures.is_empty()
2826        || !cr.findings.is_empty()
2827        || !cr.filings.is_empty();
2828    if has_compliance_data {
2829        let cr_dir = output_dir.join("compliance_regulations");
2830        std::fs::create_dir_all(&cr_dir)?;
2831        info!("Writing compliance regulations data...");
2832
2833        write_json_safe(
2834            &cr.standard_records,
2835            &cr_dir.join("compliance_standards.json"),
2836            "Compliance standards",
2837        );
2838        write_json_safe(
2839            &cr.cross_reference_records,
2840            &cr_dir.join("cross_references.json"),
2841            "Cross-references",
2842        );
2843        write_json_safe(
2844            &cr.jurisdiction_records,
2845            &cr_dir.join("jurisdiction_profiles.json"),
2846            "Jurisdiction profiles",
2847        );
2848        write_json_safe(
2849            &cr.audit_procedures,
2850            &cr_dir.join("audit_procedures.json"),
2851            "Audit procedures",
2852        );
2853        write_json_safe(
2854            &cr.findings,
2855            &cr_dir.join("compliance_findings.json"),
2856            "Compliance findings",
2857        );
2858        write_json_safe(
2859            &cr.filings,
2860            &cr_dir.join("regulatory_filings.json"),
2861            "Regulatory filings",
2862        );
2863
2864        if let Some(ref graph) = cr.compliance_graph {
2865            match serde_json::to_string_pretty(graph) {
2866                Ok(json) => {
2867                    if let Err(e) = std::fs::write(cr_dir.join("compliance_graph.json"), json) {
2868                        warn!("Failed to write compliance graph: {}", e);
2869                    } else {
2870                        info!(
2871                            "  Compliance graph written: {} nodes, {} edges",
2872                            graph.nodes.len(),
2873                            graph.edges.len()
2874                        );
2875                    }
2876                }
2877                Err(e) => warn!("Failed to serialize compliance graph: {}", e),
2878            }
2879        }
2880    }
2881
2882    // ========================================================================
2883    // Generation Statistics
2884    // ========================================================================
2885    match serde_json::to_string_pretty(&result.statistics) {
2886        Ok(json) => {
2887            if let Err(e) = std::fs::write(output_dir.join("generation_statistics.json"), json) {
2888                warn!("Failed to write generation statistics: {}", e);
2889            } else {
2890                info!("  Generation statistics written");
2891            }
2892        }
2893        Err(e) => warn!("Failed to serialize generation statistics: {}", e),
2894    }
2895
2896    info!("Output writing complete.");
2897    Ok(())
2898}
2899
2900/// Write JSON with error handling - logs a warning on failure but does not abort.
2901///
2902/// When the `FLAT_LAYOUT_ACTIVE` thread-local is true (set by
2903/// `write_all_output_with_layout` when `export_layout: flat`), this routes
2904/// through `write_json_flat` so nested `{header, lines|items|allocations}`
2905/// shapes are automatically flattened. For structures without that shape,
2906/// `write_json_flat` passes through unchanged.
2907fn write_json_safe<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2908    // Skip JSON entirely when not in requested output formats
2909    if SKIP_JSON.with(|c| c.get()) {
2910        return;
2911    }
2912    if FLAT_LAYOUT_ACTIVE.with(|c| c.get()) {
2913        write_json_flat(data, path, label);
2914    } else if let Err(e) = write_json(data, path, label) {
2915        warn!("Failed to write {}: {}", label, e);
2916    }
2917}
2918
2919/// Write JSON, choosing flat or nested layout based on the flag.
2920fn write_json_auto<T: serde::Serialize>(data: &[T], path: &Path, label: &str, flat: bool) {
2921    if flat {
2922        write_json_flat(data, path, label);
2923    } else {
2924        write_json_safe(data, path, label);
2925    }
2926}
2927
2928/// Write a JSON file ALWAYS, even when the slice is empty (writes `[]`).
2929///
2930/// Use for files that must exist in the archive for SDK consumers
2931/// (e.g., `audit_opinions.json`) regardless of whether the phase that
2932/// populates them ran. `write_json_safe` / `write_json` short-circuit
2933/// on empty slices, which would break manifest-driven clients that
2934/// expect the file to be present.
2935fn write_json_always<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2936    if SKIP_JSON.with(|c| c.get()) {
2937        return;
2938    }
2939    match std::fs::File::create(path) {
2940        Ok(file) => {
2941            let mut writer = std::io::BufWriter::with_capacity(64 * 1024, file);
2942            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
2943                writer.write_all(b"[\n")?;
2944                for (i, item) in data.iter().enumerate() {
2945                    if i > 0 {
2946                        writer.write_all(b",\n")?;
2947                    }
2948                    serde_json::to_writer_pretty(&mut writer, item)?;
2949                }
2950                if !data.is_empty() {
2951                    writer.write_all(b"\n")?;
2952                }
2953                writer.write_all(b"]\n")?;
2954                writer.flush()?;
2955                Ok(())
2956            })() {
2957                warn!("Failed to write {}: {}", label, e);
2958            } else {
2959                info!(
2960                    "  {} written: {} records -> {}",
2961                    label,
2962                    data.len(),
2963                    path.display()
2964                );
2965            }
2966        }
2967        Err(e) => {
2968            warn!("Failed to create {}: {}", path.display(), e);
2969        }
2970    }
2971}
2972
2973/// Write a flat JSON file by expanding a primary items array and merging the
2974/// surrounding context onto each line.
2975///
2976/// Flattens any record that contains a recognised items array
2977/// (`items`, `lines`, `line_items`, or `allocations`) into one row per line,
2978/// carrying over both the optional `header` sub-object and all other
2979/// top-level fields. Records without a recognised items array are emitted
2980/// as-is, except that an optional nested `header` sub-object is unwrapped
2981/// onto the top level so consumers see a uniformly flat shape.
2982///
2983/// Flow-style documents (`{header, items}`) and subledger-style documents
2984/// (`{..top-level scalars.., lines}`, e.g. AP/AR invoices, inventory
2985/// valuation runs) are both handled — fixing the SDK-team-reported gap
2986/// where subledger invoices were left with `lines` nested in flat mode.
2987///
2988/// Uses heap-allocated intermediates to avoid stack overflow with large
2989/// records in constrained environments (e.g., distroless containers with
2990/// glibc 2.36). Fixes #116.
2991fn write_json_flat<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2992    if data.is_empty() {
2993        return;
2994    }
2995
2996    // Pre-allocate on heap — avoid flat_map closure accumulating on the stack
2997    let mut flat: Vec<serde_json::Value> = Vec::with_capacity(data.len());
2998
2999    for item in data {
3000        let val = match serde_json::to_value(item) {
3001            Ok(v) => v,
3002            Err(e) => {
3003                warn!("Failed to serialize record for flat export: {}", e);
3004                continue;
3005            }
3006        };
3007
3008        let serde_json::Value::Object(map) = val else {
3009            flat.push(val);
3010            continue;
3011        };
3012
3013        // Find the primary items array key (first match wins).
3014        let items_key = ["items", "lines", "allocations", "line_items"]
3015            .iter()
3016            .find(|k| map.contains_key(**k))
3017            .copied();
3018
3019        // Optional nested header sub-object (used by document flows).
3020        let header_map = match map.get("header") {
3021            Some(serde_json::Value::Object(h)) => Some(h),
3022            _ => None,
3023        };
3024
3025        let Some(items_key) = items_key else {
3026            // No items array. Emit one row, unwrapping the optional header
3027            // sub-object so consumers see a flat shape regardless of model
3028            // layout (e.g. Payments have `header` but no items/allocations
3029            // when allocations are empty).
3030            if let Some(header_map) = header_map {
3031                let mut merged = map.clone();
3032                merged.remove("header");
3033                for (k, v) in header_map {
3034                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3035                }
3036                flat.push(serde_json::Value::Object(merged));
3037            } else {
3038                flat.push(serde_json::Value::Object(map));
3039            }
3040            continue;
3041        };
3042
3043        let Some(serde_json::Value::Array(items)) = map.get(items_key) else {
3044            // `items_key` present but not an array — passthrough.
3045            flat.push(serde_json::Value::Object(map));
3046            continue;
3047        };
3048
3049        // Empty items array: emit one row with the (unwrapped) header
3050        // context so downstream consumers can still find the parent
3051        // record — prevents silently dropping empty-lines invoices.
3052        if items.is_empty() {
3053            let mut merged = map.clone();
3054            merged.remove(items_key);
3055            if let Some(header_map) = header_map {
3056                merged.remove("header");
3057                for (k, v) in header_map {
3058                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3059                }
3060            }
3061            flat.push(serde_json::Value::Object(merged));
3062            continue;
3063        }
3064
3065        // Collect all other top-level fields (scalars, objects, arrays)
3066        // so they carry over onto every flattened line — matching pandas
3067        // `explode()` semantics. This is the behaviour SDK consumers
3068        // expect: header context is repeated per line, nested objects
3069        // like `net_amount: {amount, currency}` come along for the ride.
3070        let top_fields: Vec<(&String, &serde_json::Value)> = map
3071            .iter()
3072            .filter(|(k, _)| k.as_str() != "header" && k.as_str() != items_key)
3073            .collect();
3074
3075        flat.reserve(items.len());
3076        for item_val in items {
3077            let mut merged = serde_json::Map::new();
3078            // Line/item fields first (take precedence on collisions).
3079            if let serde_json::Value::Object(m) = item_val {
3080                merged.extend(m.iter().map(|(k, v)| (k.clone(), v.clone())));
3081            }
3082            // Header sub-object (when present) — don't overwrite line fields.
3083            if let Some(header_map) = header_map {
3084                for (k, v) in header_map {
3085                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3086                }
3087            }
3088            // All other top-level fields.
3089            for &(k, v) in &top_fields {
3090                merged.entry(k.clone()).or_insert_with(|| v.clone());
3091            }
3092            flat.push(serde_json::Value::Object(merged));
3093        }
3094    }
3095
3096    if flat.is_empty() {
3097        return;
3098    }
3099
3100    // Stream-write each flattened record instead of serializing the whole Vec
3101    let count = flat.len();
3102    match std::fs::File::create(path) {
3103        Ok(file) => {
3104            use std::io::Write;
3105            let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
3106            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
3107                writer.write_all(b"[\n")?;
3108                for (i, item) in flat.iter().enumerate() {
3109                    if i > 0 {
3110                        writer.write_all(b",\n")?;
3111                    }
3112                    serde_json::to_writer_pretty(&mut writer, item)?;
3113                }
3114                writer.write_all(b"\n]\n")?;
3115                writer.flush()?;
3116                Ok(())
3117            })() {
3118                warn!("Failed to write {}: {}", label, e);
3119            } else {
3120                info!(
3121                    "  {} written (flat): {} records -> {}",
3122                    label,
3123                    count,
3124                    path.display()
3125                );
3126            }
3127        }
3128        Err(e) => warn!("Failed to create {}: {}", label, e),
3129    }
3130}
3131
3132/// Write a single serializable value as a JSON file.
3133fn write_json_single<T: serde::Serialize>(
3134    data: &T,
3135    path: &Path,
3136    label: &str,
3137) -> Result<(), Box<dyn std::error::Error>> {
3138    let file = std::fs::File::create(path)?;
3139    let writer = std::io::BufWriter::with_capacity(256 * 1024, file);
3140    serde_json::to_writer_pretty(writer, data)?;
3141    info!("  {} written -> {}", label, path.display());
3142    Ok(())
3143}
3144
3145/// Write a single serializable value as a JSON file, logging a warning on failure.
3146fn write_json_single_safe<T: serde::Serialize>(data: &T, path: &Path, label: &str) {
3147    if SKIP_JSON.with(|c| c.get()) {
3148        return;
3149    }
3150    if let Err(e) = write_json_single(data, path, label) {
3151        warn!("Failed to write {}: {}", label, e);
3152    }
3153}
3154
3155/// Serializable summary of balance validation (avoids serializing the full
3156/// `BalanceValidationResult` which has non-Serialize validation error types).
3157#[derive(serde::Serialize)]
3158struct BalanceValidationSummary {
3159    validated: bool,
3160    is_balanced: bool,
3161    entries_processed: u64,
3162    total_debits: String,
3163    total_credits: String,
3164    accounts_tracked: usize,
3165    companies_tracked: usize,
3166    has_unbalanced_entries: bool,
3167    validation_error_count: usize,
3168}
3169
3170impl BalanceValidationSummary {
3171    fn from(v: &crate::enhanced_orchestrator::BalanceValidationResult) -> Self {
3172        Self {
3173            validated: v.validated,
3174            is_balanced: v.is_balanced,
3175            entries_processed: v.entries_processed,
3176            total_debits: v.total_debits.to_string(),
3177            total_credits: v.total_credits.to_string(),
3178            accounts_tracked: v.accounts_tracked,
3179            companies_tracked: v.companies_tracked,
3180            has_unbalanced_entries: v.has_unbalanced_entries,
3181            validation_error_count: v.validation_errors.len(),
3182        }
3183    }
3184}
3185
#[cfg(test)]
mod tests {
    /// v5.17.0 — verify the journal_entries.csv header has exactly 48 columns
    /// and that `fraud_type` and `anomaly_type` are the last two.
    ///
    /// NOTE(review): the header below is a literal copy kept inside this
    /// test. It guards the expected column count and tail order, but it does
    /// not by itself detect drift in the header string used by the CSV
    /// writer — confirm whether that string can be shared instead of copied.
    #[test]
    fn journal_entries_csv_header_has_48_columns() {
        let header =
            "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
                      document_type,currency,exchange_rate,reference,header_text,created_by,source,\
                      business_process,ledger,is_fraud,is_anomaly,\
                      line_number,gl_account,debit_amount,credit_amount,local_amount,transaction_amount,\
                      cost_center,profit_center,business_unit,line_text,\
                      auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
                      is_manual,is_post_close,source_system,\
                      account_description,financial_statement_category,\
                      assignment,value_date,tax_code,transaction_id,\
                      account_class,account_class_name,account_sub_class,account_sub_class_name,\
                      predecessor_line_id,trading_partner,fraud_type,anomaly_type";
        // Strip any embedded whitespace from line continuations before counting.
        let normalized: String = header.chars().filter(|c| !c.is_whitespace()).collect();
        let columns: Vec<&str> = normalized.split(',').collect();
        let n_cols = columns.len();
        assert_eq!(
            n_cols, 48,
            "expected 48 columns in journal_entries.csv header, got {n_cols}"
        );
        // The two type columns must stay at the very end of the header.
        assert!(
            columns.ends_with(&["fraud_type", "anomaly_type"]),
            "expected header to end with fraud_type,anomaly_type; got: {columns:?}"
        );
    }

    /// v5.17.0 — fraud_type column emits the FraudType variant name via Debug.
    #[test]
    fn journal_entries_csv_fraud_type_column_populated() {
        use datasynth_core::models::FraudType;
        use datasynth_core::models::{JournalEntry, JournalEntryHeader};

        // Build a minimal JE with fraud_type = GhostEmployee.
        let posting_date = chrono::NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let mut header = JournalEntryHeader::new("DE10".to_string(), posting_date);
        header.is_fraud = true;
        header.fraud_type = Some(FraudType::GhostEmployee);
        let je = JournalEntry::new(header);

        // Reproduce the fraud_type / anomaly_type extraction exactly as in
        // write_journal_entries_csv so we can unit-test the helper logic
        // without spinning up a full EnhancedGenerationResult.
        let h = &je.header;
        let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
        let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();

        // fraud_type column must be "GhostEmployee".
        assert_eq!(
            fraud_type_str, "GhostEmployee",
            "expected 'GhostEmployee' for FraudType::GhostEmployee; got: {fraud_type_str}"
        );
        // anomaly_type was never set on this header, so the column is empty.
        assert!(
            anomaly_type_str.is_empty(),
            "expected empty anomaly_type when None; got: {anomaly_type_str}"
        );
    }

    /// v5.17.0 — fraud_type and anomaly_type columns emit empty strings when None.
    #[test]
    fn journal_entries_csv_fraud_type_none_is_empty() {
        use datasynth_core::models::{JournalEntry, JournalEntryHeader};

        let posting_date = chrono::NaiveDate::from_ymd_opt(2024, 3, 1).unwrap();
        let header = JournalEntryHeader::new("DE10".to_string(), posting_date);
        let je = JournalEntry::new(header);

        // Neither field is set on a freshly constructed header, so both
        // columns are expected to render as empty strings.
        let h = &je.header;
        let fraud_type_str = h.fraud_type.map(|ft| format!("{ft:?}")).unwrap_or_default();
        let anomaly_type_str = h.anomaly_type.as_deref().unwrap_or("").to_string();

        assert!(
            fraud_type_str.is_empty(),
            "expected empty fraud_type for None; got: {fraud_type_str}"
        );
        assert!(
            anomaly_type_str.is_empty(),
            "expected empty anomaly_type for None; got: {anomaly_type_str}"
        );
    }
}